From e22b973cdf7aa1125dd22905fa367750731c8502 Mon Sep 17 00:00:00 2001 From: Vesa Meskanen Date: Mon, 13 Jun 2016 11:24:58 +0300 Subject: [PATCH 1/2] Do not consider absence of an additional name as a difference OSM data includes two almost identical 'Keskustori, Tampere' entries. The second one does not have the additional 'name.ru' property. This is no longer considered a difference when deduping. --- middleware/dedupe.js | 5 +++- .../fixture/dedupe_elasticsearch_results.js | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/middleware/dedupe.js b/middleware/dedupe.js index 7adb33e2..3723512b 100644 --- a/middleware/dedupe.js +++ b/middleware/dedupe.js @@ -51,7 +51,10 @@ function isDifferent(item1, item2) { if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { for (var lang in item1.name) { - propMatch(item1.name, item2.name, lang); + if(item2.name[lang] || lang === 'default') { + // do not consider absence of an additional name as a difference + propMatch(item1.name, item2.name, lang); + } } } else { diff --git a/test/unit/fixture/dedupe_elasticsearch_results.js b/test/unit/fixture/dedupe_elasticsearch_results.js index 59da3746..e4780097 100644 --- a/test/unit/fixture/dedupe_elasticsearch_results.js +++ b/test/unit/fixture/dedupe_elasticsearch_results.js @@ -80,6 +80,34 @@ module.exports = [ '_score': 1.2367082, 'confidence': 0.879 }, + { // same as #1, but with an additional name + 'center_point': { + 'lon': -76.207456, + 'lat': 40.039265 + }, + 'address_parts': {}, + 'parent': { + 'localadmin': ['East Lampeter'], + 'region_a': ['PA'], + 'region': ['Pennsylvania'], + 'locality': ['Smoketown'], + 'country_a': ['USA'], + 'county': ['Lancaster County'], + 'country': ['United States'], + 'neighbourhood': ['Greenland'] + }, + 'name': { + 'default': 'East Lampeter High School', + 'alt': 'High School of East Lampeter', + }, + 'category': [ + 'education' + ], + '_id': '357321757', + '_type': 'venue', + '_score': 1.2367082, 
+ 'confidence': 0.879 + }, { 'center_point': { 'lon': -76.207456, From 73f64ce3e1f6d48762ddba20dcbdef974ae8f7bc Mon Sep 17 00:00:00 2001 From: Vesa Meskanen Date: Thu, 16 Jun 2016 08:36:49 +0300 Subject: [PATCH 2/2] Cleanup: tabs -> spaces --- middleware/dedupe.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/middleware/dedupe.js b/middleware/dedupe.js index 3723512b..786675c4 100644 --- a/middleware/dedupe.js +++ b/middleware/dedupe.js @@ -51,10 +51,10 @@ function isDifferent(item1, item2) { if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { for (var lang in item1.name) { - if(item2.name[lang] || lang === 'default') { - // do not consider absence of an additional name as a difference - propMatch(item1.name, item2.name, lang); - } + if(item2.name[lang] || lang === 'default') { + // do not consider absence of an additional name as a difference + propMatch(item1.name, item2.name, lang); + } } } else {