From 4c6797b695550c99281033d8d62627b6fbc8376c Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Wed, 31 Oct 2018 15:26:26 +0100 Subject: [PATCH] feat(dedupe_improved_parent_matching): only check parent fields above the layer of the least granular match --- helper/diffPlaces.js | 21 ++++++++-- test/unit/helper/diffPlaces.js | 76 ++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js index 92b9ea51..dfe0e5db 100644 --- a/helper/diffPlaces.js +++ b/helper/diffPlaces.js @@ -39,11 +39,26 @@ function isParentHierarchyDifferent(item1, item2){ if( !isPojo1 || !isPojo2 ){ return false; } // else both have parent info + // iterate over all the placetypes, comparing between items - return placeTypes.some( placeType => { - // skip the parent field corresponding to the item placetype - if( placeType === item1.layer ){ return false; } + // we only want to check parent items representing places less granular + // than the highest matched layer. + // eg. if we are comparing layer=address & layer=country then we only + // check for differences in layers above country, so continent, planet etc. + let highestLayerIndex = Math.min( + placeTypes.findIndex(el => el === item1.layer), + placeTypes.findIndex(el => el === item2.layer) + ); + + // in the case where we couldn't find either later in the $placeTypes array + // we will enforce that all parent fields are checked. + if( highestLayerIndex === -1 ){ highestLayerIndex = Infinity; } + + return placeTypes.some((placeType, pos) => { + + // skip layers that are less granular than the highest matched layer + if( pos > highestLayerIndex ){ return false; } // ensure the parent ids are the same for all placetypes return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' ); diff --git a/test/unit/helper/diffPlaces.js b/test/unit/helper/diffPlaces.js index 6fc27c36..ef5a40df 100644 --- a/test/unit/helper/diffPlaces.js +++ b/test/unit/helper/diffPlaces.js @@ -60,6 +60,82 @@ module.exports.tests.dedupe = function(test, common) { t.end(); }); + test('isParentHierarchyDifferent: do not compare parentage at lower levels to the highest item placetypes', function(t) { + var item1 = { + 'layer': 'country', + 'parent': { + 'localadmin_id': '12345', + 'locality_id': '54321' + } + }; + var item2 = { + 'layer': 'country', + 'parent': { + 'localadmin_id': '56789', + 'locality_id': '98765' + } + }; + + t.false(isDifferent(item1, item2), 'should not be considered different'); + t.end(); + }); + + test('isParentHierarchyDifferent: do compare parentage at the same level as the item placetypes', function(t) { + var item1 = { + 'layer': 'country', + 'parent': { + 'country_id': '12345' + } + }; + var item2 = { + 'layer': 'country', + 'parent': { + 'country_id': '54321' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('isParentHierarchyDifferent: do compare parentage at higher levels than the highest item placetypes', function(t) { + var item1 = { + 'layer': 'country', + 'parent': { + 'localadmin_id': '12345', + 'ocean_id': '54321' + } + }; + var item2 = { + 'layer': 'country', + 'parent': { + 'localadmin_id': '56789', + 'ocean_id': '98765' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('isParentHierarchyDifferent: consider parentage at same level as placetype for comparison', function(t) { + var item1 = { + 'layer': 'country', + 'parent': { + 'country_id': '12345' + } + }; + var item2 = { + 'layer': 'country', + 'parent': { + 'country_id': '54321' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + test('catch diff name', function(t) { var item1 = { 'name': {