Browse Source

feat(dedupe_improved_parent_matching): only check parent fields above the layer of the least granular match

dedupe_improved_parent_matching
Peter Johnson 6 years ago
parent
commit
4c6797b695
  1. 21
      helper/diffPlaces.js
  2. 76
      test/unit/helper/diffPlaces.js

21
helper/diffPlaces.js

@ -39,11 +39,26 @@ function isParentHierarchyDifferent(item1, item2){
if( !isPojo1 || !isPojo2 ){ return false; }
// else both have parent info
// iterate over all the placetypes, comparing between items
return placeTypes.some( placeType => {
// skip the parent field corresponding to the item placetype
if( placeType === item1.layer ){ return false; }
// we only want to check parent items representing places less granular
// than the highest matched layer.
// eg. if we are comparing layer=address & layer=country then we only
// check for differences in layers above country, so continent, planet etc.
let highestLayerIndex = Math.min(
placeTypes.findIndex(el => el === item1.layer),
placeTypes.findIndex(el => el === item2.layer)
);
// in the case where we couldn't find either later in the $placeTypes array
// we will enforce that all parent fields are checked.
if( highestLayerIndex === -1 ){ highestLayerIndex = Infinity; }
return placeTypes.some((placeType, pos) => {
// skip layers that are less granular than the highest matched layer
if( pos > highestLayerIndex ){ return false; }
// ensure the parent ids are the same for all placetypes
return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' );

76
test/unit/helper/diffPlaces.js

@ -60,6 +60,82 @@ module.exports.tests.dedupe = function(test, common) {
t.end();
});
test('isParentHierarchyDifferent: do not compare parentage at lower levels to the highest item placetypes', function(t) {
var item1 = {
'layer': 'country',
'parent': {
'localadmin_id': '12345',
'locality_id': '54321'
}
};
var item2 = {
'layer': 'country',
'parent': {
'localadmin_id': '56789',
'locality_id': '98765'
}
};
t.false(isDifferent(item1, item2), 'should not be considered different');
t.end();
});
test('isParentHierarchyDifferent: do compare parentage at the same level as the item placetypes', function(t) {
var item1 = {
'layer': 'country',
'parent': {
'country_id': '12345'
}
};
var item2 = {
'layer': 'country',
'parent': {
'country_id': '54321'
}
};
t.true(isDifferent(item1, item2), 'should be different');
t.end();
});
test('isParentHierarchyDifferent: do compare parentage at higher levels than the highest item placetypes', function(t) {
var item1 = {
'layer': 'country',
'parent': {
'localadmin_id': '12345',
'ocean_id': '54321'
}
};
var item2 = {
'layer': 'country',
'parent': {
'localadmin_id': '56789',
'ocean_id': '98765'
}
};
t.true(isDifferent(item1, item2), 'should be different');
t.end();
});
test('isParentHierarchyDifferent: consider parentage at same level as placetype for comparison', function(t) {
var item1 = {
'layer': 'country',
'parent': {
'country_id': '12345'
}
};
var item2 = {
'layer': 'country',
'parent': {
'country_id': '54321'
}
};
t.true(isDifferent(item1, item2), 'should be different');
t.end();
});
test('catch diff name', function(t) {
var item1 = {
'name': {

Loading…
Cancel
Save