Browse Source

feat(dedupe): improved handling of cases where "name", "parent" or "address_parts" properties are not set

pull/1222/head
missinglink 6 years ago committed by Julian Simioni
parent
commit
b2069606f2
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 93
      helper/diffPlaces.js
  2. 6
      test/unit/middleware/dedupe.js

93
helper/diffPlaces.js

@ -27,25 +27,27 @@ function isParentHierarchyDifferent(item1, item2){
let parent1 = _.get(item1, 'parent'); let parent1 = _.get(item1, 'parent');
let parent2 = _.get(item2, 'parent'); let parent2 = _.get(item2, 'parent');
// if neither object has parent info, we consider them the same // check if these are plain 'ol javascript objects
if( !parent1 && !parent2 ){ return false; } let isPojo1 = _.isPlainObject(parent1);
let isPojo2 = _.isPlainObject(parent2);
// both have parent info // if neither object has parent info, we consider them the same
if( _.isPlainObject(parent1) && _.isPlainObject(parent2) ){ if( !isPojo1 && !isPojo2 ){ return false; }
// iterate over all the placetypes, comparing between items // if only one has parent info, we consider them the same
return placeTypes.some( placeType => { // note: this really shouldn't happen as at least on parent should exist
if( !isPojo1 || !isPojo2 ){ return false; }
// skip the parent field corresponding to the item placetype // else both have parent info
if( placeType === item1.layer ){ return false; } // iterate over all the placetypes, comparing between items
return placeTypes.some( placeType => {
// ensure the parent ids are the same for all placetypes // skip the parent field corresponding to the item placetype
return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' ); if( placeType === item1.layer ){ return false; }
});
}
// if one has parent info and the other doesn't, we consider them different // ensure the parent ids are the same for all placetypes
return true; return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' );
});
} }
/** /**
@ -56,27 +58,29 @@ function isNameDifferent(item1, item2){
let names1 = _.get(item1, 'name'); let names1 = _.get(item1, 'name');
let names2 = _.get(item2, 'name'); let names2 = _.get(item2, 'name');
// if neither object has name info, we consider them the same // check if these are plain 'ol javascript objects
if( !names1 && !names2 ){ return false; } let isPojo1 = _.isPlainObject(names1);
let isPojo2 = _.isPlainObject(names2);
// if both have name info // if neither object has name info, we consider them the same
if( _.isPlainObject(names1) && _.isPlainObject(names2) ){ if( !isPojo1 && !isPojo2 ){ return false; }
// iterate over all the languages in item1, comparing between items // if only one has name info, we consider them the same
return Object.keys(names1).some( lang => { // note: this really shouldn't happen as name is a mandatory field
if( !isPojo1 || !isPojo2 ){ return false; }
// do not consider absence of an additional name as a difference // else both have name info
// but strictly enfore that 'default' must be present and match // iterate over all the languages in item1, comparing between items
if( _.has(names2, lang) || lang === 'default' ){ return Object.keys(names1).some( lang => {
// do not consider absence of an additional name as a difference // do not consider absence of an additional name as a difference
return isPropertyDifferent(names1, names2, lang); // but strictly enfore that 'default' must be present and match
} if( _.has(names2, lang) || lang === 'default' ){
});
}
// if one has name info and the other doesn't, we consider them different // do not consider absence of an additional name as a difference
return true; return isPropertyDifferent(names1, names2, lang);
}
});
} }
/** /**
@ -87,26 +91,27 @@ function isAddressDifferent(item1, item2){
let address1 = _.get(item1, 'address_parts'); let address1 = _.get(item1, 'address_parts');
let address2 = _.get(item2, 'address_parts'); let address2 = _.get(item2, 'address_parts');
// if neither object has address info, we consider them the same // check if these are plain 'ol javascript objects
if( !address1 && !address2 ){ return false; } let isPojo1 = _.isPlainObject(address1);
let isPojo2 = _.isPlainObject(address2);
// if both have address info // if neither object has address info, we consider them the same
if( _.isPlainObject(address1) && _.isPlainObject(address2) ){ if( !isPojo1 && !isPojo2 ){ return false; }
if( isPropertyDifferent(address1, address2, 'number') ){ return true; } // if only one has address info, we consider them the same
if( isPropertyDifferent(address1, address2, 'street') ){ return true; } if( !isPojo1 || !isPojo2 ){ return false; }
// only compare zip if both records have it, otherwise just ignore and assume it's the same // else both have address info
// since by this time we've already compared parent hierarchies if( isPropertyDifferent(address1, address2, 'number') ){ return true; }
if( _.has(address1, 'zip') && _.has(address2, 'zip') ){ if( isPropertyDifferent(address1, address2, 'street') ){ return true; }
if( isPropertyDifferent(address1, address2, 'zip') ){ return true; }
}
return false; // only compare zip if both records have it, otherwise just ignore and assume it's the same
// since by this time we've already compared parent hierarchies
if( _.has(address1, 'zip') && _.has(address2, 'zip') ){
if( isPropertyDifferent(address1, address2, 'zip') ){ return true; }
} }
// one has address and the other doesn't, different! return false;
return true;
} }
/** /**

6
test/unit/middleware/dedupe.js

@ -1,7 +1,7 @@
var data = require('../fixture/dedupe_elasticsearch_results'); var data = require('../fixture/dedupe_elasticsearch_results');
var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results'); var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results');
var customLayerData = require('../fixture/dedupe_elasticsearch_custom_layer_results'); var customLayerData = require('../fixture/dedupe_elasticsearch_custom_layer_results');
var onlyPostalcodeDiffers = require('../fixture/dedupe_only_postalcode_differs'); var onlyPostalcodeDiffersData = require('../fixture/dedupe_only_postalcode_differs');
var dedupe = require('../../../middleware/dedupe')(); var dedupe = require('../../../middleware/dedupe')();
module.exports.tests = {}; module.exports.tests = {};
@ -84,9 +84,9 @@ module.exports.tests.dedupe = function(test, common) {
} }
}; };
var res = { var res = {
data: onlyPostalcodeDiffers data: onlyPostalcodeDiffersData
}; };
var expected = onlyPostalcodeDiffers[1]; // non-canonical record var expected = onlyPostalcodeDiffersData[1]; // record with postcode
dedupe(req, res, function () { dedupe(req, res, function () {
t.equal(res.data.length, 1, 'only one result displayed'); t.equal(res.data.length, 1, 'only one result displayed');

Loading…
Cancel
Save