Browse Source

feat(dedupe): improved handling of cases where "name", "parent" or "address_parts" propertiees are not set

pull/1222/head
missinglink 6 years ago committed by Julian Simioni
parent
commit
b2069606f2
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 93
      helper/diffPlaces.js
  2. 6
      test/unit/middleware/dedupe.js

93
helper/diffPlaces.js

@ -27,25 +27,27 @@ function isParentHierarchyDifferent(item1, item2){
let parent1 = _.get(item1, 'parent');
let parent2 = _.get(item2, 'parent');
// if neither object has parent info, we consider them the same
if( !parent1 && !parent2 ){ return false; }
// check if these are plain 'ol javascript objects
let isPojo1 = _.isPlainObject(parent1);
let isPojo2 = _.isPlainObject(parent2);
// both have parent info
if( _.isPlainObject(parent1) && _.isPlainObject(parent2) ){
// if neither object has parent info, we consider them the same
if( !isPojo1 && !isPojo2 ){ return false; }
// iterate over all the placetypes, comparing between items
return placeTypes.some( placeType => {
// if only one has parent info, we consider them the same
// note: this really shouldn't happen as at least on parent should exist
if( !isPojo1 || !isPojo2 ){ return false; }
// skip the parent field corresponding to the item placetype
if( placeType === item1.layer ){ return false; }
// else both have parent info
// iterate over all the placetypes, comparing between items
return placeTypes.some( placeType => {
// ensure the parent ids are the same for all placetypes
return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' );
});
}
// skip the parent field corresponding to the item placetype
if( placeType === item1.layer ){ return false; }
// if one has parent info and the other doesn't, we consider them different
return true;
// ensure the parent ids are the same for all placetypes
return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' );
});
}
/**
@ -56,27 +58,29 @@ function isNameDifferent(item1, item2){
let names1 = _.get(item1, 'name');
let names2 = _.get(item2, 'name');
// if neither object has name info, we consider them the same
if( !names1 && !names2 ){ return false; }
// check if these are plain 'ol javascript objects
let isPojo1 = _.isPlainObject(names1);
let isPojo2 = _.isPlainObject(names2);
// if both have name info
if( _.isPlainObject(names1) && _.isPlainObject(names2) ){
// if neither object has name info, we consider them the same
if( !isPojo1 && !isPojo2 ){ return false; }
// iterate over all the languages in item1, comparing between items
return Object.keys(names1).some( lang => {
// if only one has name info, we consider them the same
// note: this really shouldn't happen as name is a mandatory field
if( !isPojo1 || !isPojo2 ){ return false; }
// do not consider absence of an additional name as a difference
// but strictly enfore that 'default' must be present and match
if( _.has(names2, lang) || lang === 'default' ){
// else both have name info
// iterate over all the languages in item1, comparing between items
return Object.keys(names1).some( lang => {
// do not consider absence of an additional name as a difference
return isPropertyDifferent(names1, names2, lang);
}
});
}
// do not consider absence of an additional name as a difference
// but strictly enfore that 'default' must be present and match
if( _.has(names2, lang) || lang === 'default' ){
// if one has name info and the other doesn't, we consider them different
return true;
// do not consider absence of an additional name as a difference
return isPropertyDifferent(names1, names2, lang);
}
});
}
/**
@ -87,26 +91,27 @@ function isAddressDifferent(item1, item2){
let address1 = _.get(item1, 'address_parts');
let address2 = _.get(item2, 'address_parts');
// if neither object has address info, we consider them the same
if( !address1 && !address2 ){ return false; }
// check if these are plain 'ol javascript objects
let isPojo1 = _.isPlainObject(address1);
let isPojo2 = _.isPlainObject(address2);
// if both have address info
if( _.isPlainObject(address1) && _.isPlainObject(address2) ){
// if neither object has address info, we consider them the same
if( !isPojo1 && !isPojo2 ){ return false; }
if( isPropertyDifferent(address1, address2, 'number') ){ return true; }
if( isPropertyDifferent(address1, address2, 'street') ){ return true; }
// if only one has address info, we consider them the same
if( !isPojo1 || !isPojo2 ){ return false; }
// only compare zip if both records have it, otherwise just ignore and assume it's the same
// since by this time we've already compared parent hierarchies
if( _.has(address1, 'zip') && _.has(address2, 'zip') ){
if( isPropertyDifferent(address1, address2, 'zip') ){ return true; }
}
// else both have address info
if( isPropertyDifferent(address1, address2, 'number') ){ return true; }
if( isPropertyDifferent(address1, address2, 'street') ){ return true; }
return false;
// only compare zip if both records have it, otherwise just ignore and assume it's the same
// since by this time we've already compared parent hierarchies
if( _.has(address1, 'zip') && _.has(address2, 'zip') ){
if( isPropertyDifferent(address1, address2, 'zip') ){ return true; }
}
// one has address and the other doesn't, different!
return true;
return false;
}
/**

6
test/unit/middleware/dedupe.js

@ -1,7 +1,7 @@
var data = require('../fixture/dedupe_elasticsearch_results');
var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results');
var customLayerData = require('../fixture/dedupe_elasticsearch_custom_layer_results');
var onlyPostalcodeDiffers = require('../fixture/dedupe_only_postalcode_differs');
var onlyPostalcodeDiffersData = require('../fixture/dedupe_only_postalcode_differs');
var dedupe = require('../../../middleware/dedupe')();
module.exports.tests = {};
@ -84,9 +84,9 @@ module.exports.tests.dedupe = function(test, common) {
}
};
var res = {
data: onlyPostalcodeDiffers
data: onlyPostalcodeDiffersData
};
var expected = onlyPostalcodeDiffers[1]; // non-canonical record
var expected = onlyPostalcodeDiffersData[1]; // record with postcode
dedupe(req, res, function () {
t.equal(res.data.length, 1, 'only one result displayed');

Loading…
Cancel
Save