From b2069606f20ea03940d6ebe08a8f81f4a571ddaa Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 30 Oct 2018 15:59:38 +0100 Subject: [PATCH] feat(dedupe): improved handling of cases where "name", "parent" or "address_parts" propertiees are not set --- helper/diffPlaces.js | 93 ++++++++++++++++++---------------- test/unit/middleware/dedupe.js | 6 +-- 2 files changed, 52 insertions(+), 47 deletions(-) diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js index 0f327bfb..92b9ea51 100644 --- a/helper/diffPlaces.js +++ b/helper/diffPlaces.js @@ -27,25 +27,27 @@ function isParentHierarchyDifferent(item1, item2){ let parent1 = _.get(item1, 'parent'); let parent2 = _.get(item2, 'parent'); - // if neither object has parent info, we consider them the same - if( !parent1 && !parent2 ){ return false; } + // check if these are plain 'ol javascript objects + let isPojo1 = _.isPlainObject(parent1); + let isPojo2 = _.isPlainObject(parent2); - // both have parent info - if( _.isPlainObject(parent1) && _.isPlainObject(parent2) ){ + // if neither object has parent info, we consider them the same + if( !isPojo1 && !isPojo2 ){ return false; } - // iterate over all the placetypes, comparing between items - return placeTypes.some( placeType => { + // if only one has parent info, we consider them the same + // note: this really shouldn't happen as at least on parent should exist + if( !isPojo1 || !isPojo2 ){ return false; } - // skip the parent field corresponding to the item placetype - if( placeType === item1.layer ){ return false; } + // else both have parent info + // iterate over all the placetypes, comparing between items + return placeTypes.some( placeType => { - // ensure the parent ids are the same for all placetypes - return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' ); - }); - } + // skip the parent field corresponding to the item placetype + if( placeType === item1.layer ){ return false; } - // if one has parent info and the other doesn't, we consider them different - return true; + // ensure the parent ids are the same for all placetypes + return isPropertyDifferent( item1.parent, item2.parent, placeType + '_id' ); + }); } /** @@ -56,27 +58,29 @@ function isNameDifferent(item1, item2){ let names1 = _.get(item1, 'name'); let names2 = _.get(item2, 'name'); - // if neither object has name info, we consider them the same - if( !names1 && !names2 ){ return false; } + // check if these are plain 'ol javascript objects + let isPojo1 = _.isPlainObject(names1); + let isPojo2 = _.isPlainObject(names2); - // if both have name info - if( _.isPlainObject(names1) && _.isPlainObject(names2) ){ + // if neither object has name info, we consider them the same + if( !isPojo1 && !isPojo2 ){ return false; } - // iterate over all the languages in item1, comparing between items - return Object.keys(names1).some( lang => { + // if only one has name info, we consider them the same + // note: this really shouldn't happen as name is a mandatory field + if( !isPojo1 || !isPojo2 ){ return false; } - // do not consider absence of an additional name as a difference - // but strictly enfore that 'default' must be present and match - if( _.has(names2, lang) || lang === 'default' ){ + // else both have name info + // iterate over all the languages in item1, comparing between items + return Object.keys(names1).some( lang => { - // do not consider absence of an additional name as a difference - return isPropertyDifferent(names1, names2, lang); - } - }); - } + // do not consider absence of an additional name as a difference + // but strictly enfore that 'default' must be present and match + if( _.has(names2, lang) || lang === 'default' ){ - // if one has name info and the other doesn't, we consider them different - return true; + // do not consider absence of an additional name as a difference + return isPropertyDifferent(names1, names2, lang); + } + }); } /** @@ -87,26 +91,27 @@ function isAddressDifferent(item1, item2){ let address1 = _.get(item1, 'address_parts'); let address2 = _.get(item2, 'address_parts'); - // if neither object has address info, we consider them the same - if( !address1 && !address2 ){ return false; } + // check if these are plain 'ol javascript objects + let isPojo1 = _.isPlainObject(address1); + let isPojo2 = _.isPlainObject(address2); - // if both have address info - if( _.isPlainObject(address1) && _.isPlainObject(address2) ){ + // if neither object has address info, we consider them the same + if( !isPojo1 && !isPojo2 ){ return false; } - if( isPropertyDifferent(address1, address2, 'number') ){ return true; } - if( isPropertyDifferent(address1, address2, 'street') ){ return true; } + // if only one has address info, we consider them the same + if( !isPojo1 || !isPojo2 ){ return false; } - // only compare zip if both records have it, otherwise just ignore and assume it's the same - // since by this time we've already compared parent hierarchies - if( _.has(address1, 'zip') && _.has(address2, 'zip') ){ - if( isPropertyDifferent(address1, address2, 'zip') ){ return true; } - } + // else both have address info + if( isPropertyDifferent(address1, address2, 'number') ){ return true; } + if( isPropertyDifferent(address1, address2, 'street') ){ return true; } - return false; + // only compare zip if both records have it, otherwise just ignore and assume it's the same + // since by this time we've already compared parent hierarchies + if( _.has(address1, 'zip') && _.has(address2, 'zip') ){ + if( isPropertyDifferent(address1, address2, 'zip') ){ return true; } } - // one has address and the other doesn't, different! - return true; + return false; } /** diff --git a/test/unit/middleware/dedupe.js b/test/unit/middleware/dedupe.js index b1b6e9ef..63cb3373 100644 --- a/test/unit/middleware/dedupe.js +++ b/test/unit/middleware/dedupe.js @@ -1,7 +1,7 @@ var data = require('../fixture/dedupe_elasticsearch_results'); var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results'); var customLayerData = require('../fixture/dedupe_elasticsearch_custom_layer_results'); -var onlyPostalcodeDiffers = require('../fixture/dedupe_only_postalcode_differs'); +var onlyPostalcodeDiffersData = require('../fixture/dedupe_only_postalcode_differs'); var dedupe = require('../../../middleware/dedupe')(); module.exports.tests = {}; @@ -84,9 +84,9 @@ module.exports.tests.dedupe = function(test, common) { } }; var res = { - data: onlyPostalcodeDiffers + data: onlyPostalcodeDiffersData }; - var expected = onlyPostalcodeDiffers[1]; // non-canonical record + var expected = onlyPostalcodeDiffersData[1]; // record with postcode dedupe(req, res, function () { t.equal(res.data.length, 1, 'only one result displayed');