From 8f3a778aece94db5f382ad44afdb7340a090136c Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Wed, 31 Oct 2018 14:05:37 +0100 Subject: [PATCH] feat(dedupe): consider the user agent language in deduplication --- helper/diffPlaces.js | 13 +++++++----- middleware/dedupe.js | 2 +- test/unit/helper/diffPlaces.js | 36 ++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js index d9b93478..7a646851 100644 --- a/helper/diffPlaces.js +++ b/helper/diffPlaces.js @@ -54,7 +54,7 @@ function isParentHierarchyDifferent(item1, item2){ * Compare the name properties if they exist. * Returns false if the objects are the same, else true. */ -function isNameDifferent(item1, item2){ +function isNameDifferent(item1, item2, requestLanguage){ let names1 = _.get(item1, 'name'); let names2 = _.get(item2, 'name'); @@ -72,15 +72,17 @@ function isNameDifferent(item1, item2){ // else both have name info // iterate over all the languages in item2, comparing them to the - // 'default' name of item1 + // 'default' name of item1 and also against the language requested by the user. for( let lang in names2 ){ if( !isPropertyDifferent({[lang]: names1.default}, names2, lang) ){ return false; } + if( requestLanguage && !isPropertyDifferent({[lang]: names1[requestLanguage]}, names2, lang) ){ return false; } } // iterate over all the languages in item1, comparing them to the - // 'default' name of item2 + // 'default' name of item2 and also against the language requested by the user. for( let lang in names1 ){ if( !isPropertyDifferent({[lang]: names2.default}, names1, lang) ){ return false; } + if( requestLanguage && !isPropertyDifferent({[lang]: names2[requestLanguage]}, names1, lang) ){ return false; } } return true; @@ -119,11 +121,12 @@ function isAddressDifferent(item1, item2){ /** * Compare the two records and return true if they differ and false if same. + * Optionally provide $requestLanguage (req.clean.lang.iso6393) to improve name deduplication. */ -function isDifferent(item1, item2){ +function isDifferent(item1, item2, requestLanguage){ if( isLayerDifferent( item1, item2 ) ){ return true; } if( isParentHierarchyDifferent( item1, item2 ) ){ return true; } - if( isNameDifferent( item1, item2 ) ){ return true; } + if( isNameDifferent( item1, item2, requestLanguage ) ){ return true; } if( isAddressDifferent( item1, item2 ) ){ return true; } return false; } diff --git a/middleware/dedupe.js b/middleware/dedupe.js index 18054335..c15b6026 100644 --- a/middleware/dedupe.js +++ b/middleware/dedupe.js @@ -17,7 +17,7 @@ function dedupeResults(req, res, next) { let unique = [ res.data[0] ]; // convenience function to search unique array for an existing element which matches a hit - let findMatch = (hit) => unique.findIndex(elem => !isDifferent(elem, hit)); + let findMatch = (hit) => unique.findIndex(elem => !isDifferent(elem, hit, _.get(req, 'clean.lang.iso6393') )); // iterate over res.data using an old-school for loop starting at index 1 // we can call break at any time to end the iterator diff --git a/test/unit/helper/diffPlaces.js b/test/unit/helper/diffPlaces.js index 495fab02..a476f784 100644 --- a/test/unit/helper/diffPlaces.js +++ b/test/unit/helper/diffPlaces.js @@ -150,6 +150,42 @@ module.exports.tests.dedupe = function(test, common) { t.end(); }); + test('improved matching across languages - if default different, but user language matches default, consider this a match', function(t) { + var item1 = { + 'name': { + 'default': 'English Name', + 'eng': 'A Name' + } + }; + var item2 = { + 'name': { + 'default': 'A Name' + } + }; + + t.false(isDifferent(item1, item2, 'eng'), 'should be the same'); + t.end(); + }); + + + test('improved matching across languages - if default different, but user language matches (fra), consider this a match', function(t) { + var item1 = { + 'name': { + 'default': 'Name', + 'fra': 'French Name' + } + }; + var item2 = { + 'name': { + 'default': 'Another Name', + 'fra': 'French Name' + } + }; + + t.false(isDifferent(item1, item2, 'fra'), 'should be the same'); + t.end(); + }); + test('improved matching across languages - default names differ but match another language', function(t) { var item1 = { 'name': {