Browse Source

feat(dedupe): consider the user agent language in deduplication

dedupe
Peter Johnson 6 years ago
parent
commit
8f3a778aec
  1. 13
      helper/diffPlaces.js
  2. 2
      middleware/dedupe.js
  3. 36
      test/unit/helper/diffPlaces.js

13
helper/diffPlaces.js

@ -54,7 +54,7 @@ function isParentHierarchyDifferent(item1, item2){
* Compare the name properties if they exist. * Compare the name properties if they exist.
* Returns false if the objects are the same, else true. * Returns false if the objects are the same, else true.
*/ */
function isNameDifferent(item1, item2){ function isNameDifferent(item1, item2, requestLanguage){
let names1 = _.get(item1, 'name'); let names1 = _.get(item1, 'name');
let names2 = _.get(item2, 'name'); let names2 = _.get(item2, 'name');
@ -72,15 +72,17 @@ function isNameDifferent(item1, item2){
// else both have name info // else both have name info
// iterate over all the languages in item2, comparing them to the // iterate over all the languages in item2, comparing them to the
// 'default' name of item1 // 'default' name of item1 and also against the language requested by the user.
for( let lang in names2 ){ for( let lang in names2 ){
if( !isPropertyDifferent({[lang]: names1.default}, names2, lang) ){ return false; } if( !isPropertyDifferent({[lang]: names1.default}, names2, lang) ){ return false; }
if( requestLanguage && !isPropertyDifferent({[lang]: names1[requestLanguage]}, names2, lang) ){ return false; }
} }
// iterate over all the languages in item1, comparing them to the // iterate over all the languages in item1, comparing them to the
// 'default' name of item2 // 'default' name of item2 and also against the language requested by the user.
for( let lang in names1 ){ for( let lang in names1 ){
if( !isPropertyDifferent({[lang]: names2.default}, names1, lang) ){ return false; } if( !isPropertyDifferent({[lang]: names2.default}, names1, lang) ){ return false; }
if( requestLanguage && !isPropertyDifferent({[lang]: names2[requestLanguage]}, names1, lang) ){ return false; }
} }
return true; return true;
@ -119,11 +121,12 @@ function isAddressDifferent(item1, item2){
/** /**
* Compare the two records and return true if they differ and false if same. * Compare the two records and return true if they differ and false if same.
* Optionally provide $requestLanguage (req.clean.lang.iso6393) to improve name deduplication.
*/ */
function isDifferent(item1, item2){ function isDifferent(item1, item2, requestLanguage){
if( isLayerDifferent( item1, item2 ) ){ return true; } if( isLayerDifferent( item1, item2 ) ){ return true; }
if( isParentHierarchyDifferent( item1, item2 ) ){ return true; } if( isParentHierarchyDifferent( item1, item2 ) ){ return true; }
if( isNameDifferent( item1, item2 ) ){ return true; } if( isNameDifferent( item1, item2, requestLanguage ) ){ return true; }
if( isAddressDifferent( item1, item2 ) ){ return true; } if( isAddressDifferent( item1, item2 ) ){ return true; }
return false; return false;
} }

2
middleware/dedupe.js

@ -17,7 +17,7 @@ function dedupeResults(req, res, next) {
let unique = [ res.data[0] ]; let unique = [ res.data[0] ];
// convenience function to search unique array for an existing element which matches a hit // convenience function to search unique array for an existing element which matches a hit
let findMatch = (hit) => unique.findIndex(elem => !isDifferent(elem, hit)); let findMatch = (hit) => unique.findIndex(elem => !isDifferent(elem, hit, _.get(req, 'clean.lang.iso6393') ));
// iterate over res.data using an old-school for loop starting at index 1 // iterate over res.data using an old-school for loop starting at index 1
// we can call break at any time to end the iterator // we can call break at any time to end the iterator

36
test/unit/helper/diffPlaces.js

@ -150,6 +150,42 @@ module.exports.tests.dedupe = function(test, common) {
t.end(); t.end();
}); });
// The 'default' names differ, but the first record's name in the requested
// language ('eng') equals the second record's 'default' name, so the two
// records are expected to be reported as duplicates.
test('improved matching across languages - if default different, but user language matches default, consider this a match', function(t) {
  const localisedPlace = {
    name: {
      default: 'English Name',
      eng: 'A Name'
    }
  };
  const defaultOnlyPlace = {
    name: { default: 'A Name' }
  };

  // third argument is the language requested by the user
  const differs = isDifferent(localisedPlace, defaultOnlyPlace, 'eng');
  t.false(differs, 'should be the same');
  t.end();
});
// The 'default' names differ, but both records carry the same name in the
// requested language ('fra'), so they are expected to be reported as
// duplicates.
test('improved matching across languages - if default different, but user language matches (fra), consider this a match', function(t) {
  const sharedFrenchName = 'French Name';
  const firstPlace = {
    name: { default: 'Name', fra: sharedFrenchName }
  };
  const secondPlace = {
    name: { default: 'Another Name', fra: sharedFrenchName }
  };

  // third argument is the language requested by the user
  const differs = isDifferent(firstPlace, secondPlace, 'fra');
  t.false(differs, 'should be the same');
  t.end();
});
test('improved matching across languages - default names differ but match another language', function(t) { test('improved matching across languages - default names differ but match another language', function(t) {
var item1 = { var item1 = {
'name': { 'name': {

Loading…
Cancel
Save