mirror of https://github.com/pelias/api.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
122 lines
3.0 KiB
122 lines
3.0 KiB
var logger = require('pelias-logger').get('api'); |
|
var _ = require('lodash'); |
|
|
|
function setup() { |
|
return dedupeResults; |
|
} |
|
|
|
function dedupeResults(req, res, next) { |
|
// do nothing if no result data set |
|
if (_.isUndefined(req.clean) || _.isUndefined(res) || _.isUndefined(res.data)) { |
|
return next(); |
|
} |
|
|
|
// loop through data items and only copy unique items to uniqueResults |
|
var uniqueResults = []; |
|
|
|
_.some(res.data, function (hit) { |
|
if (uniqueResults.length === 0 || _.every(uniqueResults, isDifferent.bind(null, hit)) ) { |
|
uniqueResults.push(hit); |
|
} |
|
else { |
|
logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default }); |
|
} |
|
|
|
// stop looping when requested size has been reached in uniqueResults |
|
return req.clean.size <= uniqueResults.length; |
|
}); |
|
|
|
res.data = uniqueResults; |
|
|
|
next(); |
|
} |
|
|
|
/** |
|
* @param {object} item1 |
|
* @param {object} item2 |
|
* @returns {boolean} |
|
* @throws {Error} |
|
*/ |
|
function isDifferent(item1, item2) { |
|
try { |
|
if (item1.hasOwnProperty('parent') && item2.hasOwnProperty('parent')) { |
|
propMatch(item1.parent, item2.parent, 'region_a'); |
|
propMatch(item1.parent, item2.parent, 'country'); |
|
propMatch(item1.parent, item2.parent, 'locality'); |
|
propMatch(item1.parent, item2.parent, 'neighborhood'); |
|
} |
|
else if (item1.parent !== item2.parent) { |
|
throw new Error('different'); |
|
} |
|
|
|
if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { |
|
for (var lang in item1.name) { |
|
propMatch(item1.name, item2.name, lang); |
|
} |
|
} |
|
else { |
|
propMatch(item1, item2, 'name'); |
|
} |
|
|
|
if (item1.hasOwnProperty('address') && item2.hasOwnProperty('address')) { |
|
propMatch(item1.address, item2.address, 'number'); |
|
propMatch(item1.address, item2.address, 'street'); |
|
propMatch(item1.address, item2.address, 'zip'); |
|
} |
|
else if (item1.address !== item2.address) { |
|
throw new Error('different'); |
|
} |
|
} |
|
catch (err) { |
|
if (err.message === 'different') { |
|
return true; |
|
} |
|
throw err; |
|
} |
|
|
|
return false; |
|
} |
|
|
|
/** |
|
* Throw exception if properties are different |
|
* |
|
* @param {object} item1 |
|
* @param {object} item2 |
|
* @param {string} prop |
|
* @throws {Error} |
|
*/ |
|
function propMatch(item1, item2, prop) { |
|
var prop1 = item1[prop]; |
|
var prop2 = item2[prop]; |
|
|
|
// in the case the property is an array (currently only in parent schema) |
|
// simply take the 1st item. this will change in the near future to support multiple hierarchies |
|
if (_.isArray(prop1) && _.isArray(prop2)) { |
|
prop1 = prop1[0]; |
|
prop2= prop2[0]; |
|
} |
|
|
|
if (!prop1 || !prop2) { |
|
return; // absence of information does not make an item different |
|
} |
|
|
|
if (normalizeString(prop1) !== normalizeString(prop2)) { |
|
throw new Error('different'); |
|
} |
|
} |
|
|
|
/** |
|
* Remove punctuation and lowercase |
|
* |
|
* @param {string} str |
|
* @returns {string} |
|
*/ |
|
function normalizeString(str) { |
|
if (!str) { |
|
return ''; |
|
} |
|
return str.toLowerCase().split(/[ ,-]+/).join(' '); |
|
} |
|
|
|
|
|
module.exports = setup;
|
|
|