You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

120 lines
3.1 KiB

var logger = require('pelias-logger').get('api');
var _ = require('lodash');
function setup() {
return dedupeResults;
}
function dedupeResults(req, res, next) {
// do nothing if no result data set
if (_.isUndefined(req.clean) || _.isUndefined(res) || _.isUndefined(res.data)) {
return next();
}
// loop through data items and only copy unique items to uniqueResults
var uniqueResults = [];
_.some(res.data, function (hit) {
if (uniqueResults.length === 0 || _.every(uniqueResults, isDifferent.bind(null, hit)) ) {
uniqueResults.push(hit);
}
else {
logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default });
}
// stop looping when requested size has been reached in uniqueResults
return req.clean.size <= uniqueResults.length;
});
res.data = uniqueResults;
next();
}
/**
* @param {object} item1
* @param {object} item2
* @returns {boolean}
* @throws {Error}
*/
function isDifferent(item1, item2) {
try {
if (item1.hasOwnProperty('parent') && item2.hasOwnProperty('parent')) {
propMatch(item1.parent, item2.parent, 'region_a');
propMatch(item1.parent, item2.parent, 'country');
propMatch(item1.parent, item2.parent, 'locality');
propMatch(item1.parent, item2.parent, 'neighbourhood');
}
else if (item1.parent !== item2.parent) {
throw new Error('different');
}
if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) {
for (var lang in item1.name) {
if(item2.name[lang] || lang === 'default') {
// do not consider absence of an additional name as a difference
propMatch(item1.name, item2.name, lang);
}
}
}
else {
propMatch(item1, item2, 'name');
}
if (item1.hasOwnProperty('address_parts') && item2.hasOwnProperty('address_parts')) {
propMatch(item1.address_parts, item2.address_parts, 'number');
propMatch(item1.address_parts, item2.address_parts, 'street');
propMatch(item1.address_parts, item2.address_parts, 'zip');
}
else if (item1.address_parts !== item2.address_parts) {
throw new Error('different');
}
}
catch (err) {
if (err.message === 'different') {
return true;
}
throw err;
}
return false;
}
/**
* Throw exception if properties are different
*
* @param {object} item1
* @param {object} item2
* @param {string} prop
* @throws {Error}
*/
function propMatch(item1, item2, prop) {
var prop1 = item1[prop];
var prop2 = item2[prop];
// in the case the property is an array (currently only in parent schema)
// simply take the 1st item. this will change in the near future to support multiple hierarchies
if (_.isArray(prop1)) { prop1 = prop1[0]; }
if (_.isArray(prop2)) { prop2 = prop2[0]; }
if (normalizeString(prop1) !== normalizeString(prop2)) {
throw new Error('different');
}
}
/**
* Remove punctuation and lowercase
*
* @param {string} str
* @returns {string}
*/
function normalizeString(str) {
if (!str) {
return '';
}
return str.toLowerCase().split(/[ ,-]+/).join(' ');
}
module.exports = setup;