Browse Source

Add simple normalizer (lowercase + remove punctuation)

pull/376/head
Diana Shkolnikov 9 years ago
parent
commit
42d940f8c8
  1. 2
      controller/search.js
  2. 29
      middleware/dedupe.js
  3. 25
      test/unit/fixture/dedupe_elasticsearch_results.js

2
controller/search.js

@ -16,7 +16,7 @@ function setup( backend, query ){
}
// log clean parameters for stats
logger.info(req.clean);
logger.info('[req]', 'endpoint=' + req.path, req.clean);
// backend command
var cmd = {

29
middleware/dedupe.js

@ -1,5 +1,4 @@
var util = require('util');
var logger = require('pelias-logger').get('api:middle:dedupe');
var logger = require('pelias-logger').get('api');
var _ = require('lodash');
function setup() {
@ -45,8 +44,8 @@ function isDifferent(item1, item2) {
if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) {
propMatch(item1.name, item2.name, 'default');
}
else if (item1.name !== item2.name) {
throw 'different';
else {
propMatch(item1, item2, 'name');
}
if (item1.hasOwnProperty('address') && item2.hasOwnProperty('address')) {
@ -68,15 +67,29 @@ function isDifferent(item1, item2) {
/**
* Throw exception if properties are different
*
* @param item1
* @param item2
* @param prop
* @param {object} item1
* @param {object} item2
* @param {string} prop
* @throws {string}
*/
function propMatch(item1, item2, prop) {
if (item1[prop] !== item2[prop]) {
if (normalizeString(item1[prop]) !== normalizeString(item2[prop])) {
throw 'different';
}
}
/**
 * Normalize a value for loose comparison: lowercase it and remove punctuation.
 *
 * The lowercased text is split into words with lodash `_.words` and the words
 * are re-joined with single spaces, so the result contains only the words of
 * the input separated by one space each.
 *
 * Falsy input (undefined, null, '', 0, ...) yields ''. Any other non-string
 * value is coerced with String() before lowercasing, so numbers and plain
 * objects do not raise a TypeError on `.toLowerCase()`.
 *
 * @param {*} str value to normalize; normally a string
 * @returns {string} lowercased words joined by single spaces, '' for falsy input
 */
function normalizeString(str) {
  if (!str) {
    return '';
  }
  // Callers compare raw item properties, which are not guaranteed to be
  // strings; coerce so the comparison decides the outcome, not a TypeError.
  var text = (typeof str === 'string') ? str : String(str);
  return _.words(text.toLowerCase()).join(' ');
}
module.exports = setup;

25
test/unit/fixture/dedupe_elasticsearch_results.js

@ -24,6 +24,31 @@ module.exports = [
'_score': 1.2367082,
'confidence': 0.879
},
{
'center_point': {
'lon': -76.207456,
'lat': 40.039265
},
'address': {},
'local_admin': 'East Lampeter',
'admin1_abbr': 'PA',
'name': {
'default': 'East Lampeter, High-School'
},
'admin1': 'Pennsylvania',
'locality': 'Smoketown',
'alpha3': 'USA',
'admin2': 'Lancaster County',
'admin0': 'United States',
'neighborhood': 'Greenland',
'category': [
'education'
],
'_id': '357321757',
'_type': 'osmnode',
'_score': 1.2367082,
'confidence': 0.879
},
{
'center_point': {
'lon': -76.23246,

Loading…
Cancel
Save