Browse Source

Add simple normalizer (lowercase + remove punctuation)

pull/376/head
Diana Shkolnikov 9 years ago
parent
commit
42d940f8c8
  1. 2
      controller/search.js
  2. 29
      middleware/dedupe.js
  3. 25
      test/unit/fixture/dedupe_elasticsearch_results.js

2
controller/search.js

@ -16,7 +16,7 @@ function setup( backend, query ){
} }
// log clean parameters for stats // log clean parameters for stats
logger.info(req.clean); logger.info('[req]', 'endpoint=' + req.path, req.clean);
// backend command // backend command
var cmd = { var cmd = {

29
middleware/dedupe.js

@ -1,5 +1,4 @@
var util = require('util'); var logger = require('pelias-logger').get('api');
var logger = require('pelias-logger').get('api:middle:dedupe');
var _ = require('lodash'); var _ = require('lodash');
function setup() { function setup() {
@ -45,8 +44,8 @@ function isDifferent(item1, item2) {
if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) {
propMatch(item1.name, item2.name, 'default'); propMatch(item1.name, item2.name, 'default');
} }
else if (item1.name !== item2.name) { else {
throw 'different'; propMatch(item1, item2, 'name');
} }
if (item1.hasOwnProperty('address') && item2.hasOwnProperty('address')) { if (item1.hasOwnProperty('address') && item2.hasOwnProperty('address')) {
@ -68,15 +67,29 @@ function isDifferent(item1, item2) {
/** /**
* Throw exception if properties are different * Throw exception if properties are different
* *
* @param item1 * @param {object} item1
* @param item2 * @param {object} item2
* @param prop * @param {string} prop
* @throws {string}
*/ */
function propMatch(item1, item2, prop) { function propMatch(item1, item2, prop) {
if (item1[prop] !== item2[prop]) { if (normalizeString(item1[prop]) !== normalizeString(item2[prop])) {
throw 'different'; throw 'different';
} }
} }
/**
 * Normalize a value for loose text comparison: lowercase it, split it into
 * words with lodash's `_.words`, and re-join the words with single spaces,
 * so differences in case and punctuation are ignored.
 *
 * Falsy input (undefined, null, '', 0, ...) yields ''. Truthy non-string
 * input is coerced with String() first, so a property of an unexpected type
 * compares as different text instead of raising a TypeError.
 *
 * @param {*} str value to normalize; normally a string
 * @returns {string} lowercased words separated by single spaces
 */
function normalizeString(str) {
  if (!str) {
    return '';
  }
  // propMatch passes raw document properties, which are not guaranteed to be strings
  var text = typeof str === 'string' ? str : String(str);
  return _.words(text.toLowerCase()).join(' ');
}
module.exports = setup; module.exports = setup;

25
test/unit/fixture/dedupe_elasticsearch_results.js

@ -24,6 +24,31 @@ module.exports = [
'_score': 1.2367082, '_score': 1.2367082,
'confidence': 0.879 'confidence': 0.879
}, },
{
'center_point': {
'lon': -76.207456,
'lat': 40.039265
},
'address': {},
'local_admin': 'East Lampeter',
'admin1_abbr': 'PA',
'name': {
'default': 'East Lampeter, High-School'
},
'admin1': 'Pennsylvania',
'locality': 'Smoketown',
'alpha3': 'USA',
'admin2': 'Lancaster County',
'admin0': 'United States',
'neighborhood': 'Greenland',
'category': [
'education'
],
'_id': '357321757',
'_type': 'osmnode',
'_score': 1.2367082,
'confidence': 0.879
},
{ {
'center_point': { 'center_point': {
'lon': -76.23246, 'lon': -76.23246,

Loading…
Cancel
Save