diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js new file mode 100644 index 00000000..e6a48af5 --- /dev/null +++ b/helper/diffPlaces.js @@ -0,0 +1,172 @@ +var _ = require('lodash'); +var placeTypes = require('../helper/placeTypes'); + +/** + * Compare the layer properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function isDiffLayer(item1, item2) { + if (item1.layer === item2.layer) { + return false; + } + + throw new Error('different'); +} + +/** + * Compare the parent.* properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function isDiffParentHierarchy(item1, item2) { + // if neither object has parent, assume same + if (!item1.hasOwnProperty('parent') && !item2.hasOwnProperty('parent')) { + return false; + } + + // if both have parent, do the rest of the checking + if (item1.hasOwnProperty('parent') && item2.hasOwnProperty('parent')) { + placeTypes.forEach(function (placeType) { + // don't consider its own id + if (placeType === item1.layer) { + return; + } + propMatch(item1.parent, item2.parent, placeType + '_id'); + }); + return false; + } + + // if one has parent and the other doesn't consider different + throw new Error('different'); +} + +/** + * Compare the name.* properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function isDiffName(item1, item2) { + if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { + for (var lang in item1.name) { + if(item2.name[lang] || lang === 'default') { + // do not consider absence of an additional name as a difference + propMatch(item1.name, item2.name, lang); + } + } + } + else { + propMatch(item1, item2, 'name'); + } +} + +/** + * Compare the address_parts properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function isDiffAddress(item1, item2) { + // if neither record has address, assume same + if (!item1.hasOwnProperty('address_parts') && !item2.hasOwnProperty('address_parts')) { + return false; + } + + // if both have address, check parts + if (item1.hasOwnProperty('address_parts') && item2.hasOwnProperty('address_parts')) { + propMatch(item1.address_parts, item2.address_parts, 'number'); + propMatch(item1.address_parts, item2.address_parts, 'street'); + + // only compare zip if both records have it, otherwise just ignore and assume it's the same + // since by this time we've already compared parent hierarchies + if (item1.address_parts.hasOwnProperty('zip') && item2.address_parts.hasOwnProperty('zip')) { + propMatch(item1.address_parts, item2.address_parts, 'zip'); + } + + return false; + } + + // one has address and the other doesn't, different! + throw new Error('different'); +} + +/** + * Compare the two records and return true if they differ and false if same. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function isDifferent(item1, item2) { + try { + isDiffLayer(item1, item2); + isDiffParentHierarchy(item1, item2); + isDiffName(item1, item2); + isDiffAddress(item1, item2); + } + catch (err) { + if (err.message === 'different') { + return true; + } + throw err; + } + + return false; +} + +/** + * Throw exception if properties are different + * + * @param {object} item1 + * @param {object} item2 + * @param {string} prop + * @throws {Error} + */ +function propMatch(item1, item2, prop) { + var prop1 = item1[prop]; + var prop2 = item2[prop]; + + // in the case the property is an array (currently only in parent schema) + // simply take the 1st item. this will change in the near future to support multiple hierarchies + if (_.isArray(prop1)) { prop1 = prop1[0]; } + if (_.isArray(prop2)) { prop2 = prop2[0]; } + + if (normalizeString(prop1) !== normalizeString(prop2)) { + throw new Error('different'); + } +} + +/** + * Remove punctuation and lowercase + * + * @param {string} str + * @returns {string} + */ +function normalizeString(str) { + if (!str) { + return ''; + } + return str.toLowerCase().split(/[ ,-]+/).join(' '); +} + +module.exports.isDifferent = isDifferent; \ No newline at end of file diff --git a/middleware/dedupe.js b/middleware/dedupe.js index 786675c4..b40f1806 100644 --- a/middleware/dedupe.js +++ b/middleware/dedupe.js @@ -1,5 +1,6 @@ var logger = require('pelias-logger').get('api'); var _ = require('lodash'); +var isDifferent = require('../helper/diffPlaces').isDifferent; function setup() { return dedupeResults; @@ -19,7 +20,7 @@ function dedupeResults(req, res, next) { uniqueResults.push(hit); } else { - logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default }); + logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default + ' ' + hit.source + ':' + hit._id }); } // stop looping when requested size has been reached in uniqueResults @@ -31,89 +32,4 @@ function dedupeResults(req, res, next) { next(); } -/** - * @param {object} item1 - * @param {object} item2 - * @returns {boolean} - * @throws {Error} - */ -function isDifferent(item1, item2) { - try { - if (item1.hasOwnProperty('parent') && item2.hasOwnProperty('parent')) { - propMatch(item1.parent, item2.parent, 'region_a'); - propMatch(item1.parent, item2.parent, 'country'); - propMatch(item1.parent, item2.parent, 'locality'); - propMatch(item1.parent, item2.parent, 'neighbourhood'); - } - else if (item1.parent !== item2.parent) { - throw new Error('different'); - } - - if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { - for (var lang in item1.name) { - if(item2.name[lang] || lang === 'default') { - // do not consider absence of an additional name as a difference - propMatch(item1.name, item2.name, lang); - } - } - } - else { - propMatch(item1, item2, 'name'); - } - - if (item1.hasOwnProperty('address_parts') && item2.hasOwnProperty('address_parts')) { - propMatch(item1.address_parts, item2.address_parts, 'number'); - propMatch(item1.address_parts, item2.address_parts, 'street'); - propMatch(item1.address_parts, item2.address_parts, 'zip'); - } - else if (item1.address_parts !== item2.address_parts) { - throw new Error('different'); - } - } - catch (err) { - if (err.message === 'different') { - return true; - } - throw err; - } - - return false; -} - -/** - * Throw exception if properties are different - * - * @param {object} item1 - * @param {object} item2 - * @param {string} prop - * @throws {Error} - */ -function propMatch(item1, item2, prop) { - var prop1 = item1[prop]; - var prop2 = item2[prop]; - - // in the case the property is an array (currently only in parent schema) - // simply take the 1st item. this will change in the near future to support multiple hierarchies - if (_.isArray(prop1)) { prop1 = prop1[0]; } - if (_.isArray(prop2)) { prop2 = prop2[0]; } - - if (normalizeString(prop1) !== normalizeString(prop2)) { - throw new Error('different'); - } -} - -/** - * Remove punctuation and lowercase - * - * @param {string} str - * @returns {string} - */ -function normalizeString(str) { - if (!str) { - return ''; - } - return str.toLowerCase().split(/[ ,-]+/).join(' '); -} - - module.exports = setup; diff --git a/test/unit/fixture/dedupe_elasticsearch_results.js b/test/unit/fixture/dedupe_elasticsearch_results.js index e4780097..8b61535f 100644 --- a/test/unit/fixture/dedupe_elasticsearch_results.js +++ b/test/unit/fixture/dedupe_elasticsearch_results.js @@ -1,377 +1,1192 @@ module.exports = [ { 'center_point': { - 'lon': -76.207456, - 'lat': 40.039265 + 'lon': -76.293127, + 'lat': 40.032787 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Hand Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:357289197', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': 'node:357289197', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, - { // same as above, but change the neighbourhood + { 'center_point': { - 'lon': -77.207456, - 'lat': 41.039265 + 'lon': -76.32746, + 'lat': 40.02343 }, - 'address': {}, 'parent': { - 'localadmin': 'East Lampeter', - 'region_a': 'PA', - 'region': 'Pennsylvania', - 'locality': 'Smoketown', - 'country_a': 'USA', - 'county': 'Lancaster County', - 'country': 'United States', - 'neighbourhood': 'Blueland' // ### + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487185' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [], + 'locality_id': [], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Wheatland Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5219083', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': '5219083', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, - { // same as #1, but change the locality + { 'center_point': { - 'lon': -73.207456, - 'lat': 42.039265 + 'lon': -76.30107, + 'lat': 40.05926 }, - 'address': {}, 'parent': { - 'localadmin': 'East Lampeter', - 'region_a': 'PA', - 'region': 'Pennsylvania', - 'locality': 'Firetown', // ### - 'country_a': 'USA', - 'county': 'Lancaster County', - 'country': 'United States', - 'neighbourhood': 'Greenland' + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Rossmere' + ], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [ + '85846173' + ], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Catholic High School Stadium' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5183465', 'category': [ - 'education' + 'entertainment', + 'recreation' ], - '_id': '357321757', + 'layer': 'venue', + '_id': '5183465', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, - { // same as #1, but with an additional name + { 'center_point': { - 'lon': -76.207456, - 'lat': 40.039265 + 'lon': -76.285474, + 'lat': 40.048535 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School', - 'alt': 'High School of East Lampeter', + 'default': 'McCaskey East High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:368338500', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': 'node:368338500', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.207456, - 'lat': 40.039265 + 'lon': -76.327063, + 'lat': 40.031869 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487185' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [], + 'locality_id': [], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'bounding_box': '{\'min_lat\':40.031513,\'max_lat\':40.032233,\'min_lon\':-76.328429,\'max_lon\':-76.326216}', 'name': { - 'default': 'East Lampeter, High-School' + 'default': 'Wheatland Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'way:84969670', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': 'way:84969670', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.23246, - 'lat': 39.99288 + 'lon': -76.29274, + 'lat': 40.03288 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['West Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Lampeter'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Wheatland Mills'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'Lampeter-Strasburg High School' + 'default': 'Hand Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5192545', 'category': [ 'education' ], - '_id': '4559068', - '_type': 'geoname', - '_score': 1.2367082, - 'confidence': 0.879 + 'layer': 'venue', + '_id': '5192545', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.20746, - 'lat': 40.03927 + 'lon': -76.28496, + 'lat': 40.04732 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Lincoln Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5198085', 'category': [ 'education' ], - '_id': '5187980', - '_type': 'geoname', - '_score': 1.2367082, - 'confidence': 0.879 + 'layer': 'venue', + '_id': '5198085', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.232457, - 'lat': 39.992877 + 'lon': -76.31857, + 'lat': 40.04204 }, - 'address_parts': {}, 'parent': { - 'region': ['Pennsylvania'], - 'locality': ['Lampeter'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Wheatland Mills'], - 'localadmin': ['West Lampeter'], - 'region_a': ['PA'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'Lampeter-Strasburg High School' + 'default': 'Reynolds Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5208101', 'category': [ 'education' ], - '_id': '357294404', + 'layer': 'venue', + '_id': '5208101', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.207456, - 'lat': 40.038987 + 'lon': -76.290392, + 'lat': 40.048281 }, - 'address_parts': {}, 'parent': { - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'], - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'bounding_box': '{\'min_lat\':40.047288,\'max_lat\':40.049171,\'min_lon\':-76.291609,\'max_lon\':-76.289314}', 'name': { - 'default': 'East Lampeter School' + 'default': 'McCaskey High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'way:161088588', 'category': [ 'education' ], - '_id': '357283977', + 'layer': 'venue', + '_id': 'way:161088588', '_type': 'venue', - '_score': 1.1036991, - 'confidence': 0.664 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.20746, - 'lat': 40.03899 + 'lon': -76.29051, + 'lat': 40.04788 }, - 'address_parts': {}, 'parent': { - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'], - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter School' + 'default': 'McCaskey High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5200263', 'category': [ 'education' ], - '_id': '5187966', - '_type': 'geoname', - '_score': 1.1036991, - 'confidence': 0.664 + 'layer': 'venue', + '_id': '5200263', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -94.167445, - 'lat': 38.762788 + 'lon': -76.318983, + 'lat': 40.042051 }, - 'address_parts': {}, 'parent': { - 'region': ['Missouri'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Cass County'], - 'country': ['United States'], - 'localadmin': ['Polk'], - 'region_a': ['MO'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'bounding_box': '{\'min_lat\':40.041542,\'max_lat\':40.042777,\'min_lon\':-76.31963,\'max_lon\':-76.318094}', 'name': { - 'default': 'Strasburg School' + 'default': 'Reynolds Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'way:34212977', 'category': [ 'education' ], - '_id': '358058986', + 'layer': 'venue', + '_id': 'way:34212977', '_type': 'venue', - '_score': 1.0492544, - 'confidence': 0.658 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -78.36317, - 'lat': 38.98445 - }, - 'address_parts': {}, - 'name': { - 'default': 'Strasburg High School' + 'lon': -76.284958, + 'lat': 40.04732 }, 'parent': { - 'region_a': ['VA'], - 'region': ['Virginia'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Shenandoah County'], - 'country': ['United States'], - 'neighbourhood': ['Strasburg Junction'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] + }, + 'name': { + 'default': 'Lincoln Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:357330916', 'category': [ 'education' ], - '_id': '4787978', - '_type': 'geoname', - '_score': 0.9724125, - 'confidence': 0.649 + 'layer': 'venue', + '_id': 'node:357330916', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -100.16516, - 'lat': 46.13427 - }, - 'address_parts': {}, - 'name': { - 'default': 'Strasburg High School' + 'lon': -76.280791, + 'lat': 40.045098 }, 'parent': { - 'localadmin': ['Strasburg'], - 'region_a': ['ND'], - 'region': ['North Dakota'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Emmons County'], - 'country': ['United States'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] + }, + 'name': { + 'default': 'Lancaster Christian Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:357330919', 'category': [ 'education' ], - '_id': '9683163', - '_type': 'geoname', - '_score': 0.9724125, - 'confidence': 0.649 + 'layer': 'venue', + '_id': 'node:357330919', + '_type': 'venue', + '_score': 0.4432487, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -81.532392, - 'lat': 40.597578 - }, - 'address_parts': {}, - 'name': { - 'default': 'Strasburg High School' + 'lon': -76.28079, + 'lat': 40.0451 }, 'parent': { - 'localadmin': ['Franklin'], - 'region_a': ['OH'], - 'region': ['Ohio'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Tuscarawas County'], - 'country': ['United States'] + 'country': [ + 'United States' + ], + 'macrocounty_a': [], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'borough_a': [], + 'borough_id': [], + 'borough': [], + 'macroregion': [], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'macrocounty': [], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [], + 'localadmin_id': [ + '404487183' + ], + 'macroregion_id': [], + 'neighbourhood_id': [], + 'country_a': [ + 'USA' + ], + 'macroregion_a': [], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [], + 'region': [ + 'Pennsylvania' + ], + 'macrocounty_id': [], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'name': { + 'default': 'Lancaster Christian Junior High School' + }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5197082', 'category': [ 'education' ], - '_id': '356646971', + 'layer': 'venue', + '_id': '5197082', '_type': 'venue', - '_score': 0.9724125, - 'confidence': 0.649 + '_score': 0.4432487, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 } -]; +]; \ No newline at end of file diff --git a/test/unit/helper/diffPlaces.js b/test/unit/helper/diffPlaces.js new file mode 100644 index 00000000..a7dd692d --- /dev/null +++ b/test/unit/helper/diffPlaces.js @@ -0,0 +1,180 @@ +var isDifferent= require('../../../helper/diffPlaces').isDifferent; + +module.exports.tests = {}; + +module.exports.tests.dedupe = function(test, common) { + + test('match same object', function(t) { + var item1 = { + 'parent': { + 'country': [ 'United States' ], + 'county': [ 'Otsego County' ], + 'region_a': [ 'NY' ], + 'localadmin': [ 'Cherry Valley' ], + 'county_id': [ '102082399' ], + 'localadmin_id': [ '404522887' ], + 'country_a': [ 'USA' ], + 'region_id': [ '85688543' ], + 'locality': [ 'Cherry Valley' ], + 'locality_id': [ '85978799' ], + 'region': [ 'New York' ], + 'country_id': [ '85633793' ] + }, + 'name': { + 'default': '1 Main Street' + }, + 'address_parts': { + 'number': '1', + 'street': 'Main Street' + }, + 'layer': 'address' + }; + + t.false(isDifferent(item1, item1), 'should be the same'); + t.end(); + }); + + test('catch diff layers', function(t) { + var item1 = { 'layer': 'address' }; + var item2 = { 'layer': 'venue' }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('catch diff parent', function(t) { + var item1 = { + 'layer': 'same', + 'parent': { + 'country_id': '12345' + } + }; + var item2 = { + 'layer': 'same', + 'parent': { + 'country_id': '54321' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('catch diff name', function(t) { + var item1 = { + 'name': { + 'default': '1 Main St' + } + }; + var item2 = { + 'name': { + 'default': '1 Broad St' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('match diff capitalization in name', function(t) { + var item1 = { + 'name': { + 'default': '1 MAIN ST' + } + }; + var item2 = { + 'name': { + 'default': '1 Main St' + } + }; + + t.false(isDifferent(item1, item2), 'should be the same'); + t.end(); + }); + + test('do not handle expansions', function(t) { + // we currently don't handle expansions and abbreviations and + // this is a test waiting to be updated as soon as we fix it + + var item1 = { + 'name': { + 'default': '1 Main Street' + } + }; + var item2 = { + 'name': { + 'default': '1 Main St' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('missing names in other langs should not be a diff', function(t) { + var item1 = { + 'name': { + 'default': 'Moscow', + 'rus': 'Москва' + } + }; + var item2 = { + 'name': { + 'default': 'Moscow' + } + }; + + t.false(isDifferent(item1, item2), 'should be the same'); + t.end(); + }); + + test('catch diff address', function(t) { + var item1 = { + 'address_parts': { + 'number': '1', + 'street': 'Main Street', + 'zip': '90210' + } + }; + var item2 = { + 'address_parts': { + 'number': '2', + 'street': 'Main Street', + 'zip': '90210' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('catch diff address', function(t) { + var item1 = { + 'address_parts': { + 'number': '1', + 'street': 'Main Street', + 'zip': '90210' + } + }; + var item2 = { + 'address_parts': { + 'number': '1', + 'street': 'Main Street' + } + }; + + t.false(isDifferent(item1, item2), 'should be the same'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('[helper] diffPlaces: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/middleware/dedupe.js b/test/unit/middleware/dedupe.js index ad553f9c..b8100955 100644 --- a/test/unit/middleware/dedupe.js +++ b/test/unit/middleware/dedupe.js @@ -16,7 +16,7 @@ module.exports.tests.dedupe = function(test, common) { data: data }; - var expectedCount = 9; + var expectedCount = 8; dedupe(req, res, function () { t.equal(res.data.length, expectedCount, 'results have fewer items than before'); t.end(); diff --git a/test/unit/run.js b/test/unit/run.js index 77fb3415..a8b4b252 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -12,6 +12,7 @@ var tests = [ require('./controller/index'), require('./controller/place'), require('./controller/search'), + require('./helper/diffPlaces'), require('./helper/geojsonify'), require('./helper/labelGenerator_examples'), require('./helper/labelGenerator_default'),