From c5f16e32ad7264bf012ee8705a0c69d689b0bcdc Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 19 May 2017 17:41:03 -0400 Subject: [PATCH] added support for `boundary.rect` and `boundary.circle` - tigthened up parsing of string-ish numbers to deal with lodash weirdness - added lots of guard clauses to deal with bad data --- controller/placeholder.js | 171 ++++++-- test/unit/controller/placeholder.js | 647 +++++++++++++++++++++++++++- 2 files changed, 787 insertions(+), 31 deletions(-) diff --git a/controller/placeholder.js b/controller/placeholder.js index ab47b326..f6ca3d04 100644 --- a/controller/placeholder.js +++ b/controller/placeholder.js @@ -1,11 +1,45 @@ const _ = require('lodash'); const logger = require('pelias-logger').get('api'); const Document = require('pelias-model').Document; +const geolib = require('geolib'); // composition of toNumber and isFinite, useful for single call to convert a value // to a number, then checking to see if it's finite -const isFiniteNumber = _.flow(_.toNumber, _.isFinite); -const isNonNegativeFiniteNumber = _.flow(_.toNumber, (val) => { return val >= 0; }); +function isFiniteNumber(value) { + return !_.isEmpty(_.trim(value)) && _.isFinite(_.toNumber(value)); +} + +// returns true if value is parseable as finite non-negative number +function isNonNegativeFiniteNumber(value) { + return isFiniteNumber(value) && _.gte(value, 0); +} + +function hasLatLon(result) { + return _.isFinite(_.get(result.geom, 'lat')) && _.isFinite(_.get(result.geom, 'lon')); +} + +function getLatLon(result) { + return { + latitude: result.geom.lat, + longitude: result.geom.lon + }; +} + +// if geom.lat/lon are parseable as finite numbers, convert to a finite number +// otherwise remove the field +function numberifyGeomLatLon(result) { + ['lat', 'lon'].forEach((f) => { + if (isFiniteNumber(_.get(result.geom, f))) { + result.geom[f] = _.toFinite(result.geom[f]); + } else { + // result.geom may not exist, so use unset instead of delete + _.unset(result.geom, f); + } + }); + + return result; + +} // returns true if all 4 ,-delimited (max) substrings are parseable as finite numbers // '12.12,21.21,13.13,31.31' returns true @@ -15,7 +49,7 @@ const isNonNegativeFiniteNumber = _.flow(_.toNumber, (val) => { return val >= 0; // '12.12,NaN,13.13,31.31' returns false // '12.12,Infinity,13.13,31.31' returns false function is4CommaDelimitedNumbers(bbox) { - return bbox. + return _.defaultTo(bbox, ''). split(','). filter(isFiniteNumber).length === 4; } @@ -24,6 +58,20 @@ function hasName(result) { return !_.isEmpty(_.trim(result.name)); } +// filter that passes only results that match on requested layers +// passes everything if req.clean.layers is not found +function getLayersFilter(clean) { + if (_.isEmpty(_.get(clean, 'layers', []))) { + return _.constant(true); + } + + // otherwise return a function that checks for set inclusion of a result placetype + return (result) => { + return _.includes(clean.layers, result.placetype); + }; + +} + // return true if the hierarchy does not have a country.abbr // OR hierarchy country.abbr matches boundary.country function matchesBoundaryCountry(boundaryCountry, hierarchy) { @@ -33,15 +81,80 @@ function matchesBoundaryCountry(boundaryCountry, hierarchy) { // return true if the result does not have a lineage // OR at least one lineage matches the requested boundary.country function atLeastOneLineageMatchesBoundaryCountry(boundaryCountry, result) { - return !result.lineage || result.lineage.some(_.curry(matchesBoundaryCountry)(boundaryCountry)); + return !result.lineage || result.lineage.some(_.partial(matchesBoundaryCountry, boundaryCountry)); } +// return a function that detects if a result has at least one lineage in boundary.country +// if there's no boundary.country, return a function that always returns true +function getBoundaryCountryFilter(clean) { + if (_.has(clean, 'boundary.country')) { + return _.partial(atLeastOneLineageMatchesBoundaryCountry, clean['boundary.country']); + } + + return _.constant(true); + +} + +// return a function that detects if a result is inside a bbox if a bbox is available +// if there's no bbox, return a function that always returns true +function getBoundaryRectangleFilter(clean) { + if (['min_lat', 'min_lon', 'max_lat', 'max_lon'].every((f) => { + return _.has(clean, `boundary.rect.${f}`); + })) { + const polygon = [ + { latitude: clean['boundary.rect.min_lat'], longitude: clean['boundary.rect.min_lon'] }, + { latitude: clean['boundary.rect.max_lat'], longitude: clean['boundary.rect.min_lon'] }, + { latitude: clean['boundary.rect.max_lat'], longitude: clean['boundary.rect.max_lon'] }, + { latitude: clean['boundary.rect.min_lat'], longitude: clean['boundary.rect.max_lon'] } + ]; + const isPointInsidePolygon = _.partialRight(geolib.isPointInside, polygon); + + return _.partial(isInsideGeometry, isPointInsidePolygon); + + } + + return _.constant(true); + +} + +// return a function that detects if a result is inside a circle if a circle is available +// if there's no circle, return a function that always returns true +function getBoundaryCircleFilter(clean) { + if (['lat', 'lon', 'radius'].every((f) => { + return _.has(clean, `boundary.circle.${f}`); + })) { + const center = { + latitude: clean['boundary.circle.lat'], + longitude: clean['boundary.circle.lon'] + }; + const radiusInMeters = clean['boundary.circle.radius'] * 1000; + const isPointInCircle = _.partialRight(geolib.isPointInCircle, center, radiusInMeters); + + return _.partial(isInsideGeometry, isPointInCircle); + + } + + return _.constant(true); + +} + +// helper that calls an "is inside some geometry" function +function isInsideGeometry(f, result) { + return hasLatLon(result) ? f(getLatLon(result)) : false; +} + +function placetypeHasNameAndId(hierarchyElement) { + return !_.isEmpty(_.trim(hierarchyElement.name)) && + !_.isEmpty(_.trim(hierarchyElement.id)); +} + +// synthesize an ES doc from a placeholder result function synthesizeDocs(boundaryCountry, result) { const doc = new Document('whosonfirst', result.placetype, result.id.toString()); doc.setName('default', result.name); // only assign centroid if both lat and lon are finite numbers - if (_.conformsTo(result.geom, { 'lat': isFiniteNumber, 'lon': isFiniteNumber } )) { + if (hasLatLon(result)) { doc.setCentroid( { lat: result.geom.lat, lon: result.geom.lon } ); } else { logger.error(`could not parse centroid for id ${result.id}`); @@ -49,7 +162,7 @@ function synthesizeDocs(boundaryCountry, result) { // lodash conformsTo verifies that an object has a property with a certain format if (_.conformsTo(result.geom, { 'bbox': is4CommaDelimitedNumbers } )) { - const parsedBoundingBox = result.geom.bbox.split(',').map(_.toNumber); + const parsedBoundingBox = result.geom.bbox.split(',').map(_.toFinite); doc.setBoundingBox({ upperLeft: { lat: parsedBoundingBox[3], @@ -61,26 +174,29 @@ function synthesizeDocs(boundaryCountry, result) { } }); } else { - logger.error(`could not parse bbox for id ${result.id}: ${result.geom.bbox}`); + logger.error(`could not parse bbox for id ${result.id}: ${_.get(result, 'geom.bbox')}`); } - if (_.conformsTo(result, { 'population': isNonNegativeFiniteNumber })) { - doc.setPopulation(_.toNumber(result.population)); + // set population and popularity if parseable as finite number + if (isNonNegativeFiniteNumber(result.population)) { + doc.setPopulation(_.toFinite(result.population)); } - - if (_.conformsTo(result, { 'popularity': isNonNegativeFiniteNumber })) { - doc.setPopularity(_.toNumber(result.popularity)); + if (isNonNegativeFiniteNumber(result.popularity)) { + doc.setPopularity(_.toFinite(result.popularity)); } _.defaultTo(result.lineage, []) // remove all lineages that don't match an explicit boundary.country - .filter(_.curry(matchesBoundaryCountry)(boundaryCountry)) + .filter(_.partial(matchesBoundaryCountry, boundaryCountry)) // add all the lineages to the doc .map((hierarchy) => { Object.keys(hierarchy) .filter(doc.isSupportedParent) - .filter((placetype) => { return !_.isEmpty(_.trim(hierarchy[placetype].name)); } ) + .filter((placetype) => { + return placetypeHasNameAndId(hierarchy[placetype]); + }) .forEach((placetype) => { + // console.error(JSON.stringify(hierarchy[placetype], null, 2)); doc.addParent( placetype, hierarchy[placetype].name, @@ -115,30 +231,27 @@ function setup(placeholderService, should_execute) { } } else { - // filter that passes only results that match on requested layers - // passes everything if req.clean.layers is not found - const matchesLayers = (result) => { - return _.includes(req.clean.layers, result.placetype); - }; - const layersFilter = _.get(req, 'clean.layers', []).length ? - matchesLayers : _.constant(true); - - // filter that passes only documents that match on boundary.country - // passed everything if req.clean['boundary.country'] is not found const boundaryCountry = _.get(req, ['clean', 'boundary.country']); - const boundaryCountryFilter = !!boundaryCountry ? - _.curry(atLeastOneLineageMatchesBoundaryCountry)(boundaryCountry) : _.constant(true); // convert results to ES docs // boundary.country filter must happen after synthesis since multiple // lineages may produce different country docs res.meta = {}; res.data = results + // filter out results that don't have a name .filter(hasName) - .filter(layersFilter) + // filter out results that don't match on requested layer(s) + .filter(getLayersFilter(req.clean)) // filter out results that don't match on any lineage country - .filter(boundaryCountryFilter) - .map(_.curry(synthesizeDocs)(boundaryCountry)); + .filter(getBoundaryCountryFilter(req.clean)) + // clean up geom.lat/lon for boundary rect/circle checks + .map(numberifyGeomLatLon) + // filter out results that aren't in the boundary.rect + .filter(getBoundaryRectangleFilter(req.clean)) + // filter out results that aren't in the boundary.circle + .filter(getBoundaryCircleFilter(req.clean)) + // convert results to ES docs + .map(_.partial(synthesizeDocs, boundaryCountry)); const messageParts = [ '[controller:placeholder]', diff --git a/test/unit/controller/placeholder.js b/test/unit/controller/placeholder.js index eab37439..86ac0803 100644 --- a/test/unit/controller/placeholder.js +++ b/test/unit/controller/placeholder.js @@ -612,6 +612,376 @@ module.exports.tests.success = (test, common) => { }; module.exports.tests.result_filtering = (test, common) => { + test('when boundary.rect is available, results outside of it should be removed', (t) => { + const logger = require('pelias-mock-logger')(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { + param1: 'param1 value', + clean: { + 'boundary.rect.min_lat': -2, + 'boundary.rect.max_lat': 2, + 'boundary.rect.min_lon': -2, + 'boundary.rect.max_lon': 2 + } + }); + + const response = [ + { + // inside bbox + id: 1, + name: 'name 1', + placetype: 'neighbourhood', + geom: { + lat: -1, + lon: -1 + } + }, + { + // outside bbox on max_lon + id: 2, + name: 'name 2', + placetype: 'neighbourhood', + geom: { + lat: -1, + lon: 3 + } + }, + { + // outside bbox on max_lat + id: 3, + name: 'name 3', + placetype: 'neighbourhood', + geom: { + lat: 3, + lon: -1 + } + }, + { + // outside bbox on min_lon + id: 4, + name: 'name 4', + placetype: 'neighbourhood', + geom: { + lat: -1, + lon: -3 + } + }, + { + // outside bbox on min_lat + id: 5, + name: 'name 5', + placetype: 'neighbourhood', + geom: { + lat: -3, + lon: -1 + } + }, + { + // no lat/lon + id: 6, + name: 'name 6', + placetype: 'neighbourhood', + geom: { + } + }, + { + // empty string lat/lon + id: 7, + name: 'name 7', + placetype: 'neighbourhood', + geom: { + lat: '', + lon: '' + } + }, + { + // valid lat, empty string lon + id: 8, + name: 'name 8', + placetype: 'neighbourhood', + geom: { + lat: 0, + lon: ' ' + } + }, + { + // valid lon, empty string lat + id: 9, + name: 'name 9', + placetype: 'neighbourhood', + geom: { + lat: ' ', + lon: 0 + } + }, + { + // inside bbox + id: 10, + name: 'name 10', + placetype: 'neighbourhood', + geom: { + lat: 1, + lon: 1 + } + } + ]; + + callback(null, response); + }; + + const should_execute = (req, res) => { + return true; + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, _.constant(true)); + + const req = { + param1: 'param1 value', + clean: { + 'boundary.rect.min_lat': -2, + 'boundary.rect.max_lat': 2, + 'boundary.rect.min_lon': -2, + 'boundary.rect.max_lon': 2 + } + }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: {}, + data: [ + { + _id: '1', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '1', + center_point: { + lat: -1, + lon: -1 + }, + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: { } + }, + { + _id: '10', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '10', + center_point: { + lat: 1, + lon: 1 + }, + name: { + 'default': 'name 10' + }, + phrase: { + 'default': 'name 10' + }, + parent: { } + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + + test('when boundary.circle is available, results outside of it should be removed', (t) => { + const logger = require('pelias-mock-logger')(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { + param1: 'param1 value', + clean: { + 'boundary.circle.lat': 0, + 'boundary.circle.lon': 0, + 'boundary.circle.radius': 500 + } + }); + + const response = [ + { + // inside circle + id: 1, + name: 'name 1', + placetype: 'neighbourhood', + geom: { + lat: 1, + lon: 1 + } + }, + { + // outside circle on +lon + id: 2, + name: 'name 2', + placetype: 'neighbourhood', + geom: { + lat: 0, + lon: 45 + } + }, + { + // outside bbox on +lat + id: 3, + name: 'name 3', + placetype: 'neighbourhood', + geom: { + lat: 45, + lon: 0 + } + }, + { + // outside bbox on -lon + id: 4, + name: 'name 4', + placetype: 'neighbourhood', + geom: { + lat: 0, + lon: -45 + } + }, + { + // outside bbox on -lat + id: 5, + name: 'name 5', + placetype: 'neighbourhood', + geom: { + lat: -45, + lon: 0 + } + }, + { + // no lat/lon + id: 6, + name: 'name 6', + placetype: 'neighbourhood', + geom: { + } + }, + { + // empty string lat/lon + id: 7, + name: 'name 7', + placetype: 'neighbourhood', + geom: { + lat: '', + lon: '' + } + }, + { + // valid lat, empty string lon + id: 8, + name: 'name 8', + placetype: 'neighbourhood', + geom: { + lat: 0, + lon: ' ' + } + }, + { + // valid lon, empty string lat + id: 9, + name: 'name 9', + placetype: 'neighbourhood', + geom: { + lat: ' ', + lon: 0 + } + }, + { + // inside circle + id: 10, + name: 'name 10', + placetype: 'neighbourhood', + geom: { + lat: -1, + lon: -1 + } + } + ]; + + callback(null, response); + }; + + const should_execute = (req, res) => { + return true; + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, _.constant(true)); + + const req = { + param1: 'param1 value', + clean: { + 'boundary.circle.lat': 0, + 'boundary.circle.lon': 0, + 'boundary.circle.radius': 500 + } + }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: {}, + data: [ + { + _id: '1', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '1', + center_point: { + lat: 1, + lon: 1 + }, + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: { } + }, + { + _id: '10', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '10', + center_point: { + lat: -1, + lon: -1 + }, + name: { + 'default': 'name 10' + }, + phrase: { + 'default': 'name 10' + }, + parent: { } + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + test('only results matching explicit layers should be returned', (t) => { const logger = mock_logger(); @@ -910,7 +1280,280 @@ module.exports.tests.result_filtering = (test, common) => { }; -module.exports.centroid_errors = (test, common) => { +module.exports.tests.lineage_errors = (test, common) => { + test('unsupported lineage placetypes should be ignored', (t) => { + const logger = mock_logger(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { param1: 'param1 value' }); + + const response = [ + { + id: 123, + name: 'name 1', + placetype: 'neighbourhood', + lineage: [ + { + country: { + id: 1, + name: 'country name 1', + abbr: 'country abbr 1' + }, + unknown: { + id: 2, + name: 'unknown name 2', + abbr: 'unknown abbr 2' + } + + } + ], + geom: { + area: 12.34 + } + } + ]; + + callback(null, response); + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, _.constant(true)); + + const req = { param1: 'param1 value' }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: {}, + data: [ + { + _id: '123', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '123', + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: { + country: ['country name 1'], + country_id: ['1'], + country_a: ['country abbr 1'] + } + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + + test('lineage placetypes lacking names should be ignored', (t) => { + const logger = mock_logger(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { param1: 'param1 value' }); + + const response = [ + { + id: 123, + name: 'name 1', + placetype: 'neighbourhood', + lineage: [ + { + country: { + id: 1, + name: 'country name 1', + abbr: 'country abbr 1' + }, + region: { + id: 2, + abbr: 'region abbr 2' + } + + } + ], + geom: { + area: 12.34 + } + } + ]; + + callback(null, response); + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, _.constant(true)); + + const req = { param1: 'param1 value' }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: {}, + data: [ + { + _id: '123', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '123', + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: { + country: ['country name 1'], + country_id: ['1'], + country_a: ['country abbr 1'] + } + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + + test('lineage placetypes lacking ids should be ignored', (t) => { + const logger = mock_logger(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { param1: 'param1 value' }); + + const response = [ + { + id: 123, + name: 'name 1', + placetype: 'neighbourhood', + lineage: [ + { + country: { + id: 1, + name: 'country name 1', + abbr: 'country abbr 1' + }, + region: { + name: 'region name 2', + abbr: 'region abbr 2' + } + } + ], + geom: { + area: 12.34 + } + } + ]; + + callback(null, response); + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, _.constant(true)); + + const req = { param1: 'param1 value' }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: {}, + data: [ + { + _id: '123', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '123', + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: { + country: ['country name 1'], + country_id: ['1'], + country_a: ['country abbr 1'] + } + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + +}; + +module.exports.tests.geometry_errors = (test, common) => { + test('result without geometry should not cause problems', (t) => { + const logger = mock_logger(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { param1: 'param1 value' }); + + const response = [ + { + id: 123, + name: 'name 1', + placetype: 'neighbourhood' + } + ]; + + callback(null, response); + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, _.constant(true)); + + const req = { param1: 'param1 value' }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: {}, + data: [ + { + _id: '123', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '123', + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: {} + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + +}; + +module.exports.tests.centroid_errors = (test, common) => { test('result without geom.lat/geom.lon should leave centroid undefined', (t) => { const logger = require('pelias-mock-logger')(); @@ -1029,7 +1672,7 @@ module.exports.centroid_errors = (test, common) => { }; -module.exports.boundingbox_errors = (test, common) => { +module.exports.tests.boundingbox_errors = (test, common) => { test('result with invalid geom.bbox should leave bounding_box undefined and log error', (t) => { [ undefined,