Browse Source

added support for `boundary.rect` and `boundary.circle`

- tigthened up parsing of string-ish numbers to deal with lodash weirdness
- added lots of guard clauses to deal with bad data
pull/850/head
Stephen Hess 8 years ago
parent
commit
c5f16e32ad
  1. 171
      controller/placeholder.js
  2. 647
      test/unit/controller/placeholder.js

171
controller/placeholder.js

@ -1,11 +1,45 @@
const _ = require('lodash');
const logger = require('pelias-logger').get('api');
const Document = require('pelias-model').Document;
const geolib = require('geolib');
// composition of toNumber and isFinite, useful for single call to convert a value
// to a number, then checking to see if it's finite
const isFiniteNumber = _.flow(_.toNumber, _.isFinite);
const isNonNegativeFiniteNumber = _.flow(_.toNumber, (val) => { return val >= 0; });
function isFiniteNumber(value) {
return !_.isEmpty(_.trim(value)) && _.isFinite(_.toNumber(value));
}
// returns true if value is parseable as finite non-negative number
function isNonNegativeFiniteNumber(value) {
return isFiniteNumber(value) && _.gte(value, 0);
}
function hasLatLon(result) {
return _.isFinite(_.get(result.geom, 'lat')) && _.isFinite(_.get(result.geom, 'lon'));
}
function getLatLon(result) {
return {
latitude: result.geom.lat,
longitude: result.geom.lon
};
}
// if geom.lat/lon are parseable as finite numbers, convert to a finite number
// otherwise remove the field
function numberifyGeomLatLon(result) {
['lat', 'lon'].forEach((f) => {
if (isFiniteNumber(_.get(result.geom, f))) {
result.geom[f] = _.toFinite(result.geom[f]);
} else {
// result.geom may not exist, so use unset instead of delete
_.unset(result.geom, f);
}
});
return result;
}
// returns true if all 4 ,-delimited (max) substrings are parseable as finite numbers
// '12.12,21.21,13.13,31.31' returns true
@ -15,7 +49,7 @@ const isNonNegativeFiniteNumber = _.flow(_.toNumber, (val) => { return val >= 0;
// '12.12,NaN,13.13,31.31' returns false
// '12.12,Infinity,13.13,31.31' returns false
function is4CommaDelimitedNumbers(bbox) {
return bbox.
return _.defaultTo(bbox, '').
split(',').
filter(isFiniteNumber).length === 4;
}
@ -24,6 +58,20 @@ function hasName(result) {
return !_.isEmpty(_.trim(result.name));
}
// filter that passes only results that match on requested layers
// passes everything if req.clean.layers is not found
function getLayersFilter(clean) {
if (_.isEmpty(_.get(clean, 'layers', []))) {
return _.constant(true);
}
// otherwise return a function that checks for set inclusion of a result placetype
return (result) => {
return _.includes(clean.layers, result.placetype);
};
}
// return true if the hierarchy does not have a country.abbr
// OR hierarchy country.abbr matches boundary.country
function matchesBoundaryCountry(boundaryCountry, hierarchy) {
@ -33,15 +81,80 @@ function matchesBoundaryCountry(boundaryCountry, hierarchy) {
// return true if the result does not have a lineage
// OR at least one lineage matches the requested boundary.country
function atLeastOneLineageMatchesBoundaryCountry(boundaryCountry, result) {
return !result.lineage || result.lineage.some(_.curry(matchesBoundaryCountry)(boundaryCountry));
return !result.lineage || result.lineage.some(_.partial(matchesBoundaryCountry, boundaryCountry));
}
// return a function that detects if a result has at least one lineage in boundary.country
// if there's no boundary.country, return a function that always returns true
function getBoundaryCountryFilter(clean) {
if (_.has(clean, 'boundary.country')) {
return _.partial(atLeastOneLineageMatchesBoundaryCountry, clean['boundary.country']);
}
return _.constant(true);
}
// return a function that detects if a result is inside a bbox if a bbox is available
// if there's no bbox, return a function that always returns true
function getBoundaryRectangleFilter(clean) {
if (['min_lat', 'min_lon', 'max_lat', 'max_lon'].every((f) => {
return _.has(clean, `boundary.rect.${f}`);
})) {
const polygon = [
{ latitude: clean['boundary.rect.min_lat'], longitude: clean['boundary.rect.min_lon'] },
{ latitude: clean['boundary.rect.max_lat'], longitude: clean['boundary.rect.min_lon'] },
{ latitude: clean['boundary.rect.max_lat'], longitude: clean['boundary.rect.max_lon'] },
{ latitude: clean['boundary.rect.min_lat'], longitude: clean['boundary.rect.max_lon'] }
];
const isPointInsidePolygon = _.partialRight(geolib.isPointInside, polygon);
return _.partial(isInsideGeometry, isPointInsidePolygon);
}
return _.constant(true);
}
// return a function that detects if a result is inside a circle if a circle is available
// if there's no circle, return a function that always returns true
function getBoundaryCircleFilter(clean) {
if (['lat', 'lon', 'radius'].every((f) => {
return _.has(clean, `boundary.circle.${f}`);
})) {
const center = {
latitude: clean['boundary.circle.lat'],
longitude: clean['boundary.circle.lon']
};
const radiusInMeters = clean['boundary.circle.radius'] * 1000;
const isPointInCircle = _.partialRight(geolib.isPointInCircle, center, radiusInMeters);
return _.partial(isInsideGeometry, isPointInCircle);
}
return _.constant(true);
}
// helper that calls an "is inside some geometry" function
function isInsideGeometry(f, result) {
return hasLatLon(result) ? f(getLatLon(result)) : false;
}
function placetypeHasNameAndId(hierarchyElement) {
return !_.isEmpty(_.trim(hierarchyElement.name)) &&
!_.isEmpty(_.trim(hierarchyElement.id));
}
// synthesize an ES doc from a placeholder result
function synthesizeDocs(boundaryCountry, result) {
const doc = new Document('whosonfirst', result.placetype, result.id.toString());
doc.setName('default', result.name);
// only assign centroid if both lat and lon are finite numbers
if (_.conformsTo(result.geom, { 'lat': isFiniteNumber, 'lon': isFiniteNumber } )) {
if (hasLatLon(result)) {
doc.setCentroid( { lat: result.geom.lat, lon: result.geom.lon } );
} else {
logger.error(`could not parse centroid for id ${result.id}`);
@ -49,7 +162,7 @@ function synthesizeDocs(boundaryCountry, result) {
// lodash conformsTo verifies that an object has a property with a certain format
if (_.conformsTo(result.geom, { 'bbox': is4CommaDelimitedNumbers } )) {
const parsedBoundingBox = result.geom.bbox.split(',').map(_.toNumber);
const parsedBoundingBox = result.geom.bbox.split(',').map(_.toFinite);
doc.setBoundingBox({
upperLeft: {
lat: parsedBoundingBox[3],
@ -61,26 +174,29 @@ function synthesizeDocs(boundaryCountry, result) {
}
});
} else {
logger.error(`could not parse bbox for id ${result.id}: ${result.geom.bbox}`);
logger.error(`could not parse bbox for id ${result.id}: ${_.get(result, 'geom.bbox')}`);
}
if (_.conformsTo(result, { 'population': isNonNegativeFiniteNumber })) {
doc.setPopulation(_.toNumber(result.population));
// set population and popularity if parseable as finite number
if (isNonNegativeFiniteNumber(result.population)) {
doc.setPopulation(_.toFinite(result.population));
}
if (_.conformsTo(result, { 'popularity': isNonNegativeFiniteNumber })) {
doc.setPopularity(_.toNumber(result.popularity));
if (isNonNegativeFiniteNumber(result.popularity)) {
doc.setPopularity(_.toFinite(result.popularity));
}
_.defaultTo(result.lineage, [])
// remove all lineages that don't match an explicit boundary.country
.filter(_.curry(matchesBoundaryCountry)(boundaryCountry))
.filter(_.partial(matchesBoundaryCountry, boundaryCountry))
// add all the lineages to the doc
.map((hierarchy) => {
Object.keys(hierarchy)
.filter(doc.isSupportedParent)
.filter((placetype) => { return !_.isEmpty(_.trim(hierarchy[placetype].name)); } )
.filter((placetype) => {
return placetypeHasNameAndId(hierarchy[placetype]);
})
.forEach((placetype) => {
// console.error(JSON.stringify(hierarchy[placetype], null, 2));
doc.addParent(
placetype,
hierarchy[placetype].name,
@ -115,30 +231,27 @@ function setup(placeholderService, should_execute) {
}
} else {
// filter that passes only results that match on requested layers
// passes everything if req.clean.layers is not found
const matchesLayers = (result) => {
return _.includes(req.clean.layers, result.placetype);
};
const layersFilter = _.get(req, 'clean.layers', []).length ?
matchesLayers : _.constant(true);
// filter that passes only documents that match on boundary.country
// passed everything if req.clean['boundary.country'] is not found
const boundaryCountry = _.get(req, ['clean', 'boundary.country']);
const boundaryCountryFilter = !!boundaryCountry ?
_.curry(atLeastOneLineageMatchesBoundaryCountry)(boundaryCountry) : _.constant(true);
// convert results to ES docs
// boundary.country filter must happen after synthesis since multiple
// lineages may produce different country docs
res.meta = {};
res.data = results
// filter out results that don't have a name
.filter(hasName)
.filter(layersFilter)
// filter out results that don't match on requested layer(s)
.filter(getLayersFilter(req.clean))
// filter out results that don't match on any lineage country
.filter(boundaryCountryFilter)
.map(_.curry(synthesizeDocs)(boundaryCountry));
.filter(getBoundaryCountryFilter(req.clean))
// clean up geom.lat/lon for boundary rect/circle checks
.map(numberifyGeomLatLon)
// filter out results that aren't in the boundary.rect
.filter(getBoundaryRectangleFilter(req.clean))
// filter out results that aren't in the boundary.circle
.filter(getBoundaryCircleFilter(req.clean))
// convert results to ES docs
.map(_.partial(synthesizeDocs, boundaryCountry));
const messageParts = [
'[controller:placeholder]',

647
test/unit/controller/placeholder.js

@ -612,6 +612,376 @@ module.exports.tests.success = (test, common) => {
};
module.exports.tests.result_filtering = (test, common) => {
test('when boundary.rect is available, results outside of it should be removed', (t) => {
const logger = require('pelias-mock-logger')();
const placeholder_service = (req, callback) => {
t.deepEqual(req, {
param1: 'param1 value',
clean: {
'boundary.rect.min_lat': -2,
'boundary.rect.max_lat': 2,
'boundary.rect.min_lon': -2,
'boundary.rect.max_lon': 2
}
});
const response = [
{
// inside bbox
id: 1,
name: 'name 1',
placetype: 'neighbourhood',
geom: {
lat: -1,
lon: -1
}
},
{
// outside bbox on max_lon
id: 2,
name: 'name 2',
placetype: 'neighbourhood',
geom: {
lat: -1,
lon: 3
}
},
{
// outside bbox on max_lat
id: 3,
name: 'name 3',
placetype: 'neighbourhood',
geom: {
lat: 3,
lon: -1
}
},
{
// outside bbox on min_lon
id: 4,
name: 'name 4',
placetype: 'neighbourhood',
geom: {
lat: -1,
lon: -3
}
},
{
// outside bbox on min_lat
id: 5,
name: 'name 5',
placetype: 'neighbourhood',
geom: {
lat: -3,
lon: -1
}
},
{
// no lat/lon
id: 6,
name: 'name 6',
placetype: 'neighbourhood',
geom: {
}
},
{
// empty string lat/lon
id: 7,
name: 'name 7',
placetype: 'neighbourhood',
geom: {
lat: '',
lon: ''
}
},
{
// valid lat, empty string lon
id: 8,
name: 'name 8',
placetype: 'neighbourhood',
geom: {
lat: 0,
lon: ' '
}
},
{
// valid lon, empty string lat
id: 9,
name: 'name 9',
placetype: 'neighbourhood',
geom: {
lat: ' ',
lon: 0
}
},
{
// inside bbox
id: 10,
name: 'name 10',
placetype: 'neighbourhood',
geom: {
lat: 1,
lon: 1
}
}
];
callback(null, response);
};
const should_execute = (req, res) => {
return true;
};
const controller = proxyquire('../../../controller/placeholder', {
'pelias-logger': logger
})(placeholder_service, _.constant(true));
const req = {
param1: 'param1 value',
clean: {
'boundary.rect.min_lat': -2,
'boundary.rect.max_lat': 2,
'boundary.rect.min_lon': -2,
'boundary.rect.max_lon': 2
}
};
const res = { };
controller(req, res, () => {
const expected_res = {
meta: {},
data: [
{
_id: '1',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '1',
center_point: {
lat: -1,
lon: -1
},
name: {
'default': 'name 1'
},
phrase: {
'default': 'name 1'
},
parent: { }
},
{
_id: '10',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '10',
center_point: {
lat: 1,
lon: 1
},
name: {
'default': 'name 10'
},
phrase: {
'default': 'name 10'
},
parent: { }
}
]
};
t.deepEquals(res, expected_res);
t.end();
});
});
test('when boundary.circle is available, results outside of it should be removed', (t) => {
const logger = require('pelias-mock-logger')();
const placeholder_service = (req, callback) => {
t.deepEqual(req, {
param1: 'param1 value',
clean: {
'boundary.circle.lat': 0,
'boundary.circle.lon': 0,
'boundary.circle.radius': 500
}
});
const response = [
{
// inside circle
id: 1,
name: 'name 1',
placetype: 'neighbourhood',
geom: {
lat: 1,
lon: 1
}
},
{
// outside circle on +lon
id: 2,
name: 'name 2',
placetype: 'neighbourhood',
geom: {
lat: 0,
lon: 45
}
},
{
// outside bbox on +lat
id: 3,
name: 'name 3',
placetype: 'neighbourhood',
geom: {
lat: 45,
lon: 0
}
},
{
// outside bbox on -lon
id: 4,
name: 'name 4',
placetype: 'neighbourhood',
geom: {
lat: 0,
lon: -45
}
},
{
// outside bbox on -lat
id: 5,
name: 'name 5',
placetype: 'neighbourhood',
geom: {
lat: -45,
lon: 0
}
},
{
// no lat/lon
id: 6,
name: 'name 6',
placetype: 'neighbourhood',
geom: {
}
},
{
// empty string lat/lon
id: 7,
name: 'name 7',
placetype: 'neighbourhood',
geom: {
lat: '',
lon: ''
}
},
{
// valid lat, empty string lon
id: 8,
name: 'name 8',
placetype: 'neighbourhood',
geom: {
lat: 0,
lon: ' '
}
},
{
// valid lon, empty string lat
id: 9,
name: 'name 9',
placetype: 'neighbourhood',
geom: {
lat: ' ',
lon: 0
}
},
{
// inside circle
id: 10,
name: 'name 10',
placetype: 'neighbourhood',
geom: {
lat: -1,
lon: -1
}
}
];
callback(null, response);
};
const should_execute = (req, res) => {
return true;
};
const controller = proxyquire('../../../controller/placeholder', {
'pelias-logger': logger
})(placeholder_service, _.constant(true));
const req = {
param1: 'param1 value',
clean: {
'boundary.circle.lat': 0,
'boundary.circle.lon': 0,
'boundary.circle.radius': 500
}
};
const res = { };
controller(req, res, () => {
const expected_res = {
meta: {},
data: [
{
_id: '1',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '1',
center_point: {
lat: 1,
lon: 1
},
name: {
'default': 'name 1'
},
phrase: {
'default': 'name 1'
},
parent: { }
},
{
_id: '10',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '10',
center_point: {
lat: -1,
lon: -1
},
name: {
'default': 'name 10'
},
phrase: {
'default': 'name 10'
},
parent: { }
}
]
};
t.deepEquals(res, expected_res);
t.end();
});
});
test('only results matching explicit layers should be returned', (t) => {
const logger = mock_logger();
@ -910,7 +1280,280 @@ module.exports.tests.result_filtering = (test, common) => {
};
module.exports.centroid_errors = (test, common) => {
module.exports.tests.lineage_errors = (test, common) => {
test('unsupported lineage placetypes should be ignored', (t) => {
const logger = mock_logger();
const placeholder_service = (req, callback) => {
t.deepEqual(req, { param1: 'param1 value' });
const response = [
{
id: 123,
name: 'name 1',
placetype: 'neighbourhood',
lineage: [
{
country: {
id: 1,
name: 'country name 1',
abbr: 'country abbr 1'
},
unknown: {
id: 2,
name: 'unknown name 2',
abbr: 'unknown abbr 2'
}
}
],
geom: {
area: 12.34
}
}
];
callback(null, response);
};
const controller = proxyquire('../../../controller/placeholder', {
'pelias-logger': logger
})(placeholder_service, _.constant(true));
const req = { param1: 'param1 value' };
const res = { };
controller(req, res, () => {
const expected_res = {
meta: {},
data: [
{
_id: '123',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '123',
name: {
'default': 'name 1'
},
phrase: {
'default': 'name 1'
},
parent: {
country: ['country name 1'],
country_id: ['1'],
country_a: ['country abbr 1']
}
}
]
};
t.deepEquals(res, expected_res);
t.end();
});
});
test('lineage placetypes lacking names should be ignored', (t) => {
const logger = mock_logger();
const placeholder_service = (req, callback) => {
t.deepEqual(req, { param1: 'param1 value' });
const response = [
{
id: 123,
name: 'name 1',
placetype: 'neighbourhood',
lineage: [
{
country: {
id: 1,
name: 'country name 1',
abbr: 'country abbr 1'
},
region: {
id: 2,
abbr: 'region abbr 2'
}
}
],
geom: {
area: 12.34
}
}
];
callback(null, response);
};
const controller = proxyquire('../../../controller/placeholder', {
'pelias-logger': logger
})(placeholder_service, _.constant(true));
const req = { param1: 'param1 value' };
const res = { };
controller(req, res, () => {
const expected_res = {
meta: {},
data: [
{
_id: '123',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '123',
name: {
'default': 'name 1'
},
phrase: {
'default': 'name 1'
},
parent: {
country: ['country name 1'],
country_id: ['1'],
country_a: ['country abbr 1']
}
}
]
};
t.deepEquals(res, expected_res);
t.end();
});
});
test('lineage placetypes lacking ids should be ignored', (t) => {
const logger = mock_logger();
const placeholder_service = (req, callback) => {
t.deepEqual(req, { param1: 'param1 value' });
const response = [
{
id: 123,
name: 'name 1',
placetype: 'neighbourhood',
lineage: [
{
country: {
id: 1,
name: 'country name 1',
abbr: 'country abbr 1'
},
region: {
name: 'region name 2',
abbr: 'region abbr 2'
}
}
],
geom: {
area: 12.34
}
}
];
callback(null, response);
};
const controller = proxyquire('../../../controller/placeholder', {
'pelias-logger': logger
})(placeholder_service, _.constant(true));
const req = { param1: 'param1 value' };
const res = { };
controller(req, res, () => {
const expected_res = {
meta: {},
data: [
{
_id: '123',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '123',
name: {
'default': 'name 1'
},
phrase: {
'default': 'name 1'
},
parent: {
country: ['country name 1'],
country_id: ['1'],
country_a: ['country abbr 1']
}
}
]
};
t.deepEquals(res, expected_res);
t.end();
});
});
};
module.exports.tests.geometry_errors = (test, common) => {
test('result without geometry should not cause problems', (t) => {
const logger = mock_logger();
const placeholder_service = (req, callback) => {
t.deepEqual(req, { param1: 'param1 value' });
const response = [
{
id: 123,
name: 'name 1',
placetype: 'neighbourhood'
}
];
callback(null, response);
};
const controller = proxyquire('../../../controller/placeholder', {
'pelias-logger': logger
})(placeholder_service, _.constant(true));
const req = { param1: 'param1 value' };
const res = { };
controller(req, res, () => {
const expected_res = {
meta: {},
data: [
{
_id: '123',
_type: 'neighbourhood',
layer: 'neighbourhood',
source: 'whosonfirst',
source_id: '123',
name: {
'default': 'name 1'
},
phrase: {
'default': 'name 1'
},
parent: {}
}
]
};
t.deepEquals(res, expected_res);
t.end();
});
});
};
module.exports.tests.centroid_errors = (test, common) => {
test('result without geom.lat/geom.lon should leave centroid undefined', (t) => {
const logger = require('pelias-mock-logger')();
@ -1029,7 +1672,7 @@ module.exports.centroid_errors = (test, common) => {
};
module.exports.boundingbox_errors = (test, common) => {
module.exports.tests.boundingbox_errors = (test, common) => {
test('result with invalid geom.bbox should leave bounding_box undefined and log error', (t) => {
[
undefined,

Loading…
Cancel
Save