From e158c2168532caf7b8e91f070a374ec9584ce3d3 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 9 Jun 2017 14:18:27 -0400 Subject: [PATCH 01/52] added has_parsed_text_property predicate --- .../predicates/has_parsed_text_property.js | 13 +++ routes/v1.js | 10 ++- .../predicates/has_parsed_text_property.js | 82 +++++++++++++++++++ test/unit/run.js | 1 + 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 controller/predicates/has_parsed_text_property.js create mode 100644 test/unit/controller/predicates/has_parsed_text_property.js diff --git a/controller/predicates/has_parsed_text_property.js b/controller/predicates/has_parsed_text_property.js new file mode 100644 index 00000000..f6a51371 --- /dev/null +++ b/controller/predicates/has_parsed_text_property.js @@ -0,0 +1,13 @@ +const _ = require('lodash'); + +// returns a function that returns true if any result.layer is in any of the +// supplied layers using array intersection + +// example usage: determining if the response contains only admin results + +module.exports = (property) => { + return (request, response) => { + return _.has(request, ['clean', 'parsed_text', property]); + }; + +}; diff --git a/routes/v1.js b/routes/v1.js index 83d2fb78..51d5ac7f 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -66,6 +66,7 @@ var postProc = { }; // predicates that drive whether controller/search runs +const hasParsedTextProperty = require('../controller/predicates/has_parsed_text_property'); const hasResponseData = require('../controller/predicates/has_response_data'); const hasRequestErrors = require('../controller/predicates/has_request_errors'); const isCoarseReverse = require('../controller/predicates/is_coarse_reverse'); @@ -103,7 +104,14 @@ function addRoutes(app, peliasConfig) { ); const placeholderShouldExecute = all( - not(hasResponseDataOrRequestErrors), isPlaceholderServiceEnabled, isAdminOnlyAnalysis + not(hasResponseDataOrRequestErrors), + isPlaceholderServiceEnabled, + not( + any( + hasParsedTextProperty('venue'), + hasParsedTextProperty('category') + ) + ) ); // execute under the following conditions: diff --git a/test/unit/controller/predicates/has_parsed_text_property.js b/test/unit/controller/predicates/has_parsed_text_property.js new file mode 100644 index 00000000..a6b06be1 --- /dev/null +++ b/test/unit/controller/predicates/has_parsed_text_property.js @@ -0,0 +1,82 @@ +'use strict'; + +const _ = require('lodash'); +const has_parsed_text_property = require('../../../../controller/predicates/has_parsed_text_property'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', (t) => { + t.equal(typeof has_parsed_text_property, 'function', 'has_parsed_text_property is a function'); + t.end(); + }); +}; + +module.exports.tests.true_conditions = (test, common) => { + test('defined request.clean.parsed_text.property should return true', (t) => { + const req = { + clean: { + parsed_text: { + property: 'value' + } + } + }; + const res = {}; + + t.ok(has_parsed_text_property('property')(req, res)); + t.end(); + + }); + +}; + +module.exports.tests.false_conditions = (test, common) => { + test('undefined request should return false', (t) => { + const req = {}; + + t.notOk(has_parsed_text_property('property')(req, undefined)); + t.end(); + + }); + + test('undefined request.clean should return false', (t) => { + const req = {}; + + t.notOk(has_parsed_text_property('property')(req, undefined)); + t.end(); + + }); + + test('undefined request.clean.parsed_text should return false', (t) => { + const req = { + clean: {} + }; + + t.notOk(has_parsed_text_property('property')(req, undefined)); + t.end(); + + }); + + test('undefined request.clean.parsed_text.property should return false', (t) => { + const req = { + clean: { + parsed_text: {} + } + }; + + t.notOk(has_parsed_text_property('property')(req, undefined)); + t.end(); + + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /has_parsed_text_property ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index b60f06c5..8447553b 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -16,6 +16,7 @@ var tests = [ require('./controller/place'), require('./controller/placeholder'), require('./controller/search'), + require('./controller/predicates/has_parsed_text_property'), require('./controller/predicates/has_response_data'), require('./controller/predicates/has_results_at_layers'), require('./controller/predicates/has_request_errors'), From 2363e9d7399d7161686a0a8dd1da0a31aee224d5 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Tue, 27 Jun 2017 16:10:09 -0400 Subject: [PATCH 02/52] added concatenation query building for placeholder --- service/configurations/PlaceHolder.js | 14 +++-- .../service/configurations/PlaceHolder.js | 53 +++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/service/configurations/PlaceHolder.js b/service/configurations/PlaceHolder.js index 8d27c4c0..c25f8abc 100644 --- a/service/configurations/PlaceHolder.js +++ b/service/configurations/PlaceHolder.js @@ -12,9 +12,17 @@ class PlaceHolder extends ServiceConfiguration { } getParameters(req) { - const parameters = { - text: req.clean.text - }; + const parameters = {}; + + if (_.has(req.clean.parsed_text, 'street')) { + // assemble all these fields into a space-delimited string + parameters.text = _.values(_.pick(req.clean.parsed_text, + ['neighbourhood', 'borough', 'city', 'county', 'state', 'country'])).join(' '); + + } else { + parameters.text = req.clean.text; + + } if (_.has(req.clean, 'lang.iso6393')) { parameters.lang = req.clean.lang.iso6393; diff --git a/test/unit/service/configurations/PlaceHolder.js b/test/unit/service/configurations/PlaceHolder.js index 02cedfa7..9391d413 100644 --- a/test/unit/service/configurations/PlaceHolder.js +++ b/test/unit/service/configurations/PlaceHolder.js @@ -126,6 +126,59 @@ module.exports.tests.all = (test, common) => { }); + test('existence of street in req.clean.parsed_text should assemble text parameter from admin fields', (t) => { + const configBlob = { + url: 'http://localhost:1234', + }; + + const placeholder = new PlaceHolder(configBlob); + + const req = { + clean: { + text: 'text value', + parsed_text: { + street: 'street value', + neighbourhood: 'neighbourhood value', + borough: 'borough value', + city: 'city value', + county: 'county value', + state: 'state value', + country: 'country value' + } + } + }; + + t.deepEquals(placeholder.getParameters(req), { + text: 'neighbourhood value borough value city value county value state value country value' + }); + t.end(); + + }); + + test('existence of street in req.clean.parsed_text should assemble text parameter from defined admin fields', (t) => { + const configBlob = { + url: 'http://localhost:1234', + }; + + const placeholder = new PlaceHolder(configBlob); + + const req = { + clean: { + text: 'text value', + parsed_text: { + street: 'street value', + country: 'country value' + } + } + }; + + t.deepEquals(placeholder.getParameters(req), { + text: 'country value' + }); + t.end(); + + }); + }; module.exports.all = (tape, common) => { From 855e736878d389bf9567c1466e24b52dd1e9d849 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Tue, 27 Jun 2017 16:11:06 -0400 Subject: [PATCH 03/52] dev query tag --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 86a0d727..8ae71856 100644 --- a/package.json +++ b/package.json @@ -60,7 +60,7 @@ "pelias-logger": "0.2.0", "pelias-microservice-wrapper": "1.1.3", "pelias-model": "5.0.0", - "pelias-query": "8.16.1", + "pelias-query": "pelias/query#9279d5f", "pelias-sorting": "1.0.1", "pelias-text-analyzer": "1.8.3", "predicates": "^1.0.1", From 386897c2c41f6676d3c17345b150565b1859b58d Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Tue, 27 Jun 2017 16:13:06 -0400 Subject: [PATCH 04/52] added query for address search with ids --- query/address_search_using_ids.js | 179 ++++++ test/unit/query/MockQuery.js | 27 + test/unit/query/address_search_using_ids.js | 617 ++++++++++++++++++++ 3 files changed, 823 insertions(+) create mode 100644 query/address_search_using_ids.js create mode 100644 test/unit/query/MockQuery.js create mode 100644 test/unit/query/address_search_using_ids.js diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js new file mode 100644 index 00000000..432f38bc --- /dev/null +++ b/query/address_search_using_ids.js @@ -0,0 +1,179 @@ +const peliasQuery = require('pelias-query'); +const defaults = require('./search_defaults'); +const logger = require('pelias-logger').get('api'); +const _ = require('lodash'); +const check = require('check-types'); + +//------------------------------ +// general-purpose search query +//------------------------------ +const addressUsingIdsQuery = new peliasQuery.layout.AddressesUsingIdsQuery(); + +// scoring boost +// addressUsingIdsQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); +addressUsingIdsQuery.score( peliasQuery.view.popularity_only_function ); +addressUsingIdsQuery.score( peliasQuery.view.population_only_function ); +// -------------------------------- + +// non-scoring hard filters +addressUsingIdsQuery.filter( peliasQuery.view.boundary_country ); +addressUsingIdsQuery.filter( peliasQuery.view.boundary_circle ); +addressUsingIdsQuery.filter( peliasQuery.view.boundary_rect ); +addressUsingIdsQuery.filter( peliasQuery.view.sources ); +// -------------------------------- + + +// Red Lion, PA -- parsed as locality/state, localadmin/state, and neighbourhood/state +// Chelsea -- parsed as neighbourhood, localadmin, and locality +// Manhattan -- parsed as borough, locality, and localadmin +// Luxembourg -- parsed as country, locality, and region + +// if any placeholder results are at neighbourhood, borough, locality, or localadmin layers, filter by those ids at those layers +// fallback to county +// if any placeholder results are at county or macrocounty layers, filter by those ids at those layers +// fallback to region +// if any placeholder results are at region or macroregion layers, filter by those ids at those layers +// fallback to dependency/country +// if any placeholder results are at dependency or country layers, filter by those ids at those layers + + +// address in Red Lion, PA -- find results at layer=address +// neighbourhood_id in [85844063, 85844067] +// locality_id in [101717221] +// localadmin_id in [404487867] +// search all of the above + +// address in Chelsea +// neighbourhood_id in [85786511, 85810589, 85769021, 85890029, 85810579, 85810591, 85810575, 85772883, 420514219] +// locality_id in [85950359, 85914491, 101932747, 85951865, 101715289, 85943049, 101733697, 101722101, 101738587] +// localadmin_id in [404476575, 404508239, 404474971, 404527169, 404494675, 404503811, 404519887, 404488679, 404538119] + +// address in Manhattan +// neighbourhood_id in [] +// borough_id in [421205771] +// locality_id in [85945171, 85940551, 85972655] +// localadmin_id in [404502889, 404499147, 404502891, 85972655] +// search all of the above + +// address in Luxembourg +// country_id in [85633275] +// region_id in [85681727, 85673875] +// locality_id in [101751765] +// search locality first, then region perhaps + + +// if there are locality/localadmin layers, return ['locality', 'localadmin'] +// if there are region/macroregion layers, return ['region', 'macroregion'] + +const granularity_bands = [ + ['neighbourhood', 'borough', 'locality', 'localadmin'], + ['county', 'macrocounty'], + ['region', 'macroregion'], + ['dependency', 'country'] +]; + +function anyResultsAtGranularityBand(results, band) { + return results.some((result) => { return _.includes(band, result.layer); }); +} + +function getIdsAtLayer(results, layer) { + return results.filter((result) => { return result.layer === layer; }).map(_.property('source_id')); +} + +/** + map request variables to query variables for all inputs + provided by this HTTP request. +**/ +function generateQuery( clean, res ){ + const vs = new peliasQuery.Vars( defaults ); + const results = _.defaultTo(res.data, []); + + const logParts = ['query:address_search_using_ids', 'parser:libpostal']; + + // sources + if( !_.isEmpty(clean.sources) ) { + vs.var( 'sources', clean.sources); + logParts.push('param:sources'); + } + + // size + if( clean.querySize ) { + vs.var( 'size', clean.querySize ); + logParts.push('param:querySize'); + } + + if( ! _.isEmpty(clean.parsed_text.number) ){ + vs.var( 'input:housenumber', clean.parsed_text.number ); + } + vs.var( 'input:street', clean.parsed_text.street ); + + const granularity_band = granularity_bands.find((band) => { + return anyResultsAtGranularityBand(results, band); + }); + + if (granularity_band) { + const layers_to_ids = granularity_band.reduce((acc, layer) => { + acc[layer] = getIdsAtLayer(res.data, layer); + return acc; + }, {}); + + vs.var('input:layers', JSON.stringify(layers_to_ids)); + + } + + // focus point + if( check.number(clean['focus.point.lat']) && + check.number(clean['focus.point.lon']) ){ + vs.set({ + 'focus:point:lat': clean['focus.point.lat'], + 'focus:point:lon': clean['focus.point.lon'] + }); + } + + // boundary rect + if( check.number(clean['boundary.rect.min_lat']) && + check.number(clean['boundary.rect.max_lat']) && + check.number(clean['boundary.rect.min_lon']) && + check.number(clean['boundary.rect.max_lon']) ){ + vs.set({ + 'boundary:rect:top': clean['boundary.rect.max_lat'], + 'boundary:rect:right': clean['boundary.rect.max_lon'], + 'boundary:rect:bottom': clean['boundary.rect.min_lat'], + 'boundary:rect:left': clean['boundary.rect.min_lon'] + }); + } + + // boundary circle + // @todo: change these to the correct request variable names + if( check.number(clean['boundary.circle.lat']) && + check.number(clean['boundary.circle.lon']) ){ + vs.set({ + 'boundary:circle:lat': clean['boundary.circle.lat'], + 'boundary:circle:lon': clean['boundary.circle.lon'] + }); + + if( check.number(clean['boundary.circle.radius']) ){ + vs.set({ + 'boundary:circle:radius': Math.round( clean['boundary.circle.radius'] ) + 'km' + }); + } + } + + // boundary country + if( check.string(clean['boundary.country']) ){ + vs.set({ + 'boundary:country': clean['boundary.country'] + }); + } + + // format the log parts into a single coherent string + logger.info(logParts.map((part) => { return `[${part}]`;} ).join(' ') ); + + return { + type: 'fallback_using_ids', + body: addressUsingIdsQuery.render(vs) + }; + +} + +module.exports = generateQuery; diff --git a/test/unit/query/MockQuery.js b/test/unit/query/MockQuery.js new file mode 100644 index 00000000..f97bfb27 --- /dev/null +++ b/test/unit/query/MockQuery.js @@ -0,0 +1,27 @@ +'use strict'; + +module.exports = class MockQuery { + constructor() { + this._score_functions = []; + this._filter_functions = []; + } + + render(vs) { + return { + vs: vs, + score_functions: this._score_functions, + filter_functions: this._filter_functions + }; + } + + score(view) { + this._score_functions.push(view); + return this; + } + + filter(view) { + this._filter_functions.push(view); + return this; + } + +}; diff --git a/test/unit/query/address_search_using_ids.js b/test/unit/query/address_search_using_ids.js new file mode 100644 index 00000000..f6d623e2 --- /dev/null +++ b/test/unit/query/address_search_using_ids.js @@ -0,0 +1,617 @@ +const generateQuery = require('../../../query/address_search_using_ids'); +const _ = require('lodash'); +const proxyquire = require('proxyquire').noCallThru(); +const mock_logger = require('pelias-mock-logger'); +const MockQuery = require('./MockQuery'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', (t) => { + t.ok(_.isFunction(generateQuery)); + t.end(); + }); +}; + +// helper for canned views +const views = { + popularity_only_function: 'popularity_only_function view', + population_only_function: 'population_only_function view', + boundary_country: 'boundary_country view', + boundary_circle: 'boundary_circle view', + boundary_rect: 'boundary_rect view', + sources: 'sources view' +}; + +module.exports.tests.base_query = (test, common) => { + test('basic', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + } + }; + const res = { + data: [] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + }); + + const generatedQuery = generateQuery(clean, res); + + t.equals(generatedQuery.type, 'fallback_using_ids'); + + t.equals(generatedQuery.body.vs.var('input:housenumber').toString(), 'housenumber value'); + t.equals(generatedQuery.body.vs.var('input:street').toString(), 'street value'); + t.notOk(generatedQuery.body.vs.isset('sources')); + t.equals(generatedQuery.body.vs.var('size').toString(), 20); + + t.deepEquals(generatedQuery.body.score_functions, [ + 'popularity_only_function view', + 'population_only_function view' + ]); + + t.deepEquals(generatedQuery.body.filter_functions, [ + 'boundary_country view', + 'boundary_circle view', + 'boundary_rect view', + 'sources view' + ]); + + t.deepEquals(logger.getInfoMessages(), ['[query:address_search_using_ids] [parser:libpostal]']); + t.end(); + + }); +}; + +module.exports.tests.other_parameters = (test, common) => { + test('explicit size set', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + }, + querySize: 'querySize value' + }; + const res = { + data: [] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + }); + + const generatedQuery = generateQuery(clean, res); + + t.equals(generatedQuery.body.vs.var('size').toString(), 'querySize value'); + t.deepEquals(logger.getInfoMessages(), ['[query:address_search_using_ids] [parser:libpostal] [param:querySize]']); + t.end(); + + }); + + test('explicit sources set', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + }, + sources: ['source 1', 'source 2'] + }; + const res = { + data: [] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + }); + + const generatedQuery = generateQuery(clean, res); + + t.deepEquals(generatedQuery.body.vs.var('sources').toString(), ['source 1', 'source 2']); + t.deepEquals(logger.getInfoMessages(), ['[query:address_search_using_ids] [parser:libpostal] [param:sources]']); + t.end(); + + }); + +}; + +module.exports.tests.granularity_bands = (test, common) => { + test('neighbourhood/borough/locality/localadmin granularity band', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + } + }; + const res = { + data: [ + { + layer: 'neighbourhood', + source_id: 1 + }, + { + layer: 'borough', + source_id: 2 + }, + { + layer: 'locality', + source_id: 3 + }, + { + layer: 'localadmin', + source_id: 4 + }, + { + layer: 'county', + source_id: 5 + }, + { + layer: 'neighbourhood', + source_id: 6 + }, + { + layer: 'borough', + source_id: 7 + }, + { + layer: 'locality', + source_id: 8 + }, + { + layer: 'localadmin', + source_id: 9 + } + ] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + neighbourhood: [1, 6], + borough: [2, 7], + locality: [3, 8], + localadmin: [4, 9] + }); + + t.end(); + }); + + test('only band members with ids should be passed', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + } + }; + const res = { + data: [ + { + layer: 'neighbourhood', + source_id: 1 + } + ] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + }); + + const generatedQuery = generateQuery(clean, res); + + t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + neighbourhood: [1], + borough: [], + locality: [], + localadmin: [] + }); + + t.end(); + }); + + test('county/macrocounty granularity band', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + } + }; + const res = { + data: [ + { + layer: 'county', + source_id: 1 + }, + { + layer: 'macrocounty', + source_id: 2 + }, + { + layer: 'region', + source_id: 3 + }, + { + layer: 'county', + source_id: 4 + }, + { + layer: 'macrocounty', + source_id: 5 + } + ] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + county: [1, 4], + macrocounty: [2, 5] + }); + + t.end(); + }); + + test('region/macroregion granularity band', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + } + }; + const res = { + data: [ + { + layer: 'region', + source_id: 1 + }, + { + layer: 'macroregion', + source_id: 2 + }, + { + layer: 'country', + source_id: 3 + }, + { + layer: 'region', + source_id: 4 + }, + { + layer: 'macroregion', + source_id: 5 + } + ] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + region: [1, 4], + macroregion: [2, 5] + }); + + t.end(); + + }); + + test('dependency/country granularity band', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + } + }; + const res = { + data: [ + { + layer: 'dependency', + source_id: 1 + }, + { + layer: 'country', + source_id: 2 + }, + { + layer: 'dependency', + source_id: 3 + }, + { + layer: 'country', + source_id: 4 + } + ] + }; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + dependency: [1, 3], + country: [2, 4] + }); + + t.end(); + + }); + +}; + +module.exports.tests.boundary_filters = (test, common) => { + test('boundary.country available should add to query', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + }, + 'boundary.country': 'boundary.country value' + }; + const res = {}; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.equals(generatedQuery.body.vs.var('boundary:country').toString(), 'boundary.country value'); + + t.end(); + + }); + + test('focus.point.lat/lon w/both numbers should add to query', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + }, + 'focus.point.lat': 12.121212, + 'focus.point.lon': 21.212121 + }; + const res = {}; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.equals(generatedQuery.body.vs.var('focus:point:lat').toString(), 12.121212); + t.equals(generatedQuery.body.vs.var('focus:point:lon').toString(), 21.212121); + + t.end(); + + }); + + test('boundary.rect with all numbers should add to query', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + }, + 'boundary.rect.min_lat': 12.121212, + 'boundary.rect.max_lat': 13.131313, + 'boundary.rect.min_lon': 21.212121, + 'boundary.rect.max_lon': 31.313131 + }; + const res = {}; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.equals(generatedQuery.body.vs.var('boundary:rect:top').toString(), 13.131313); + t.equals(generatedQuery.body.vs.var('boundary:rect:right').toString(), 31.313131); + t.equals(generatedQuery.body.vs.var('boundary:rect:bottom').toString(), 12.121212); + t.equals(generatedQuery.body.vs.var('boundary:rect:left').toString(), 21.212121); + + t.end(); + + }); + + test('boundary circle without radius should set radius to default', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + }, + 'boundary.circle.lat': 12.121212, + 'boundary.circle.lon': 21.212121 + }; + const res = {}; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.equals(generatedQuery.body.vs.var('boundary:circle:lat').toString(), 12.121212); + t.equals(generatedQuery.body.vs.var('boundary:circle:lon').toString(), 21.212121); + t.equals(generatedQuery.body.vs.var('boundary:circle:radius').toString(), '50km'); + + t.end(); + + }); + + test('boundary circle with radius set radius to that value rounded', (t) => { + const logger = mock_logger(); + + const clean = { + parsed_text: { + number: 'housenumber value', + street: 'street value' + }, + 'boundary.circle.lat': 12.121212, + 'boundary.circle.lon': 21.212121, + 'boundary.circle.radius': 17.6 + }; + const res = {}; + + const generateQuery = proxyquire('../../../query/address_search_using_ids', { + 'pelias-logger': logger, + 'pelias-query': { + layout: { + AddressesUsingIdsQuery: MockQuery + }, + view: views, + Vars: require('pelias-query').Vars + } + + }); + + const generatedQuery = generateQuery(clean, res); + + t.equals(generatedQuery.body.vs.var('boundary:circle:lat').toString(), 12.121212); + t.equals(generatedQuery.body.vs.var('boundary:circle:lon').toString(), 21.212121); + t.equals(generatedQuery.body.vs.var('boundary:circle:radius').toString(), '18km'); + + t.end(); + + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`address_search_using_ids query ${name}`, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; From 147d76f9d7aeb29b1cf1216dc915a3fc704eb2b2 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Tue, 27 Jun 2017 16:14:09 -0400 Subject: [PATCH 05/52] added controller for searching with ids --- controller/search_with_ids.js | 119 +++++ test/unit/controller/search_with_ids.js | 569 ++++++++++++++++++++++++ test/unit/run.js | 2 + 3 files changed, 690 insertions(+) create mode 100644 controller/search_with_ids.js create mode 100644 test/unit/controller/search_with_ids.js diff --git a/controller/search_with_ids.js b/controller/search_with_ids.js new file mode 100644 index 00000000..9f6e2f6f --- /dev/null +++ b/controller/search_with_ids.js @@ -0,0 +1,119 @@ +'use strict'; + +const _ = require('lodash'); + +const searchService = require('../service/search'); +const logger = require('pelias-logger').get('api'); +const logging = require( '../helper/logging' ); +const retry = require('retry'); + +function isRequestTimeout(err) { + return _.get(err, 'status') === 408; +} + +function setup( apiConfig, esclient, query, should_execute ){ + function controller( req, res, next ){ + if (!should_execute(req, res)) { + return next(); + } + + let cleanOutput = _.cloneDeep(req.clean); + if (logging.isDNT(req)) { + cleanOutput = logging.removeFields(cleanOutput); + } + // log clean parameters for stats + logger.info('[req]', 'endpoint=' + req.path, cleanOutput); + + const renderedQuery = query(req.clean, res); + // console.log(JSON.stringify(renderedQuery.body, null, 2)); + + // if there's no query to call ES with, skip the service + if (_.isUndefined(renderedQuery)) { + return next(); + } + + // options for retry + // maxRetries is from the API config with default of 3 + // factor of 1 means that each retry attempt will esclient requestTimeout + const operationOptions = { + retries: _.get(apiConfig, 'requestRetries', 3), + factor: 1, + minTimeout: _.get(esclient, 'transport.requestTimeout') + }; + + // setup a new operation + const operation = retry.operation(operationOptions); + + // elasticsearch command + const cmd = { + index: apiConfig.indexName, + searchType: 'dfs_query_then_fetch', + body: renderedQuery.body + }; + + logger.debug( '[ES req]', cmd ); + + operation.attempt((currentAttempt) => { + // query elasticsearch + searchService( esclient, cmd, function( err, docs, meta ){ + // returns true if the operation should be attempted again + // (handles bookkeeping of maxRetries) + // only consider for status 408 (request timeout) + if (isRequestTimeout(err) && operation.retry(err)) { + logger.info(`request timed out on attempt ${currentAttempt}, retrying`); + return; + } + + // if execution has gotten this far then one of three things happened: + // - the request didn't time out + // - maxRetries has been hit so we're giving up + // - another error occurred + // in either case, handle the error or results + + // error handler + if( err ){ + if (_.isObject(err) && err.message) { + req.errors.push( err.message ); + } else { + req.errors.push( err ); + } + } + else { + // log that a retry was successful + // most requests succeed on first attempt so this declutters log files + if (currentAttempt > 1) { + logger.info(`succeeded on retry ${currentAttempt-1}`); + } + + // because this is used in response to placeholder, there may already + // be results. if there are no results from this ES call, don't overwrite + // what's already there from placeholder. + if (!_.isEmpty(docs)) { + res.data = docs; + res.meta = meta || {}; + // store the query_type for subsequent middleware + res.meta.query_type = renderedQuery.type; + + const messageParts = [ + '[controller:search]', + `[queryType:${renderedQuery.type}]`, + `[es_result_count:${docs.length}]` + ]; + + logger.info(messageParts.join(' ')); + + } + + } + logger.debug('[ES response]', docs); + next(); + }); + + }); + + } + + return controller; +} + +module.exports = setup; diff --git a/test/unit/controller/search_with_ids.js b/test/unit/controller/search_with_ids.js new file mode 100644 index 00000000..153989b1 --- /dev/null +++ b/test/unit/controller/search_with_ids.js @@ -0,0 +1,569 @@ +'use strict'; + +const setup = require('../../../controller/search_with_ids'); +const proxyquire = require('proxyquire').noCallThru(); +const mocklogger = require('pelias-mock-logger'); +const _ = require('lodash'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', (t) => { + t.ok(_.isFunction(setup), 'setup is a function'); + t.ok(_.isFunction(setup()), 'setup returns a controller'); + t.end(); + }); +}; + +module.exports.tests.success = (test, common) => { + test('successful request to search service should replace data and meta', (t) => { + t.plan(5); + + const logger = mocklogger(); + + const config = { + indexName: 'indexName value' + }; + const esclient = 'this is the esclient'; + const query = () => ({ + body: 'this is the query body', + type: 'this is the query type' + }); + + // a controller that validates the esclient and cmd that was passed to the search service + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + t.equal(esclient, 'this is the esclient'); + t.deepEqual(cmd, { + index: 'indexName value', + searchType: 'dfs_query_then_fetch', + body: 'this is the query body' + }); + + const docs = [ + { name: 'replacement result #1'}, + { name: 'replacement result #2'} + ]; + const meta = { key: 'replacement meta value' }; + + callback(undefined, docs, meta); + }, + 'pelias-logger': logger + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = { + data: [ + { name: 'original result #1'}, + { name: 'original result #2'} + ], + meta: { + key: 'original meta value' + } + }; + + const next = () => { + t.deepEqual(req, { + clean: {}, + errors: [], + warnings: [] + }); + t.deepEquals(res, { + data: [ + { name: 'replacement result #1'}, + { name: 'replacement result #2'} + ], + meta: { + key: 'replacement meta value', + query_type: 'this is the query type' + } + }); + + t.ok(logger.isInfoMessage('[controller:search] [queryType:this is the query type] [es_result_count:2]')); + + t.end(); + }; + + controller(req, res, next); + + }); + + test('undefined meta should set empty object into res', (t) => { + const logger = mocklogger(); + + const config = { + indexName: 'indexName value' + }; + const esclient = 'this is the esclient'; + const query = () => ({ + body: 'this is the query body', + type: 'this is the query type' + }); + + // a controller that validates the esclient and cmd that was passed to the search service + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + const docs = [ + { name: 'replacement result #1'}, + { name: 'replacement result #2'} + ]; + + callback(undefined, docs, undefined); + }, + 'pelias-logger': logger + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = { + data: [ + { name: 'original result #1'}, + { name: 'original result #2'} + ], + meta: { + key: 'original meta value' + } + }; + + const next = () => { + t.deepEqual(req, { + clean: {}, + errors: [], + warnings: [] + }); + t.deepEquals(res, { + data: [ + { name: 'replacement result #1'}, + { name: 'replacement result #2'} + ], + meta: { + query_type: 'this is the query type' + } + }); + + t.end(); + }; + + controller(req, res, next); + + }); + + test('undefined docs in response should not overwrite existing results', (t) => { + t.plan(1+3); // ensures that search service was called, then req+res+logger tests + + const logger = mocklogger(); + + const config = { + indexName: 'indexName value' + }; + const esclient = 'this is the esclient'; + const query = () => ({ + body: 'this is the query body', + type: 'this is the query type' + }); + + // a controller that validates the esclient and cmd that was passed to the search service + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + t.pass('search service was called'); + + const meta = { key: 'new value' }; + + callback(undefined, undefined, meta); + }, + 'pelias-logger': logger + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = { + data: [ + { id: 1 }, + { id: 2 } + ], + meta: { + key: 'value' + } + }; + + const next = () => { + t.deepEqual(req, { + clean: {}, + errors: [], + warnings: [] + }); + t.deepEquals(res, { + data: [ + { id: 1 }, + { id: 2 } + ], + meta: { key: 'value' } + }); + + t.notOk(logger.isInfoMessage(/[controller:search] [queryType:this is the query type] [es_result_count:0]/)); + + t.end(); + }; + + controller(req, res, next); + + }); + + test('empty docs in response should not overwrite existing results', (t) => { + t.plan(4); + + const logger = mocklogger(); + + const config = { + indexName: 'indexName value' + }; + const esclient = 'this is the esclient'; + const query = () => ({ + body: 'this is the query body', + type: 'this is the query type' + }); + + // a controller that validates the esclient and cmd that was passed to the search service + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + t.pass('search service was called'); + + const meta = { key: 'value' }; + + callback(undefined, [], meta); + } + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = { + data: [ + { name: 'pre-existing result #1' }, + { name: 'pre-existing result #2' } + ], + meta: { + key: 'value' + } + }; + + const next = () => { + t.deepEqual(req, { + clean: {}, + errors: [], + warnings: [] + }); + t.deepEquals(res, { + data: [ + { name: 'pre-existing result #1' }, + { name: 'pre-existing result #2' } + ], + meta: { key: 'value' } + }); + + t.notOk(logger.isInfoMessage(/[controller:search] [queryType:this is the query type] [es_result_count:0]/)); + + t.end(); + }; + + controller(req, res, next); + + }); + + test('successful request on retry to search service should log info message', (t) => { + t.plan(3+2+2); // 3 search service calls, 2 log messages, 1 req, 1 res + + const logger = mocklogger(); + + const config = { + indexName: 'indexName value' + }; + const esclient = 'this is the esclient'; + const query = () => ({ + body: 'this is the query body', + type: 'this is the query type' + }); + + let searchServiceCallCount = 0; + + const timeoutError = { + status: 408, + displayName: 'RequestTimeout', + message: 'Request Timeout after 17ms' + }; + + // a controller that validates the esclient and cmd that was passed to the search service + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + t.pass('search service was called'); + + if (searchServiceCallCount < 2) { + // note that the searchService got called + searchServiceCallCount++; + callback(timeoutError); + } else { + const docs = [ + { name: 'replacement result #1'}, + { name: 'replacement result #2'} + ]; + const meta = { key: 'replacement meta value' }; + + callback(undefined, docs, meta); + } + + }, + 'pelias-logger': logger + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = { + data: [ + { name: 'original result #1'}, + { name: 'original result #2'} + ], + meta: { + key: 'original meta value' + } + }; + + const next = () => { + t.deepEqual(req, { + clean: {}, + errors: [], + warnings: [] + }); + t.deepEquals(res, { + data: [ + { name: 'replacement result #1'}, + { name: 'replacement result #2'} + ], + meta: { + key: 'replacement meta value', + query_type: 'this is the query type' + } + }); + + t.ok(logger.isInfoMessage('[controller:search] [queryType:this is the query type] [es_result_count:2]')); + t.ok(logger.isInfoMessage('succeeded on retry 2')); + + t.end(); + }; + + controller(req, res, next); + + }); + +}; + +module.exports.tests.service_errors = (test, common) => { + test('default # of request timeout retries should be 3', (t) => { + // test for 1 initial search service, 3 retries, 1 log messages, 1 req, and 1 res + t.plan(1 + 3 + 1 + 2); + + const logger = mocklogger(); + + const config = { + indexName: 'indexName value' + }; + const esclient = 'this is the esclient'; + const query = () => ({ + body: 'this is the query body', + }); + + const timeoutError = { + status: 408, + displayName: 'RequestTimeout', + message: 'Request Timeout after 17ms' + }; + + // a controller that validates that the search service was called + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + // note that the searchService got called + t.pass('search service was called'); + + callback(timeoutError); + }, + 'pelias-logger': logger + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = {}; + + const next = () => { + t.ok(logger.getInfoMessages(), [ + 'request timed out on attempt 1, retrying', + 'request timed out on attempt 2, retrying', + 'request timed out on attempt 3, retrying' + ]); + + t.deepEqual(req, { + clean: {}, + errors: [timeoutError.message], + warnings: [] + }); + t.deepEqual(res, {}); + t.end(); + }; + + controller(req, res, next); + + }); + + test('explicit apiConfig.requestRetries should retry that many times', (t) => { + t.plan(1 + 17); // test for initial search service call and 17 retries + + const config = { + indexName: 'indexName value', + requestRetries: 17 + }; + const esclient = 'this is the esclient'; + const query = () => ({ }); + + const timeoutError = { + status: 408, + displayName: 'RequestTimeout', + message: 'Request Timeout after 17ms' + }; + + // a controller that validates that the search service was called + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + // note that the searchService got called + t.pass('search service was called'); + + callback(timeoutError); + } + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = {}; + + controller(req, res, () => t.end() ); + + }); + + test('only status code 408 should be considered a retryable request', (t) => { + t.plan(2); + + const config = { + indexName: 'indexName value', + requestRetries: 17 + }; + const esclient = 'this is the esclient'; + const query = () => ({ }); + + const nonTimeoutError = { + status: 500, + displayName: 'InternalServerError', + message: 'an internal server error occurred' + }; + + // a controller that validates that the search service was called + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + // note that the searchService got called + t.pass('search service was called'); + + callback(nonTimeoutError); + } + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = {}; + + const next = () => { + t.deepEqual(req, { + clean: {}, + errors: [nonTimeoutError.message], + warnings: [] + }); + t.end(); + }; + + controller(req, res, next); + + }); + + test('string error should not retry and be logged as-is', (t) => { + t.plan(2); // service call + error is in req.errors + + const config = { + indexName: 'indexName value' + }; + const esclient = 'this is the esclient'; + const query = () => ({ }); + + // a controller that validates that the search service was called + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': (esclient, cmd, callback) => { + // note that the searchService got called + t.pass('search service was called'); + + callback('this is an error string'); + } + })(config, esclient, query, () => true ); + + const req = { clean: { }, errors: [], warnings: [] }; + const res = {}; + + const next = () => { + t.deepEqual(req, { + clean: {}, + errors: ['this is an error string'], + warnings: [] + }); + t.end(); + }; + + controller(req, res, next); + + }); + +}; + +module.exports.tests.should_execute = (test, common) => { + test('should_execute returning false and empty req.errors should call next', (t) => { + const esclient = () => t.fail('esclient should not have been called'); + const query = () => t.fail('query should not have been called'); + const should_execute = () => false; + const controller = setup( {}, esclient, query, should_execute ); + + const req = { }; + const res = { }; + + const next = () => { + t.deepEqual(res, { }); + t.end(); + }; + controller(req, res, next); + + }); + +}; + +module.exports.tests.undefined_query = (test, common) => { + test('query returning undefined should not call service', (t) => { + t.plan(0, 'test will fail if search service actually gets called'); + + // a function that returns undefined + const query = () => undefined; + + const controller = proxyquire('../../../controller/search_with_ids', { + '../service/search': () => { + t.fail('search service should not have been called'); + } + })(undefined, undefined, query, () => true ); + + const next = () => t.end(); + + controller({}, {}, next); + + }); +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /search ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 8447553b..70a2a449 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -16,6 +16,7 @@ var tests = [ require('./controller/place'), require('./controller/placeholder'), require('./controller/search'), + require('./controller/search_with_ids'), require('./controller/predicates/has_parsed_text_property'), require('./controller/predicates/has_response_data'), require('./controller/predicates/has_results_at_layers'), @@ -46,6 +47,7 @@ var tests = [ require('./middleware/trimByGranularity'), require('./middleware/trimByGranularityStructured'), require('./middleware/requestLanguage'), + require('./query/address_search_using_ids'), require('./query/autocomplete'), require('./query/autocomplete_defaults'), require('./query/search_defaults'), From cc9fda0d59c0e00962fa3637fd4532442cebd0be Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Tue, 27 Jun 2017 16:15:12 -0400 Subject: [PATCH 06/52] added searching with ids to /search endpoint --- routes/v1.js | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/routes/v1.js b/routes/v1.js index 51d5ac7f..a78edcea 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -31,6 +31,7 @@ var controllers = { place: require('../controller/place'), placeholder: require('../controller/placeholder'), search: require('../controller/search'), + search_with_ids: require('../controller/search_with_ids'), status: require('../controller/status') }; @@ -39,7 +40,8 @@ var queries = { fallback_to_old_prod: require('../query/search_original'), structured_geocoding: require('../query/structured_geocoding'), reverse: require('../query/reverse'), - autocomplete: require('../query/autocomplete') + autocomplete: require('../query/autocomplete'), + address_using_ids: require('../query/address_search_using_ids') }; /** ----------------------- controllers ----------------------- **/ @@ -114,6 +116,12 @@ function addRoutes(app, peliasConfig) { ) ); + const searchWithIdsShouldExecute = all( + not(hasRequestErrors), + not(hasParsedTextProperty('venue')), + hasParsedTextProperty('street') + ); + // execute under the following conditions: // - there are no errors or data // - request is not coarse OR pip service is disabled @@ -141,6 +149,7 @@ function addRoutes(app, peliasConfig) { middleware.requestLanguage, middleware.calcSize(), controllers.placeholder(placeholderService, placeholderShouldExecute), + controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute), // 3rd parameter is which query module to use, use fallback/geodisambiguation // first, then use original search strategy if first query didn't return anything controllers.search(peliasConfig.api, esclient, queries.libpostal, not(hasResponseDataOrRequestErrors)), From 568a258406dfbfe5fc584ee8615962180865383d Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 29 Jun 2017 11:17:14 -0400 Subject: [PATCH 07/52] code cleanup, shorter function syntax --- controller/search.js | 1 + controller/search_with_ids.js | 15 +++++---------- query/address_search_using_ids.js | 12 ++++++------ 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/controller/search.js b/controller/search.js index 989060a8..ad41b386 100644 --- a/controller/search.js +++ b/controller/search.js @@ -25,6 +25,7 @@ function setup( apiConfig, esclient, query, should_execute ){ logger.info('[req]', 'endpoint=' + req.path, cleanOutput); const renderedQuery = query(req.clean); + // console.log(`BODY: ` + JSON.stringify(renderedQuery.body, null, 2)); // if there's no query to call ES with, skip the service if (_.isUndefined(renderedQuery)) { diff --git a/controller/search_with_ids.js b/controller/search_with_ids.js index 9f6e2f6f..fb421d6f 100644 --- a/controller/search_with_ids.js +++ b/controller/search_with_ids.js @@ -1,5 +1,3 @@ -'use strict'; - const _ = require('lodash'); const searchService = require('../service/search'); @@ -17,12 +15,12 @@ function setup( apiConfig, esclient, query, should_execute ){ return next(); } - let cleanOutput = _.cloneDeep(req.clean); + const cleanOutput = _.cloneDeep(req.clean); if (logging.isDNT(req)) { - cleanOutput = logging.removeFields(cleanOutput); + logging.removeFields(cleanOutput); } // log clean parameters for stats - logger.info('[req]', 'endpoint=' + req.path, cleanOutput); + logger.info('[req]', `endpoint=${req.path}`, cleanOutput); const renderedQuery = query(req.clean, res); // console.log(JSON.stringify(renderedQuery.body, null, 2)); @@ -72,11 +70,8 @@ function setup( apiConfig, esclient, query, should_execute ){ // error handler if( err ){ - if (_.isObject(err) && err.message) { - req.errors.push( err.message ); - } else { - req.errors.push( err ); - } + // push err.message or err onto req.errors + req.errors.push( _.get(err, 'message', err)); } else { // log that a retry was successful diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index 432f38bc..2f14a043 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -73,11 +73,11 @@ const granularity_bands = [ ]; function anyResultsAtGranularityBand(results, band) { - return results.some((result) => { return _.includes(band, result.layer); }); + return results.some(result => _.includes(band, result.layer)); } function getIdsAtLayer(results, layer) { - return results.filter((result) => { return result.layer === layer; }).map(_.property('source_id')); + return results.filter(result => result.layer === layer).map(_.property('source_id')); } /** @@ -107,9 +107,7 @@ function generateQuery( clean, res ){ } vs.var( 'input:street', clean.parsed_text.street ); - const granularity_band = granularity_bands.find((band) => { - return anyResultsAtGranularityBand(results, band); - }); + const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band)); if (granularity_band) { const layers_to_ids = granularity_band.reduce((acc, layer) => { @@ -117,6 +115,8 @@ function generateQuery( clean, res ){ return acc; }, {}); + // use an object here instead of calling `set` since that flattens out an + // object into key/value pairs and makes identifying layers harder in query module vs.var('input:layers', JSON.stringify(layers_to_ids)); } @@ -167,7 +167,7 @@ function generateQuery( clean, res ){ } // format the log parts into a single coherent string - logger.info(logParts.map((part) => { return `[${part}]`;} ).join(' ') ); + logger.info(logParts.map(part => `[${part}]`).join(' ')); return { type: 'fallback_using_ids', From d3bd68b95a098127293e43ed613ade020c6774d7 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 29 Jun 2017 11:19:09 -0400 Subject: [PATCH 08/52] removed debug --- controller/search.js | 1 - 1 file changed, 1 deletion(-) diff --git a/controller/search.js b/controller/search.js index ad41b386..989060a8 100644 --- a/controller/search.js +++ b/controller/search.js @@ -25,7 +25,6 @@ function setup( apiConfig, esclient, query, should_execute ){ logger.info('[req]', 'endpoint=' + req.path, cleanOutput); const renderedQuery = query(req.clean); - // console.log(`BODY: ` + JSON.stringify(renderedQuery.body, null, 2)); // if there's no query to call ES with, skip the service if (_.isUndefined(renderedQuery)) { From 74419bddc10988d1b9a1ed5891796e8cc82083ee Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 29 Jun 2017 11:36:03 -0400 Subject: [PATCH 09/52] added support for focus-only function --- query/address_search_using_ids.js | 2 +- test/unit/query/address_search_using_ids.js | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index 2f14a043..9dbf1633 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -10,7 +10,7 @@ const check = require('check-types'); const addressUsingIdsQuery = new peliasQuery.layout.AddressesUsingIdsQuery(); // scoring boost -// addressUsingIdsQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); +addressUsingIdsQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); addressUsingIdsQuery.score( peliasQuery.view.popularity_only_function ); addressUsingIdsQuery.score( peliasQuery.view.population_only_function ); // -------------------------------- diff --git a/test/unit/query/address_search_using_ids.js b/test/unit/query/address_search_using_ids.js index f6d623e2..2e687dbd 100644 --- a/test/unit/query/address_search_using_ids.js +++ b/test/unit/query/address_search_using_ids.js @@ -15,6 +15,7 @@ module.exports.tests.interface = (test, common) => { // helper for canned views const views = { + focus_only_function: () => 'focus_only_function', popularity_only_function: 'popularity_only_function view', population_only_function: 'population_only_function view', boundary_country: 'boundary_country view', @@ -58,6 +59,7 @@ module.exports.tests.base_query = (test, common) => { t.equals(generatedQuery.body.vs.var('size').toString(), 20); t.deepEquals(generatedQuery.body.score_functions, [ + 'focus_only_function', 'popularity_only_function view', 'population_only_function view' ]); From ea3572539f03b7f41c31b0fa8c12e5417510f744 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 29 Jun 2017 16:15:04 -0400 Subject: [PATCH 10/52] combined non-county layers into one --- query/address_search_using_ids.js | 6 +- test/unit/query/address_search_using_ids.js | 180 +++++++------------- 2 files changed, 61 insertions(+), 125 deletions(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index 9dbf1633..e3a5aea5 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -66,10 +66,8 @@ addressUsingIdsQuery.filter( peliasQuery.view.sources ); // if there are region/macroregion layers, return ['region', 'macroregion'] const granularity_bands = [ - ['neighbourhood', 'borough', 'locality', 'localadmin'], - ['county', 'macrocounty'], - ['region', 'macroregion'], - ['dependency', 'country'] + ['neighbourhood', 'borough', 'locality', 'localadmin', 'region', 'macroregion', 'dependency', 'country'], + ['county', 'macrocounty'] ]; function anyResultsAtGranularityBand(results, band) { diff --git a/test/unit/query/address_search_using_ids.js b/test/unit/query/address_search_using_ids.js index 2e687dbd..05fffae7 100644 --- a/test/unit/query/address_search_using_ids.js +++ b/test/unit/query/address_search_using_ids.js @@ -179,119 +179,64 @@ module.exports.tests.granularity_bands = (test, common) => { source_id: 5 }, { - layer: 'neighbourhood', + layer: 'macrocounty', source_id: 6 }, { - layer: 'borough', + layer: 'region', source_id: 7 }, { - layer: 'locality', + layer: 'macroregion', source_id: 8 }, { - layer: 'localadmin', + layer: 'dependency', source_id: 9 - } - ] - }; - - const generateQuery = proxyquire('../../../query/address_search_using_ids', { - 'pelias-logger': logger, - 'pelias-query': { - layout: { - AddressesUsingIdsQuery: MockQuery }, - view: views, - Vars: require('pelias-query').Vars - } - - }); - - const generatedQuery = generateQuery(clean, res); - - t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { - neighbourhood: [1, 6], - borough: [2, 7], - locality: [3, 8], - localadmin: [4, 9] - }); - - t.end(); - }); - - test('only band members with ids should be passed', (t) => { - const logger = mock_logger(); - - const clean = { - parsed_text: { - number: 'housenumber value', - street: 'street value' - } - }; - const res = { - data: [ + { + layer: 'country', + source_id: 10 + }, { layer: 'neighbourhood', - source_id: 1 - } - ] - }; - - const generateQuery = proxyquire('../../../query/address_search_using_ids', { - 'pelias-logger': logger, - 'pelias-query': { - layout: { - AddressesUsingIdsQuery: MockQuery + source_id: 11 + }, + { + layer: 'borough', + source_id: 12 + }, + { + layer: 'locality', + source_id: 13 + }, + { + layer: 'localadmin', + source_id: 14 }, - view: views, - Vars: require('pelias-query').Vars - } - }); - - const generatedQuery = generateQuery(clean, res); - - t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { - neighbourhood: [1], - borough: [], - locality: [], - localadmin: [] - }); - - t.end(); - }); - - test('county/macrocounty granularity band', (t) => { - const logger = mock_logger(); - - const clean = { - parsed_text: { - number: 'housenumber value', - street: 'street value' - } - }; - const res = { - data: [ { layer: 'county', - source_id: 1 + source_id: 15 }, { layer: 'macrocounty', - source_id: 2 + source_id: 16 }, { layer: 'region', - source_id: 3 + source_id: 17 }, { - layer: 'county', - source_id: 4 + layer: 'macroregion', + source_id: 18 }, { - layer: 'macrocounty', - source_id: 5 + layer: 'dependency', + source_id: 19 + }, + { + layer: 'country', + source_id: 20 } ] }; @@ -311,14 +256,20 @@ module.exports.tests.granularity_bands = (test, common) => { const generatedQuery = generateQuery(clean, res); t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { - county: [1, 4], - macrocounty: [2, 5] + neighbourhood: [1, 11], + borough: [2, 12], + locality: [3, 13], + localadmin: [4, 14], + region: [7, 17], + macroregion: [8, 18], + dependency: [9, 19], + country: [10, 20] }); t.end(); }); - test('region/macroregion granularity band', (t) => { + test('only band members with ids should be passed', (t) => { const logger = mock_logger(); const clean = { @@ -330,24 +281,8 @@ module.exports.tests.granularity_bands = (test, common) => { const res = { data: [ { - layer: 'region', + layer: 'neighbourhood', source_id: 1 - }, - { - layer: 'macroregion', - source_id: 2 - }, - { - layer: 'country', - source_id: 3 - }, - { - layer: 'region', - source_id: 4 - }, - { - layer: 'macroregion', - source_id: 5 } ] }; @@ -361,21 +296,25 @@ module.exports.tests.granularity_bands = (test, common) => { view: views, Vars: require('pelias-query').Vars } - }); const generatedQuery = generateQuery(clean, res); t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { - region: [1, 4], - macroregion: [2, 5] + neighbourhood: [1], + borough: [], + locality: [], + localadmin: [], + region: [], + macroregion: [], + dependency: [], + country: [] }); t.end(); - }); - test('dependency/country granularity band', (t) => { + test('county/macrocounty granularity band', (t) => { const logger = mock_logger(); const clean = { @@ -387,20 +326,20 @@ module.exports.tests.granularity_bands = (test, common) => { const res = { data: [ { - layer: 'dependency', + layer: 'county', source_id: 1 }, { - layer: 'country', + layer: 'macrocounty', source_id: 2 }, { - layer: 'dependency', - source_id: 3 + layer: 'county', + source_id: 4 }, { - layer: 'country', - source_id: 4 + layer: 'macrocounty', + source_id: 5 } ] }; @@ -420,12 +359,11 @@ module.exports.tests.granularity_bands = (test, common) => { const generatedQuery = generateQuery(clean, res); t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { - dependency: [1, 3], - country: [2, 4] + county: [1, 4], + macrocounty: [2, 5] }); t.end(); - }); }; From 98b6829e316072c3907c84c00d7bea237b84ce90 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 30 Jun 2017 14:20:03 -0400 Subject: [PATCH 11/52] exclude venue, address, and street from layers filter --- controller/placeholder.js | 20 ++-- test/unit/controller/placeholder.js | 136 ++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 12 deletions(-) diff --git a/controller/placeholder.js b/controller/placeholder.js index 27843403..0d00b1b8 100644 --- a/controller/placeholder.js +++ b/controller/placeholder.js @@ -59,16 +59,16 @@ function hasName(result) { } // filter that passes only results that match on requested layers -// passes everything if req.clean.layers is not found function getLayersFilter(clean) { - if (_.isEmpty(_.get(clean, 'layers', []))) { - return _.constant(true); + // passes everything if: + // - req.clean.layers is empty + // - req.clean.parsed_text.street is available + if (_.isEmpty(_.get(clean, 'layers', [])) || _.has(clean, ['parsed_text', 'street'])) { + return () => true; } // otherwise return a function that checks for set inclusion of a result placetype - return (result) => { - return _.includes(clean.layers, result.placetype); - }; + return (result) => _.includes(clean.layers, result.placetype); } @@ -222,12 +222,8 @@ function setup(placeholderService, should_execute) { placeholderService(req, (err, results) => { if (err) { - // bubble up an error if one occurred - if (_.isObject(err) && err.message) { - req.errors.push( err.message ); - } else { - req.errors.push( err ); - } + // push err.message or err onto req.errors + req.errors.push( _.get(err, 'message', err)); } else { const boundaryCountry = _.get(req, ['clean', 'boundary.country']); diff --git a/test/unit/controller/placeholder.js b/test/unit/controller/placeholder.js index 74118c8f..ef925de9 100644 --- a/test/unit/controller/placeholder.js +++ b/test/unit/controller/placeholder.js @@ -1143,6 +1143,142 @@ module.exports.tests.result_filtering = (test, common) => { }); + test('if req.clean.parsed_text contains street, don\'t filter on anything', (t) => { + const logger = mock_logger(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { + param1: 'param1 value', + clean: { + layers: ['neighbourhood'], + parsed_text: { + street: 'street value' + } + } + }); + + const response = [ + { + id: 1, + name: 'name 1', + placetype: 'neighbourhood', + lineage: [ {} ], + geom: { + area: 1, + lat: 14.141414, + lon: 41.414141 + } + }, + { + id: 2, + name: 'name 2', + placetype: 'borough', + lineage: [ {} ], + geom: { + area: 2, + lat: 15.151515, + lon: 51.515151 + } + }, + { + id: 3, + name: 'name 3', + placetype: 'locality', + lineage: [ {} ], + geom: { + area: 3, + lat: 16.161616, + lon: 61.616161 + } + } + ]; + + callback(null, response); + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, _.constant(true)); + + const req = { + param1: 'param1 value', + clean: { + layers: ['neighbourhood'], + parsed_text: { + street: 'street value' + } + } + }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: {}, + data: [ + { + _id: '1', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '1', + center_point: { + lat: 14.141414, + lon: 41.414141 + }, + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: { } + }, + { + _id: '2', + _type: 'borough', + layer: 'borough', + source: 'whosonfirst', + source_id: '2', + center_point: { + lat: 15.151515, + lon: 51.515151 + }, + name: { + 'default': 'name 2' + }, + phrase: { + 'default': 'name 2' + }, + parent: { } + }, + { + _id: '3', + _type: 'locality', + layer: 'locality', + source: 'whosonfirst', + source_id: '3', + center_point: { + lat: 16.161616, + lon: 61.616161 + }, + name: { + 'default': 'name 3' + }, + phrase: { + 'default': 'name 3' + }, + parent: { } + } + ] + }; + + t.deepEquals(res, expected_res); + t.ok(logger.isInfoMessage('[controller:placeholder] [result_count:3]')); + t.end(); + }); + + }); + test('only synthesized docs matching explicit boundary.country should be returned', (t) => { const logger = require('pelias-mock-logger')(); From eb4a89627233c4073b051b911ea6a5c9726cdb93 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 30 Jun 2017 14:21:53 -0400 Subject: [PATCH 12/52] removed debug --- controller/search_with_ids.js | 1 - 1 file changed, 1 deletion(-) diff --git a/controller/search_with_ids.js b/controller/search_with_ids.js index fb421d6f..a8af0b74 100644 --- a/controller/search_with_ids.js +++ b/controller/search_with_ids.js @@ -23,7 +23,6 @@ function setup( apiConfig, esclient, query, should_execute ){ logger.info('[req]', `endpoint=${req.path}`, cleanOutput); const renderedQuery = query(req.clean, res); - // console.log(JSON.stringify(renderedQuery.body, null, 2)); // if there's no query to call ES with, skip the service if (_.isUndefined(renderedQuery)) { From cdbc0b6447ee7636ca2e012e2119256734b538e8 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 30 Jun 2017 14:28:25 -0400 Subject: [PATCH 13/52] added helpers, don't call placeholder when +number,-street --- routes/v1.js | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/routes/v1.js b/routes/v1.js index a78edcea..a8cff01e 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -79,6 +79,16 @@ const hasResultsAtLayers = require('../controller/predicates/has_results_at_laye const hasResponseDataOrRequestErrors = any(hasResponseData, hasRequestErrors); const hasAdminOnlyResults = not(hasResultsAtLayers(['venue', 'address', 'street'])); +const hasNumberButNotStreet = all( + hasParsedTextProperty('number'), + not(hasParsedTextProperty('street')) +); + +const hasQueryOrCategory = any( + hasParsedTextProperty('query'), + hasParsedTextProperty('category') +); + const serviceWrapper = require('pelias-microservice-wrapper').service; const PlaceHolder = require('../service/configurations/PlaceHolder'); const PointInPolygon = require('../service/configurations/PointInPolygon'); @@ -108,17 +118,17 @@ function addRoutes(app, peliasConfig) { const placeholderShouldExecute = all( not(hasResponseDataOrRequestErrors), isPlaceholderServiceEnabled, - not( - any( - hasParsedTextProperty('venue'), - hasParsedTextProperty('category') - ) - ) + // don't run placeholder if there's a number but no street + not(hasNumberButNotStreet), + // don't run placeholder if there's a query or category + not(hasQueryOrCategory) ); const searchWithIdsShouldExecute = all( not(hasRequestErrors), - not(hasParsedTextProperty('venue')), + // don't search-with-ids if there's a query or category + not(hasQueryOrCategory), + // there must be a street hasParsedTextProperty('street') ); From adff25658334f4215ddb37f16b7c0ac2c625738e Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 30 Jun 2017 16:12:37 -0400 Subject: [PATCH 14/52] set query_type for placeholder to enable confidencescores --- test/unit/controller/placeholder.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/unit/controller/placeholder.js b/test/unit/controller/placeholder.js index ef925de9..2334d944 100644 --- a/test/unit/controller/placeholder.js +++ b/test/unit/controller/placeholder.js @@ -1213,7 +1213,9 @@ module.exports.tests.result_filtering = (test, common) => { controller(req, res, () => { const expected_res = { - meta: {}, + meta: { + query_type: 'fallback' + }, data: [ { _id: '1', From 5bcfbe76bc259a8c256d6f673b7a017a02136725 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 30 Jun 2017 17:05:51 -0400 Subject: [PATCH 15/52] set search_using_ids query_type to 'original' --- query/address_search_using_ids.js | 2 +- test/unit/query/address_search_using_ids.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index e3a5aea5..c0653211 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -168,7 +168,7 @@ function generateQuery( clean, res ){ logger.info(logParts.map(part => `[${part}]`).join(' ')); return { - type: 'fallback_using_ids', + type: 'original', body: addressUsingIdsQuery.render(vs) }; diff --git a/test/unit/query/address_search_using_ids.js b/test/unit/query/address_search_using_ids.js index 05fffae7..af153a17 100644 --- a/test/unit/query/address_search_using_ids.js +++ b/test/unit/query/address_search_using_ids.js @@ -51,7 +51,7 @@ module.exports.tests.base_query = (test, common) => { const generatedQuery = generateQuery(clean, res); - t.equals(generatedQuery.type, 'fallback_using_ids'); + t.equals(generatedQuery.type, 'original'); t.equals(generatedQuery.body.vs.var('input:housenumber').toString(), 'housenumber value'); t.equals(generatedQuery.body.vs.var('input:street').toString(), 'street value'); From ac7f5c445c4e8cea677cff67cdbbdc67feab464e Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 30 Jun 2017 17:26:09 -0400 Subject: [PATCH 16/52] moved libpostal lower in sanitizers to after sources --- sanitizer/search.js | 2 -- test/unit/sanitizer/search.js | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sanitizer/search.js b/sanitizer/search.js index 99baa5a7..4319e55c 100644 --- a/sanitizer/search.js +++ b/sanitizer/search.js @@ -6,8 +6,6 @@ module.exports.middleware = (_api_pelias_config) => { singleScalarParameters: require('../sanitizer/_single_scalar_parameters'), quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'), text: require('../sanitizer/_text'), - iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'), - city_name_standardizer: require('../sanitizer/_city_name_standardizer'), size: require('../sanitizer/_size')(/* use defaults*/), layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping), sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping), diff --git a/test/unit/sanitizer/search.js b/test/unit/sanitizer/search.js index 214c9cbc..bbe85713 100644 --- a/test/unit/sanitizer/search.js +++ b/test/unit/sanitizer/search.js @@ -104,9 +104,6 @@ module.exports.tests.sanitize = (test, common) => { const expected_sanitizers = [ '_single_scalar_parameters', '_deprecate_quattroshapes', - '_text', - '_iso2_to_iso3', - '_city_name_standardizer', '_size', '_targets/layers', '_targets/sources', @@ -116,6 +113,9 @@ module.exports.tests.sanitize = (test, common) => { '_geo_search', '_boundary_country', '_categories', + '_text', + '_iso2_to_iso3', + '_city_name_standardizer', '_geonames_warnings' ]; From 30cd30236a804ce706c87ff83554c39817b2f36f Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 5 Jul 2017 08:36:43 -0400 Subject: [PATCH 17/52] don't call libpostal if sources=whosonfirst --- sanitizer/_text.js | 26 ++++++++------- test/unit/sanitizer/_text.js | 61 ++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 12 deletions(-) diff --git a/sanitizer/_text.js b/sanitizer/_text.js index 874a9b17..d7d1227b 100644 --- a/sanitizer/_text.js +++ b/sanitizer/_text.js @@ -1,29 +1,31 @@ -var check = require('check-types'), - text_analyzer = require('pelias-text-analyzer'); +const check = require('check-types'); +const text_analyzer = require('pelias-text-analyzer'); +const _ = require('lodash'); // validate texts, convert types and apply defaults function sanitize( raw, clean ){ - // error & warning messages - var messages = { errors: [], warnings: [] }; + const messages = { errors: [], warnings: [] }; // invalid input 'text' // must call `!check.nonEmptyString` since `check.emptyString` returns // `false` for `undefined` and `null` if( !check.nonEmptyString( raw.text ) ){ messages.errors.push('invalid param \'text\': text length, must be >0'); - } - // valid input 'text' - else { - // valid text + } else { clean.text = raw.text; - // parse text with query parser - var parsed_text = text_analyzer.parse(clean.text); - if (check.assigned(parsed_text)) { - clean.parsed_text = parsed_text; + // only call libpostal if there are other sources besides whosonfirst + // since placeholder will take care of it later + if (!_.isEqual(clean.sources, ['whosonfirst'])) { + // parse text with query parser + const parsed_text = text_analyzer.parse(clean.text); + if (check.assigned(parsed_text)) { + clean.parsed_text = parsed_text; + } } + } return messages; diff --git a/test/unit/sanitizer/_text.js b/test/unit/sanitizer/_text.js index c29f0a1d..a8a46137 100644 --- a/test/unit/sanitizer/_text.js +++ b/test/unit/sanitizer/_text.js @@ -142,6 +142,67 @@ module.exports.tests.text_parser = function(test, common) { }); + test('sources=whosonfirst should not call text_analyzer and set clean.text from raw.text', (t) => { + const sanitizer = proxyquire('../../../sanitizer/_text', { + 'pelias-text-analyzer': { parse: query => t.fail('should not have been called') } + }); + + const raw = { + text: 'raw clean.text' + }; + const clean = { + sources: ['whosonfirst'], + text: 'original clean.text' + }; + + const expected_clean = { + sources: ['whosonfirst'], + text: 'raw clean.text' + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages, { errors: [], warnings: [] }); + t.end(); + + }); + + test('sources with whosonfirst + others should call analyzer', (t) => { + const sanitizer = proxyquire('../../../sanitizer/_text', { + 'pelias-text-analyzer': { parse: function(query) { + return { + key1: 'value 1', + key2: 'value 2' + }; + } + }}); + + const raw = { + text: 'raw text' + }; + const clean = { + sources: ['whosonfirst', 'another source'], + text: 'clean text' + }; + + const expected_clean = { + sources: ['whosonfirst', 'another source'], + text: 'raw text', + parsed_text: { + key1: 'value 1', + key2: 'value 2' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages, { errors: [], warnings: [] }); + t.end(); + + }); + }; module.exports.all = function (tape, common) { From 9ae838903a8d7eb69c8bde0b3b62f4c7228ec438 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 5 Jul 2017 10:02:05 -0400 Subject: [PATCH 18/52] converted to 'any' property for brevity --- .../has_any_parsed_text_property.js | 17 +++++++++ .../predicates/has_parsed_text_property.js | 13 ------- routes/v1.js | 17 ++++----- ...rty.js => has_any_parsed_text_property.js} | 36 ++++++++++++------- test/unit/run.js | 2 +- 5 files changed, 48 insertions(+), 37 deletions(-) create mode 100644 controller/predicates/has_any_parsed_text_property.js delete mode 100644 controller/predicates/has_parsed_text_property.js rename test/unit/controller/predicates/{has_parsed_text_property.js => has_any_parsed_text_property.js} (54%) diff --git a/controller/predicates/has_any_parsed_text_property.js b/controller/predicates/has_any_parsed_text_property.js new file mode 100644 index 00000000..8f1be8b2 --- /dev/null +++ b/controller/predicates/has_any_parsed_text_property.js @@ -0,0 +1,17 @@ +const _ = require('lodash'); + +// return true if any setup parameter is a key of request.clean.parsed_text +// "arguments" is only available in long-form function declarations, cannot be shortened to fat arrow syntax +// potential improvement: inject set operator to allow for any/all functionality +module.exports = function() { + // save off requested properties since arguments can't be referenced later + const properties = _.values(arguments); + + return (request, response) => !_.isEmpty( + _.intersection( + properties, + _.keys(_.get(request, ['clean', 'parsed_text'], {})) + ) + ); + +}; diff --git a/controller/predicates/has_parsed_text_property.js b/controller/predicates/has_parsed_text_property.js deleted file mode 100644 index f6a51371..00000000 --- a/controller/predicates/has_parsed_text_property.js +++ /dev/null @@ -1,13 +0,0 @@ -const _ = require('lodash'); - -// returns a function that returns true if any result.layer is in any of the -// supplied layers using array intersection - -// example usage: determining if the response contains only admin results - -module.exports = (property) => { - return (request, response) => { - return _.has(request, ['clean', 'parsed_text', property]); - }; - -}; diff --git a/routes/v1.js b/routes/v1.js index a8cff01e..18cc39b7 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -68,7 +68,7 @@ var postProc = { }; // predicates that drive whether controller/search runs -const hasParsedTextProperty = require('../controller/predicates/has_parsed_text_property'); +const hasAnyParsedTextProperty = require('../controller/predicates/has_any_parsed_text_property'); const hasResponseData = require('../controller/predicates/has_response_data'); const hasRequestErrors = require('../controller/predicates/has_request_errors'); const isCoarseReverse = require('../controller/predicates/is_coarse_reverse'); @@ -80,13 +80,8 @@ const hasResponseDataOrRequestErrors = any(hasResponseData, hasRequestErrors); const hasAdminOnlyResults = not(hasResultsAtLayers(['venue', 'address', 'street'])); const hasNumberButNotStreet = all( - hasParsedTextProperty('number'), - not(hasParsedTextProperty('street')) -); - -const hasQueryOrCategory = any( - hasParsedTextProperty('query'), - hasParsedTextProperty('category') + hasAnyParsedTextProperty('number'), + not(hasAnyParsedTextProperty('street')) ); const serviceWrapper = require('pelias-microservice-wrapper').service; @@ -121,15 +116,15 @@ function addRoutes(app, peliasConfig) { // don't run placeholder if there's a number but no street not(hasNumberButNotStreet), // don't run placeholder if there's a query or category - not(hasQueryOrCategory) + not(hasAnyParsedTextProperty('query', 'category')) ); const searchWithIdsShouldExecute = all( not(hasRequestErrors), // don't search-with-ids if there's a query or category - not(hasQueryOrCategory), + not(hasAnyParsedTextProperty('query', 'category')), // there must be a street - hasParsedTextProperty('street') + hasAnyParsedTextProperty('street') ); // execute under the following conditions: diff --git a/test/unit/controller/predicates/has_parsed_text_property.js b/test/unit/controller/predicates/has_any_parsed_text_property.js similarity index 54% rename from test/unit/controller/predicates/has_parsed_text_property.js rename to test/unit/controller/predicates/has_any_parsed_text_property.js index a6b06be1..4a985bbe 100644 --- a/test/unit/controller/predicates/has_parsed_text_property.js +++ b/test/unit/controller/predicates/has_any_parsed_text_property.js @@ -1,13 +1,13 @@ 'use strict'; const _ = require('lodash'); -const has_parsed_text_property = require('../../../../controller/predicates/has_parsed_text_property'); +const has_any_parsed_text_property = require('../../../../controller/predicates/has_any_parsed_text_property'); module.exports.tests = {}; module.exports.tests.interface = (test, common) => { test('valid interface', (t) => { - t.equal(typeof has_parsed_text_property, 'function', 'has_parsed_text_property is a function'); + t.ok(_.isFunction(has_any_parsed_text_property), 'has_any_parsed_text_property is a function'); t.end(); }); }; @@ -21,9 +21,23 @@ module.exports.tests.true_conditions = (test, common) => { } } }; - const res = {}; - t.ok(has_parsed_text_property('property')(req, res)); + t.ok(has_any_parsed_text_property('property')(req)); + t.end(); + + }); + + test('clean.parsed_text with any property should return true ', (t) => { + const req = { + clean: { + parsed_text: { + property2: 'value2', + property3: 'value3' + } + } + }; + + t.ok(has_any_parsed_text_property('property1', 'property3')(req)); t.end(); }); @@ -32,9 +46,7 @@ module.exports.tests.true_conditions = (test, common) => { module.exports.tests.false_conditions = (test, common) => { test('undefined request should return false', (t) => { - const req = {}; - - t.notOk(has_parsed_text_property('property')(req, undefined)); + t.notOk(has_any_parsed_text_property('property')()); t.end(); }); @@ -42,7 +54,7 @@ module.exports.tests.false_conditions = (test, common) => { test('undefined request.clean should return false', (t) => { const req = {}; - t.notOk(has_parsed_text_property('property')(req, undefined)); + t.notOk(has_any_parsed_text_property('property')(req)); t.end(); }); @@ -52,19 +64,19 @@ module.exports.tests.false_conditions = (test, common) => { clean: {} }; - t.notOk(has_parsed_text_property('property')(req, undefined)); + t.notOk(has_any_parsed_text_property('property')(req)); t.end(); }); - test('undefined request.clean.parsed_text.property should return false', (t) => { + test('request.clean.parsed_text with none of the supplied properties should return false', (t) => { const req = { clean: { parsed_text: {} } }; - t.notOk(has_parsed_text_property('property')(req, undefined)); + t.notOk(has_any_parsed_text_property('property1', 'property2')(req)); t.end(); }); @@ -73,7 +85,7 @@ module.exports.tests.false_conditions = (test, common) => { module.exports.all = (tape, common) => { function test(name, testFunction) { - return tape(`GET /has_parsed_text_property ${name}`, testFunction); + return tape(`GET /has_any_parsed_text_property ${name}`, testFunction); } for( const testCase in module.exports.tests ){ diff --git a/test/unit/run.js b/test/unit/run.js index 70a2a449..66ed53d8 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -17,7 +17,7 @@ var tests = [ require('./controller/placeholder'), require('./controller/search'), require('./controller/search_with_ids'), - require('./controller/predicates/has_parsed_text_property'), + require('./controller/predicates/has_any_parsed_text_property'), require('./controller/predicates/has_response_data'), require('./controller/predicates/has_results_at_layers'), require('./controller/predicates/has_request_errors'), From 1d5151dd49236161a427da09a48744e934f488e4 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 5 Jul 2017 13:07:49 -0400 Subject: [PATCH 19/52] updated meta.type to 'fallback' --- query/address_search_using_ids.js | 2 +- test/unit/query/address_search_using_ids.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index c0653211..323ce49a 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -168,7 +168,7 @@ function generateQuery( clean, res ){ logger.info(logParts.map(part => `[${part}]`).join(' ')); return { - type: 'original', + type: 'fallback', body: addressUsingIdsQuery.render(vs) }; diff --git a/test/unit/query/address_search_using_ids.js b/test/unit/query/address_search_using_ids.js index af153a17..7a88a091 100644 --- a/test/unit/query/address_search_using_ids.js +++ b/test/unit/query/address_search_using_ids.js @@ -51,7 +51,7 @@ module.exports.tests.base_query = (test, common) => { const generatedQuery = generateQuery(clean, res); - t.equals(generatedQuery.type, 'original'); + t.equals(generatedQuery.type, 'fallback'); t.equals(generatedQuery.body.vs.var('input:housenumber').toString(), 'housenumber value'); t.equals(generatedQuery.body.vs.var('input:street').toString(), 'street value'); From 99a1868c47a1a3190ae0effa604752c2613a4953 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 5 Jul 2017 17:42:21 -0400 Subject: [PATCH 20/52] removed population/popularity functions from search using ids --- query/address_search_using_ids.js | 2 -- test/unit/query/address_search_using_ids.js | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index 323ce49a..5a880890 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -11,8 +11,6 @@ const addressUsingIdsQuery = new peliasQuery.layout.AddressesUsingIdsQuery(); // scoring boost addressUsingIdsQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); -addressUsingIdsQuery.score( peliasQuery.view.popularity_only_function ); -addressUsingIdsQuery.score( peliasQuery.view.population_only_function ); // -------------------------------- // non-scoring hard filters diff --git a/test/unit/query/address_search_using_ids.js b/test/unit/query/address_search_using_ids.js index 7a88a091..665aa681 100644 --- a/test/unit/query/address_search_using_ids.js +++ b/test/unit/query/address_search_using_ids.js @@ -16,8 +16,6 @@ module.exports.tests.interface = (test, common) => { // helper for canned views const views = { focus_only_function: () => 'focus_only_function', - popularity_only_function: 'popularity_only_function view', - population_only_function: 'population_only_function view', boundary_country: 'boundary_country view', boundary_circle: 'boundary_circle view', boundary_rect: 'boundary_rect view', @@ -59,9 +57,7 @@ module.exports.tests.base_query = (test, common) => { t.equals(generatedQuery.body.vs.var('size').toString(), 20); t.deepEquals(generatedQuery.body.score_functions, [ - 'focus_only_function', - 'popularity_only_function view', - 'population_only_function view' + 'focus_only_function' ]); t.deepEquals(generatedQuery.body.filter_functions, [ From 1f4a29bb0de8c1f5387b4158e03a0bbd79476938 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 6 Jul 2017 15:46:59 -0400 Subject: [PATCH 21/52] removed unused variable --- query/search.js | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/query/search.js b/query/search.js index fcf0b623..3b368f50 100644 --- a/query/search.js +++ b/query/search.js @@ -10,16 +10,11 @@ const logger = require('pelias-logger').get('api'); // general-purpose search query //------------------------------ var fallbackQuery = new peliasQuery.layout.FallbackQuery(); -var geodisambiguationQuery = new peliasQuery.layout.GeodisambiguationQuery(); // scoring boost fallbackQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); fallbackQuery.score( peliasQuery.view.popularity_only_function ); fallbackQuery.score( peliasQuery.view.population_only_function ); - -geodisambiguationQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); -geodisambiguationQuery.score( peliasQuery.view.popularity_only_function ); -geodisambiguationQuery.score( peliasQuery.view.population_only_function ); // -------------------------------- // non-scoring hard filters @@ -29,13 +24,6 @@ fallbackQuery.filter( peliasQuery.view.boundary_rect ); fallbackQuery.filter( peliasQuery.view.sources ); fallbackQuery.filter( peliasQuery.view.layers ); fallbackQuery.filter( peliasQuery.view.categories ); - -geodisambiguationQuery.filter( peliasQuery.view.boundary_country ); -geodisambiguationQuery.filter( peliasQuery.view.boundary_circle ); -geodisambiguationQuery.filter( peliasQuery.view.boundary_rect ); -geodisambiguationQuery.filter( peliasQuery.view.sources ); -geodisambiguationQuery.filter( peliasQuery.view.layers ); -geodisambiguationQuery.filter( peliasQuery.view.categories ); // -------------------------------- /** From 1fef7ac41ce209a1467662b6574b6e5452baecac Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 6 Jul 2017 16:07:56 -0400 Subject: [PATCH 22/52] removed unreachable paths in query/search --- query/search.js | 39 ++++----------------------------------- test/unit/query/search.js | 7 +++---- 2 files changed, 7 insertions(+), 39 deletions(-) diff --git a/query/search.js b/query/search.js index 3b368f50..ce3ac639 100644 --- a/query/search.js +++ b/query/search.js @@ -135,10 +135,7 @@ function getQuery(vs) { logger.info(`[query:search] [search_input_type:${determineQueryType(vs)}]`); - if (hasStreet(vs) || - isCityStateOnlyWithOptionalCountry(vs) || - isCityCountryOnly(vs) || - isPostalCodeOnly(vs)) { + if (hasStreet(vs) || isPostalCodeOnly(vs)) { return { type: 'fallback', body: fallbackQuery.render(vs) @@ -162,7 +159,8 @@ function determineQueryType(vs) { return 'venue'; } else if (['neighbourhood', 'borough', 'postcode', 'county', 'region','country'].some( - (layer)=> { return vs.isset(`input:${layer}`);})) { + layer => vs.isset(`input:${layer}`) + )) { return 'admin'; } return 'other'; @@ -172,37 +170,8 @@ function hasStreet(vs) { return vs.isset('input:street'); } -function isCityStateOnlyWithOptionalCountry(vs) { - var isSet = (layer) => { - return vs.isset(`input:${layer}`); - }; - - var allowedFields = ['locality', 'region']; - var disallowedFields = ['query', 'category', 'housenumber', 'street', - 'neighbourhood', 'borough', 'postcode', 'county']; - - return allowedFields.every(isSet) && !disallowedFields.some(isSet); - -} - -function isCityCountryOnly(vs) { - var isSet = (layer) => { - return vs.isset(`input:${layer}`); - }; - - var allowedFields = ['locality', 'country']; - var disallowedFields = ['query', 'category', 'housenumber', 'street', - 'neighbourhood', 'borough', 'postcode', 'county', 'region']; - - return allowedFields.every(isSet) && - !disallowedFields.some(isSet); - -} - function isPostalCodeOnly(vs) { - var isSet = (layer) => { - return vs.isset(`input:${layer}`); - }; + var isSet = layer => vs.isset(`input:${layer}`); var allowedFields = ['postcode']; var disallowedFields = ['query', 'category', 'housenumber', 'street', diff --git a/test/unit/query/search.js b/test/unit/query/search.js index aa582082..c97fb876 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -284,7 +284,7 @@ module.exports.tests.city_state = function(test, common) { var query = generate(clean); - t.notEqual(query, undefined, 'should not have returned undefined'); + t.equal(query, undefined, 'should have returned undefined'); t.end(); }); @@ -300,7 +300,7 @@ module.exports.tests.city_state = function(test, common) { var query = generate(clean); - t.notEqual(query, undefined, 'should not have returned undefined'); + t.equal(query, undefined, 'should have returned undefined'); t.end(); }); @@ -458,7 +458,7 @@ module.exports.tests.city_country = function(test, common) { var query = generate(clean); - t.notEqual(query, undefined, 'should not have returned undefined'); + t.equal(query, undefined, 'should have returned undefined'); t.end(); }); @@ -621,7 +621,6 @@ module.exports.tests.city_country = function(test, common) { t.end(); }); - }; module.exports.all = function (tape, common) { From 48c50e007b18bc1b580fb488d1c79b58dd71f57e Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 6 Jul 2017 16:24:55 -0400 Subject: [PATCH 23/52] bumped pelias-query hash --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 8ae71856..251e8a37 100644 --- a/package.json +++ b/package.json @@ -60,7 +60,7 @@ "pelias-logger": "0.2.0", "pelias-microservice-wrapper": "1.1.3", "pelias-model": "5.0.0", - "pelias-query": "pelias/query#9279d5f", + "pelias-query": "pelias/query#9673b57", "pelias-sorting": "1.0.1", "pelias-text-analyzer": "1.8.3", "predicates": "^1.0.1", From 3257ea6b9b02aa067350d3fd77052feee5cfde58 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 6 Jul 2017 16:25:52 -0400 Subject: [PATCH 24/52] fixed tests for model 4.9.0 changes --- test/unit/controller/placeholder.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/test/unit/controller/placeholder.js b/test/unit/controller/placeholder.js index 2334d944..bbe30464 100644 --- a/test/unit/controller/placeholder.js +++ b/test/unit/controller/placeholder.js @@ -1232,8 +1232,7 @@ module.exports.tests.result_filtering = (test, common) => { }, phrase: { 'default': 'name 1' - }, - parent: { } + } }, { _id: '2', @@ -1250,8 +1249,7 @@ module.exports.tests.result_filtering = (test, common) => { }, phrase: { 'default': 'name 2' - }, - parent: { } + } }, { _id: '3', @@ -1268,8 +1266,7 @@ module.exports.tests.result_filtering = (test, common) => { }, phrase: { 'default': 'name 3' - }, - parent: { } + } } ] }; From 442c2b5c9b47257b24a2abcbae3de60490719de8 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Fri, 7 Jul 2017 14:25:01 -0400 Subject: [PATCH 25/52] add ability to skip filters in placeholder --- controller/placeholder.js | 28 +- .../predicates/is_single_field_analysis.js | 5 + routes/v1.js | 50 +- test/unit/controller/placeholder.js | 486 +++++++++++++++++- .../predicates/is_single_field_analysis.js | 94 ++++ test/unit/run.js | 1 + 6 files changed, 619 insertions(+), 45 deletions(-) create mode 100644 controller/predicates/is_single_field_analysis.js create mode 100644 test/unit/controller/predicates/is_single_field_analysis.js diff --git a/controller/placeholder.js b/controller/placeholder.js index 0d00b1b8..8e97765b 100644 --- a/controller/placeholder.js +++ b/controller/placeholder.js @@ -86,19 +86,19 @@ function atLeastOneLineageMatchesBoundaryCountry(boundaryCountry, result) { // return a function that detects if a result has at least one lineage in boundary.country // if there's no boundary.country, return a function that always returns true -function getBoundaryCountryFilter(clean) { - if (_.has(clean, 'boundary.country')) { +function getBoundaryCountryFilter(clean, geometric_filters_apply) { + if (_.has(clean, 'boundary.country') && geometric_filters_apply) { return _.partial(atLeastOneLineageMatchesBoundaryCountry, clean['boundary.country']); } - return _.constant(true); + return () => true; } // return a function that detects if a result is inside a bbox if a bbox is available // if there's no bbox, return a function that always returns true -function getBoundaryRectangleFilter(clean) { - if (['min_lat', 'min_lon', 'max_lat', 'max_lon'].every((f) => { +function getBoundaryRectangleFilter(clean, geometric_filters_apply) { + if (geometric_filters_apply && ['min_lat', 'min_lon', 'max_lat', 'max_lon'].every((f) => { return _.has(clean, `boundary.rect.${f}`); })) { const polygon = [ @@ -113,14 +113,14 @@ function getBoundaryRectangleFilter(clean) { } - return _.constant(true); + return () => true; } // return a function that detects if a result is inside a circle if a circle is available // if there's no circle, return a function that always returns true -function getBoundaryCircleFilter(clean) { - if (['lat', 'lon', 'radius'].every((f) => { +function getBoundaryCircleFilter(clean, geometric_filters_apply) { + if (geometric_filters_apply && ['lat', 'lon', 'radius'].every((f) => { return _.has(clean, `boundary.circle.${f}`); })) { const center = { @@ -134,7 +134,7 @@ function getBoundaryCircleFilter(clean) { } - return _.constant(true); + return () => true; } @@ -213,7 +213,7 @@ function buildESDoc(doc) { return _.extend(esDoc.data, { _id: esDoc._id, _type: esDoc._type }); } -function setup(placeholderService, should_execute) { +function setup(placeholderService, geometric_filters_apply, should_execute) { function controller( req, res, next ){ // bail early if req/res don't pass conditions for execution if (!should_execute(req, res)) { @@ -226,7 +226,7 @@ function setup(placeholderService, should_execute) { req.errors.push( _.get(err, 'message', err)); } else { - const boundaryCountry = _.get(req, ['clean', 'boundary.country']); + const boundaryCountry = geometric_filters_apply ? _.get(req, ['clean', 'boundary.country']) : undefined; // convert results to ES docs // boundary.country filter must happen after synthesis since multiple @@ -241,13 +241,13 @@ function setup(placeholderService, should_execute) { // filter out results that don't match on requested layer(s) .filter(getLayersFilter(req.clean)) // filter out results that don't match on any lineage country - .filter(getBoundaryCountryFilter(req.clean)) + .filter(getBoundaryCountryFilter(req.clean, geometric_filters_apply)) // clean up geom.lat/lon for boundary rect/circle checks .map(numberifyGeomLatLon) // filter out results that aren't in the boundary.rect - .filter(getBoundaryRectangleFilter(req.clean)) + .filter(getBoundaryRectangleFilter(req.clean, geometric_filters_apply)) // filter out results that aren't in the boundary.circle - .filter(getBoundaryCircleFilter(req.clean)) + .filter(getBoundaryCircleFilter(req.clean, geometric_filters_apply)) // convert results to ES docs .map(_.partial(synthesizeDocs, boundaryCountry)); diff --git a/controller/predicates/is_single_field_analysis.js b/controller/predicates/is_single_field_analysis.js new file mode 100644 index 00000000..7e5f3c66 --- /dev/null +++ b/controller/predicates/is_single_field_analysis.js @@ -0,0 +1,5 @@ +const _ = require('lodash'); + +// predicate that determines if parsed_text contains only the supplied property +module.exports = property => (req, res) => + _.isEqual(_.keys(_.get(req, ['clean', 'parsed_text'])), [property]); diff --git a/routes/v1.js b/routes/v1.js index 18cc39b7..a232c92c 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -36,8 +36,8 @@ var controllers = { }; var queries = { - libpostal: require('../query/search'), - fallback_to_old_prod: require('../query/search_original'), + cascading_fallback: require('../query/search'), + very_old_prod: require('../query/search_original'), structured_geocoding: require('../query/structured_geocoding'), reverse: require('../query/reverse'), autocomplete: require('../query/autocomplete'), @@ -74,6 +74,7 @@ const hasRequestErrors = require('../controller/predicates/has_request_errors'); const isCoarseReverse = require('../controller/predicates/is_coarse_reverse'); const isAdminOnlyAnalysis = require('../controller/predicates/is_admin_only_analysis'); const hasResultsAtLayers = require('../controller/predicates/has_results_at_layers'); +const isSingleFieldAnalysis = require('../controller/predicates/is_single_field_analysis'); // shorthand for standard early-exit conditions const hasResponseDataOrRequestErrors = any(hasResponseData, hasRequestErrors); @@ -110,13 +111,33 @@ function addRoutes(app, peliasConfig) { isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData) ); - const placeholderShouldExecute = all( + // execute placeholder if libpostal only parsed as admin-only and needs to + // be geodisambiguated + const placeholderGeodisambiguationShouldExecute = all( + not(hasResponseDataOrRequestErrors), + isPlaceholderServiceEnabled, + isAdminOnlyAnalysis, + not(isSingleFieldAnalysis('postalcode')) + ); + + // execute placeholder if libpostal identified address parts but ids need to + // be looked up for admin parts + const placeholderIdsLookupShouldExecute = all( not(hasResponseDataOrRequestErrors), isPlaceholderServiceEnabled, // don't run placeholder if there's a number but no street not(hasNumberButNotStreet), // don't run placeholder if there's a query or category - not(hasAnyParsedTextProperty('query', 'category')) + not(hasAnyParsedTextProperty('query', 'category')), + not(isSingleFieldAnalysis('postalcode')) + ); + + // placeholder should have executed, useful for determining whether to actually + // fallback or not (don't fallback to old search if the placeholder response + // should be honored as is) + const placeholderShouldHaveExecuted = any( + placeholderGeodisambiguationShouldExecute, + placeholderIdsLookupShouldExecute ); const searchWithIdsShouldExecute = all( @@ -127,6 +148,14 @@ function addRoutes(app, peliasConfig) { hasAnyParsedTextProperty('street') ); + // don't execute the cascading fallback query IF placeholder should have executed + // that way, if placeholder didn't return anything, don't try to find more things the old way + const fallbackQueryShouldExecute = all( + not(hasRequestErrors), + not(hasResponseData), + not(placeholderShouldHaveExecuted) + ); + // execute under the following conditions: // - there are no errors or data // - request is not coarse OR pip service is disabled @@ -138,6 +167,10 @@ function addRoutes(app, peliasConfig) { ) ); + // helpers to replace vague booleans + const geometricFiltersApply = true; + const geometricFiltersDontApply = false; + var base = '/v1/'; /** ------------------------- routers ------------------------- **/ @@ -153,13 +186,14 @@ function addRoutes(app, peliasConfig) { sanitizers.search.middleware(peliasConfig.api), middleware.requestLanguage, middleware.calcSize(), - controllers.placeholder(placeholderService, placeholderShouldExecute), + controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute), + controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute), controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute), - // 3rd parameter is which query module to use, use fallback/geodisambiguation + // 3rd parameter is which query module to use, use fallback // first, then use original search strategy if first query didn't return anything - controllers.search(peliasConfig.api, esclient, queries.libpostal, not(hasResponseDataOrRequestErrors)), + controllers.search(peliasConfig.api, esclient, queries.cascading_fallback, fallbackQueryShouldExecute), sanitizers.search_fallback.middleware, - controllers.search(peliasConfig.api, esclient, queries.fallback_to_old_prod, not(hasResponseDataOrRequestErrors)), + controllers.search(peliasConfig.api, esclient, queries.very_old_prod, fallbackQueryShouldExecute), postProc.trimByGranularity(), postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig.api), diff --git a/test/unit/controller/placeholder.js b/test/unit/controller/placeholder.js index bbe30464..efb0fc2a 100644 --- a/test/unit/controller/placeholder.js +++ b/test/unit/controller/placeholder.js @@ -30,7 +30,7 @@ module.exports.tests.should_execute = (test, common) => { return false; }; - const controller = placeholder(placeholder_service, should_execute); + const controller = placeholder(placeholder_service, true, should_execute); const req = { a: 1 }; const res = { b: 2 }; @@ -52,7 +52,7 @@ module.exports.tests.should_execute = (test, common) => { callback(null, []); }; - const controller = placeholder(placeholder_service, _.constant(true)); + const controller = placeholder(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { b: 2 }; @@ -206,7 +206,7 @@ module.exports.tests.success = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -324,7 +324,7 @@ module.exports.tests.success = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -387,7 +387,7 @@ module.exports.tests.success = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -447,7 +447,7 @@ module.exports.tests.success = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -512,7 +512,7 @@ module.exports.tests.success = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -577,7 +577,7 @@ module.exports.tests.success = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -742,7 +742,7 @@ module.exports.tests.result_filtering = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value', @@ -804,6 +804,141 @@ module.exports.tests.result_filtering = (test, common) => { }); + test('when geometric_filters_apply is false, boundary.rect should not apply', (t) => { + const logger = require('pelias-mock-logger')(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { + param1: 'param1 value', + clean: { + 'boundary.rect.min_lat': -1, + 'boundary.rect.max_lat': 1, + 'boundary.rect.min_lon': -1, + 'boundary.rect.max_lon': 1 + } + }); + + const response = [ + { + // inside bbox + id: 1, + name: 'name 1', + placetype: 'neighbourhood', + geom: { + lat: 0, + lon: 0 + } + }, + { + // outside bbox + id: 2, + name: 'name 2', + placetype: 'neighbourhood', + geom: { + lat: -2, + lon: 2 + } + }, + { + // outside bbox + id: 3, + name: 'name 3', + placetype: 'neighbourhood', + geom: { + lat: 2, + lon: -2 + } + } + ]; + + callback(null, response); + }; + + const should_execute = (req, res) => { + return true; + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, false, () => true); + + const req = { + param1: 'param1 value', + clean: { + 'boundary.rect.min_lat': -1, + 'boundary.rect.max_lat': 1, + 'boundary.rect.min_lon': -1, + 'boundary.rect.max_lon': 1 + } + }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: { + query_type: 'fallback' + }, + data: [ + { + _id: '1', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '1', + center_point: { + lat: 0, + lon: 0 + }, + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + } + }, + { + _id: '2', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '2', + center_point: { + lat: -2, + lon: 2 + }, + name: { + 'default': 'name 2' + }, + phrase: { + 'default': 'name 2' + } + }, + { + _id: '3', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '3', + center_point: { + lat: 2, + lon: -2 + }, + name: { + 'default': 'name 3' + }, + phrase: { + 'default': 'name 3' + } + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + test('when boundary.circle is available, results outside of it should be removed', (t) => { const logger = require('pelias-mock-logger')(); @@ -927,7 +1062,7 @@ module.exports.tests.result_filtering = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value', @@ -988,6 +1123,139 @@ module.exports.tests.result_filtering = (test, common) => { }); + test('when geometric_filters_apply is false, boundary.circle should not apply', (t) => { + const logger = require('pelias-mock-logger')(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { + param1: 'param1 value', + clean: { + 'boundary.circle.lat': 0, + 'boundary.circle.lon': 0, + 'boundary.circle.radius': 500 + } + }); + + const response = [ + { + // inside circle + id: 1, + name: 'name 1', + placetype: 'neighbourhood', + geom: { + lat: 1, + lon: 1 + } + }, + { + // outside circle on +lon + id: 2, + name: 'name 2', + placetype: 'neighbourhood', + geom: { + lat: -45, + lon: 45 + } + }, + { + // outside bbox on +lat + id: 3, + name: 'name 3', + placetype: 'neighbourhood', + geom: { + lat: 45, + lon: -45 + } + } + ]; + + callback(null, response); + }; + + const should_execute = (req, res) => { + return true; + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, false, () => true); + + const req = { + param1: 'param1 value', + clean: { + 'boundary.circle.lat': 0, + 'boundary.circle.lon': 0, + 'boundary.circle.radius': 500 + } + }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: { + query_type: 'fallback' + }, + data: [ + { + _id: '1', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '1', + center_point: { + lat: 1, + lon: 1 + }, + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + } + }, + { + _id: '2', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '2', + center_point: { + lat: -45, + lon: 45 + }, + name: { + 'default': 'name 2' + }, + phrase: { + 'default': 'name 2' + } + }, + { + _id: '3', + _type: 'neighbourhood', + layer: 'neighbourhood', + source: 'whosonfirst', + source_id: '3', + center_point: { + lat: 45, + lon: -45 + }, + name: { + 'default': 'name 3' + }, + phrase: { + 'default': 'name 3' + } + } + ] + }; + + t.deepEquals(res, expected_res); + t.end(); + }); + + }); + test('only results matching explicit layers should be returned', (t) => { const logger = mock_logger(); @@ -1062,7 +1330,7 @@ module.exports.tests.result_filtering = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value', @@ -1198,7 +1466,7 @@ module.exports.tests.result_filtering = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value', @@ -1350,7 +1618,7 @@ module.exports.tests.result_filtering = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value', @@ -1420,6 +1688,178 @@ module.exports.tests.result_filtering = (test, common) => { }); + test('when geometric_filters_apply is false, boundary.country should not apply', (t) => { + const logger = require('pelias-mock-logger')(); + + const placeholder_service = (req, callback) => { + t.deepEqual(req, { + param1: 'param1 value', + clean: { + 'boundary.country': 'ABC' + } + }); + + const response = [ + { + id: 1, + name: 'name 1', + placetype: 'locality', + lineage: [ + { + country: { + id: 1, + name: 'country name 1', + abbr: 'ABC' + } + }, + { + country: { + id: 2, + name: 'country name 2', + abbr: 'DEF' + } + } + ], + geom: { + lat: 14.141414, + lon: 41.414141 + } + }, + { + id: 3, + name: 'name 3', + placetype: 'locality', + lineage: [ + { + country: { + id: 3, + name: 'country name 3', + abbr: 'ABC' + } + } + ], + geom: { + lat: 15.151515, + lon: 51.515151 + } + }, + { + id: 4, + name: 'name 4', + placetype: 'locality', + lineage: [ + { + country: { + id: 4, + name: 'country name 4', + abbr: 'GHI' + } + } + ], + geom: { + lat: 16.161616, + lon: 61.616161 + } + } + ]; + + callback(null, response); + }; + + const controller = proxyquire('../../../controller/placeholder', { + 'pelias-logger': logger + })(placeholder_service, false, () => true); + + const req = { + param1: 'param1 value', + clean: { + 'boundary.country': 'ABC' + } + }; + const res = { }; + + controller(req, res, () => { + const expected_res = { + meta: { + query_type: 'fallback' + }, + data: [ + { + _id: '1', + _type: 'locality', + layer: 'locality', + source: 'whosonfirst', + source_id: '1', + center_point: { + lat: 14.141414, + lon: 41.414141 + }, + name: { + 'default': 'name 1' + }, + phrase: { + 'default': 'name 1' + }, + parent: { + country: ['country name 1', 'country name 2'], + country_id: ['1', '2'], + country_a: ['ABC', 'DEF'] + } + }, + { + _id: '3', + _type: 'locality', + layer: 'locality', + source: 'whosonfirst', + source_id: '3', + center_point: { + lat: 15.151515, + lon: 51.515151 + }, + name: { + 'default': 'name 3' + }, + phrase: { + 'default': 'name 3' + }, + parent: { + country: ['country name 3'], + country_id: ['3'], + country_a: ['ABC'] + } + }, + { + _id: '4', + _type: 'locality', + layer: 'locality', + source: 'whosonfirst', + source_id: '4', + center_point: { + lat: 16.161616, + lon: 61.616161 + }, + name: { + 'default': 'name 4' + }, + phrase: { + 'default': 'name 4' + }, + parent: { + country: ['country name 4'], + country_id: ['4'], + country_a: ['GHI'] + } + } + ] + }; + + t.deepEquals(res, expected_res); + t.ok(logger.isInfoMessage('[controller:placeholder] [result_count:3]')); + t.end(); + }); + + }); + }; module.exports.tests.lineage_errors = (test, common) => { @@ -1460,7 +1900,7 @@ module.exports.tests.lineage_errors = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -1534,7 +1974,7 @@ module.exports.tests.lineage_errors = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -1607,7 +2047,7 @@ module.exports.tests.lineage_errors = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -1667,7 +2107,7 @@ module.exports.tests.geometry_errors = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -1726,7 +2166,7 @@ module.exports.tests.centroid_errors = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -1786,7 +2226,7 @@ module.exports.tests.centroid_errors = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -1856,7 +2296,7 @@ module.exports.tests.boundingbox_errors = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { param1: 'param1 value' }; const res = { }; @@ -1908,7 +2348,7 @@ module.exports.tests.error_conditions = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { errors: [] @@ -1937,7 +2377,7 @@ module.exports.tests.error_conditions = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { errors: [] @@ -1962,7 +2402,7 @@ module.exports.tests.error_conditions = (test, common) => { const controller = proxyquire('../../../controller/placeholder', { 'pelias-logger': logger - })(placeholder_service, _.constant(true)); + })(placeholder_service, true, () => true); const req = { errors: [] diff --git a/test/unit/controller/predicates/is_single_field_analysis.js b/test/unit/controller/predicates/is_single_field_analysis.js new file mode 100644 index 00000000..06328fa0 --- /dev/null +++ b/test/unit/controller/predicates/is_single_field_analysis.js @@ -0,0 +1,94 @@ +'use strict'; + +const _ = require('lodash'); +const is_single_field_analysis = require('../../../../controller/predicates/is_single_field_analysis'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', (t) => { + t.ok(_.isFunction(is_single_field_analysis), 'is_single_field_analysis is a function'); + t.end(); + }); +}; + +module.exports.tests.true_conditions = (test, common) => { + test('defined request.clean.parsed_text.property should return true', (t) => { + const req = { + clean: { + parsed_text: { + property: 'value' + } + } + }; + + t.ok(is_single_field_analysis('property')(req)); + t.end(); + + }); + +}; + +module.exports.tests.false_conditions = (test, common) => { + test('undefined request should return false', (t) => { + t.notOk(is_single_field_analysis('property')()); + t.end(); + + }); + + test('undefined request.clean should return false', (t) => { + const req = {}; + + t.notOk(is_single_field_analysis('property')(req)); + t.end(); + + }); + + test('undefined request.clean.parsed_text should return false', (t) => { + const req = { + clean: {} + }; + + t.notOk(is_single_field_analysis('property')(req)); + t.end(); + + }); + + test('request.clean.parsed_text with none of the supplied properties should return false', (t) => { + const req = { + clean: { + parsed_text: {} + } + }; + + t.notOk(is_single_field_analysis('property')(req)); + t.end(); + + }); + + test('request.clean.parsed_text with none of the supplied properties should return false', (t) => { + const req = { + clean: { + parsed_text: { + property: 'value', + another_property: 'value' + } + } + }; + + t.notOk(is_single_field_analysis('property')(req)); + t.end(); + + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /is_single_field_analysis ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 66ed53d8..62924347 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -24,6 +24,7 @@ var tests = [ require('./controller/predicates/is_admin_only_analysis'), require('./controller/predicates/is_coarse_reverse'), require('./controller/predicates/is_service_enabled'), + require('./controller/predicates/is_single_field_analysis'), require('./helper/diffPlaces'), require('./helper/geojsonify'), require('./helper/logging'), From 40ddc93bbf2fa0362f452dd1c9411722ba7dd5b9 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 12 Jul 2017 15:54:12 -0400 Subject: [PATCH 26/52] added conditional fallback to addressit --- controller/predicates/is_addressit_parse.js | 4 + .../predicates/is_admin_only_analysis.js | 2 +- routes/v1.js | 29 ++- sanitizer/_text_addressit.js | 1 + sanitizer/defer_to_addressit.js | 32 +++ sanitizer/search_fallback.js | 30 --- .../predicates/is_addressit_parse.js | 73 +++++++ .../predicates/is_admin_only_analysis.js | 4 +- test/unit/run.js | 3 +- test/unit/sanitizer/_text_addressit.js | 13 ++ test/unit/sanitizer/defer_to_addressit.js | 132 +++++++++++++ test/unit/sanitizer/search_fallback.js | 185 ------------------ 12 files changed, 282 insertions(+), 226 deletions(-) create mode 100644 controller/predicates/is_addressit_parse.js create mode 100644 sanitizer/defer_to_addressit.js delete mode 100644 sanitizer/search_fallback.js create mode 100644 test/unit/controller/predicates/is_addressit_parse.js create mode 100644 test/unit/sanitizer/defer_to_addressit.js delete mode 100644 test/unit/sanitizer/search_fallback.js diff --git a/controller/predicates/is_addressit_parse.js b/controller/predicates/is_addressit_parse.js new file mode 100644 index 00000000..dac3bcfc --- /dev/null +++ b/controller/predicates/is_addressit_parse.js @@ -0,0 +1,4 @@ +const _ = require('lodash'); + +// returns true iff req.clean.parser is addressit +module.exports = (req, res) => _.get(req, 'clean.parser') === 'addressit'; diff --git a/controller/predicates/is_admin_only_analysis.js b/controller/predicates/is_admin_only_analysis.js index 4b3aef6a..d170f3fc 100644 --- a/controller/predicates/is_admin_only_analysis.js +++ b/controller/predicates/is_admin_only_analysis.js @@ -6,7 +6,7 @@ module.exports = (request, response) => { } // return true only if all non-admin properties of parsed_text are empty - return ['number', 'street', 'query', 'category'].every((prop) => { + return ['number', 'street', 'query', 'category', 'postalcode'].every((prop) => { return _.isEmpty(request.clean.parsed_text[prop]); }); diff --git a/routes/v1.js b/routes/v1.js index a232c92c..a07b966a 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -11,7 +11,7 @@ var sanitizers = { autocomplete: require('../sanitizer/autocomplete'), place: require('../sanitizer/place'), search: require('../sanitizer/search'), - search_fallback: require('../sanitizer/search_fallback'), + defer_to_addressit: require('../sanitizer/defer_to_addressit'), structured_geocoding: require('../sanitizer/structured_geocoding'), reverse: require('../sanitizer/reverse'), nearby: require('../sanitizer/nearby') @@ -75,6 +75,7 @@ const isCoarseReverse = require('../controller/predicates/is_coarse_reverse'); const isAdminOnlyAnalysis = require('../controller/predicates/is_admin_only_analysis'); const hasResultsAtLayers = require('../controller/predicates/has_results_at_layers'); const isSingleFieldAnalysis = require('../controller/predicates/is_single_field_analysis'); +const isAddressItParse = require('../controller/predicates/is_addressit_parse'); // shorthand for standard early-exit conditions const hasResponseDataOrRequestErrors = any(hasResponseData, hasRequestErrors); @@ -116,8 +117,7 @@ function addRoutes(app, peliasConfig) { const placeholderGeodisambiguationShouldExecute = all( not(hasResponseDataOrRequestErrors), isPlaceholderServiceEnabled, - isAdminOnlyAnalysis, - not(isSingleFieldAnalysis('postalcode')) + isAdminOnlyAnalysis ); // execute placeholder if libpostal identified address parts but ids need to @@ -129,6 +129,9 @@ function addRoutes(app, peliasConfig) { not(hasNumberButNotStreet), // don't run placeholder if there's a query or category not(hasAnyParsedTextProperty('query', 'category')), + // run placeholder if there are any adminareas identified + // hasAnyParsedTextProperty('neighbourhood', 'borough', 'city', 'county', 'state', 'country'), + // don't run placeholder if only postalcode was identified not(isSingleFieldAnalysis('postalcode')) ); @@ -156,6 +159,18 @@ function addRoutes(app, peliasConfig) { not(placeholderShouldHaveExecuted) ); + const shouldDeferToAddressIt = all( + not(hasRequestErrors), + not(hasResponseData), + not(placeholderShouldHaveExecuted) + ); + + const oldProdQueryShouldExecute = all( + not(hasRequestErrors), + not(hasResponseData), + isAddressItParse + ); + // execute under the following conditions: // - there are no errors or data // - request is not coarse OR pip service is disabled @@ -189,11 +204,11 @@ function addRoutes(app, peliasConfig) { controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute), controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute), controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute), - // 3rd parameter is which query module to use, use fallback - // first, then use original search strategy if first query didn't return anything + // 3rd parameter is which query module to use, use fallback first, then + // use original search strategy if first query didn't return anything controllers.search(peliasConfig.api, esclient, queries.cascading_fallback, fallbackQueryShouldExecute), - sanitizers.search_fallback.middleware, - controllers.search(peliasConfig.api, esclient, queries.very_old_prod, fallbackQueryShouldExecute), + sanitizers.defer_to_addressit(shouldDeferToAddressIt), + controllers.search(peliasConfig.api, esclient, queries.very_old_prod, oldProdQueryShouldExecute), postProc.trimByGranularity(), postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig.api), diff --git a/sanitizer/_text_addressit.js b/sanitizer/_text_addressit.js index 04fca21a..68a293bf 100644 --- a/sanitizer/_text_addressit.js +++ b/sanitizer/_text_addressit.js @@ -20,6 +20,7 @@ function sanitize( raw, clean ){ // valid text clean.text = raw.text; + clean.parser = 'addressit'; // remove anything that may have been parsed before delete clean.parsed_text; diff --git a/sanitizer/defer_to_addressit.js b/sanitizer/defer_to_addressit.js new file mode 100644 index 00000000..8822f72c --- /dev/null +++ b/sanitizer/defer_to_addressit.js @@ -0,0 +1,32 @@ +const sanitizeAll = require('../sanitizer/sanitizeAll'), + sanitizers = { + text: require('../sanitizer/_text_addressit') + }; + +const sanitize = function(req, cb) { sanitizeAll(req, sanitizers, cb); }; +const logger = require('pelias-logger').get('api'); +const logging = require( '../helper/logging' ); + +// middleware +module.exports = (should_execute) => { + return function(req, res, next) { + // if res.data already has results then don't call the _text_autocomplete sanitizer + // this has been put into place for when the libpostal integration way of querying + // ES doesn't return anything and we want to fallback to the old logic + if (!should_execute(req, res)) { + return next(); + } + + // log the query that caused a fallback since libpostal+new-queries didn't return anything + if (req.path === '/v1/search') { + const queryText = logging.isDNT(req) ? '[text removed]' : req.clean.text; + logger.info(`fallback queryText: ${queryText}`); + } + + sanitize( req, function( err, clean ){ + next(); + }); + + }; + +}; diff --git a/sanitizer/search_fallback.js b/sanitizer/search_fallback.js deleted file mode 100644 index 9bdb0c1e..00000000 --- a/sanitizer/search_fallback.js +++ /dev/null @@ -1,30 +0,0 @@ -var sanitizeAll = require('../sanitizer/sanitizeAll'), - sanitizers = { - text: require('../sanitizer/_text_addressit') - }; - -var sanitize = function(req, cb) { sanitizeAll(req, sanitizers, cb); }; -var logger = require('pelias-logger').get('api'); -var logging = require( '../helper/logging' ); -var _ = require('lodash'); - -// middleware -module.exports.middleware = function( req, res, next ){ - // if res.data already has results then don't call the _text_autocomplete sanitizer - // this has been put into place for when the libpostal integration way of querying - // ES doesn't return anything and we want to fallback to the old logic - if (_.get(res, 'data', []).length > 0) { - return next(); - } - - // log the query that caused a fallback since libpostal+new-queries didn't return anything - if (req.path === '/v1/search') { - const queryText = logging.isDNT(req) ? '[text removed]' : req.clean.text; - logger.info(`fallback queryText: ${queryText}`); - } - - sanitize( req, function( err, clean ){ - next(); - }); - -}; diff --git a/test/unit/controller/predicates/is_addressit_parse.js b/test/unit/controller/predicates/is_addressit_parse.js new file mode 100644 index 00000000..d11734ba --- /dev/null +++ b/test/unit/controller/predicates/is_addressit_parse.js @@ -0,0 +1,73 @@ +'use strict'; + +const _ = require('lodash'); +const is_addressit_parse = require('../../../../controller/predicates/is_addressit_parse'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', t => { + t.ok(_.isFunction(is_addressit_parse), 'is_addressit_parse is a function'); + t.end(); + }); +}; + +module.exports.tests.true_conditions = (test, common) => { + test('request.clean.parser=addressit should return true', t => { + const req = { + clean: { + parser: 'addressit' + } + }; + + t.ok(is_addressit_parse(req)); + t.end(); + + }); + +}; + +module.exports.tests.false_conditions = (test, common) => { + test('undefined request should return false', t => { + t.notOk(is_addressit_parse(undefined)); + t.end(); + }); + + test('undefined request.clean should return false', t => { + const req = {}; + + t.notOk(is_addressit_parse(req)); + t.end(); + }); + + test('undefined request.clean.parser should return false', t => { + const req = { + clean: {} + }; + + t.notOk(is_addressit_parse(req)); + t.end(); + }); + + test('non-\'addressit\' request.clean.parser should return false', t => { + const req = { + clean: { + parser: 'not addressit' + } + }; + + t.notOk(is_addressit_parse(req)); + t.end(); + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /is_addressit_parse ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/controller/predicates/is_admin_only_analysis.js b/test/unit/controller/predicates/is_admin_only_analysis.js index eea8b567..4e90bf34 100644 --- a/test/unit/controller/predicates/is_admin_only_analysis.js +++ b/test/unit/controller/predicates/is_admin_only_analysis.js @@ -14,7 +14,7 @@ module.exports.tests.interface = (test, common) => { module.exports.tests.true_conditions = (test, common) => { test('parsed_text with admin-only properties should return true', (t) => { - ['neighbourhood', 'borough', 'city', 'county', 'state', 'postalcode', 'country'].forEach((property) => { + ['neighbourhood', 'borough', 'city', 'county', 'state', 'country'].forEach((property) => { const req = { clean: { parsed_text: {} @@ -47,7 +47,7 @@ module.exports.tests.false_conditions = (test, common) => { }); test('parsed_text with non-admin properties should return false', (t) => { - ['number', 'street', 'query', 'category'].forEach((property) => { + ['number', 'street', 'query', 'category', 'postalcode'].forEach((property) => { const req = { clean: { parsed_text: {} diff --git a/test/unit/run.js b/test/unit/run.js index 62924347..eb6f4de9 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -21,6 +21,7 @@ var tests = [ require('./controller/predicates/has_response_data'), require('./controller/predicates/has_results_at_layers'), require('./controller/predicates/has_request_errors'), + require('./controller/predicates/is_addressit_parse'), require('./controller/predicates/is_admin_only_analysis'), require('./controller/predicates/is_coarse_reverse'), require('./controller/predicates/is_service_enabled'), @@ -87,7 +88,7 @@ var tests = [ require('./sanitizer/reverse'), require('./sanitizer/sanitizeAll'), require('./sanitizer/search'), - require('./sanitizer/search_fallback'), + require('./sanitizer/defer_to_addressit'), require('./sanitizer/wrap'), require('./service/configurations/PlaceHolder'), require('./service/configurations/PointInPolygon'), diff --git a/test/unit/sanitizer/_text_addressit.js b/test/unit/sanitizer/_text_addressit.js index c81680c9..26989c18 100644 --- a/test/unit/sanitizer/_text_addressit.js +++ b/test/unit/sanitizer/_text_addressit.js @@ -33,6 +33,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ', ' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name ], @@ -57,6 +58,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ',' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name ], @@ -88,6 +90,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ', ' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name, query.admin_parts ], @@ -111,6 +114,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ',' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name, query.admin_parts ], @@ -136,6 +140,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: 'yugolsavia' }; @@ -155,6 +160,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: 'small town' }; @@ -174,6 +180,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: '123 main' }; @@ -193,6 +200,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: 'main 123' }; @@ -213,6 +221,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: 'main particle new york', + parser: 'addressit', parsed_text: { regions: [ 'main particle' ], state: 'NY' @@ -235,6 +244,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '123 main st new york ny', + parser: 'addressit', parsed_text: { number: '123', street: 'main st', @@ -259,6 +269,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '123 main st new york ny 10010', + parser: 'addressit', parsed_text: { number: '123', street: 'main st', @@ -283,6 +294,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '339 W Main St, Cheshire, 06410', + parser: 'addressit', parsed_text: { name: '339 W Main St', number: '339', @@ -308,6 +320,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '339 W Main St,Lancaster,PA', + parser: 'addressit', parsed_text: { name: '339 W Main St', number: '339', diff --git a/test/unit/sanitizer/defer_to_addressit.js b/test/unit/sanitizer/defer_to_addressit.js new file mode 100644 index 00000000..9fe4675e --- /dev/null +++ b/test/unit/sanitizer/defer_to_addressit.js @@ -0,0 +1,132 @@ +const proxyquire = require('proxyquire').noCallThru(); +const mock_logger = require('pelias-mock-logger'); + +module.exports.tests = {}; + +module.exports.tests.sanitize = (test, common) => { + test('verify that no sanitizers were called when should_execute returns false', (t) => { + t.plan(1); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.fail('_text_addressit should not have been called'); + }, + 'pelias-logger': logger + })(() => false); + + defer_to_addressit({}, {}, () => { + t.equals(logger.getInfoMessages().length, 0); + t.end(); + }); + + }); + + test('verify that _text_addressit sanitizer was called when should_execute returns true', (t) => { + t.plan(2); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.pass('_text_addressit should have been called'); + return { errors: [], warnings: [] }; + }, + 'pelias-logger': logger, + '../helper/logging': { + isDNT: () => false + } + })(() => true); + + const req = { + path: '/v1/search', + clean: { + text: 'this is the query text' + } + }; + + defer_to_addressit(req, {}, () => { + t.deepEquals(logger.getInfoMessages(), ['fallback queryText: this is the query text']); + t.end(); + }); + + }); + + test('query should not be logged if path != \'/v1/search\'', (t) => { + t.plan(2); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.pass('_text_addressit should have been called'); + return { errors: [], warnings: [] }; + }, + 'pelias-logger': logger + })(() => true); + + const req = { + path: 'not /v1/search', + clean: { + text: 'this is the query text' + } + }; + + defer_to_addressit(req, {}, () => { + t.deepEquals(logger.getInfoMessages(), []); + t.end(); + }); + + }); + + test('query should be logged as [text removed] if private', (t) => { + t.plan(2); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.pass('_text_addressit should have been called'); + return { errors: [], warnings: [] }; + }, + 'pelias-logger': logger, + '../helper/logging': { + isDNT: () => true + } + })(() => true); + + const req = { + path: '/v1/search', + clean: { + text: 'this is the query text' + } + }; + + defer_to_addressit(req, {}, () => { + t.deepEquals(logger.getInfoMessages(), ['fallback queryText: [text removed]']); + t.end(); + }); + + }); + +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape(`SANITIZE /defer_to_addressit ${name}`, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/sanitizer/search_fallback.js b/test/unit/sanitizer/search_fallback.js deleted file mode 100644 index 69e35fcd..00000000 --- a/test/unit/sanitizer/search_fallback.js +++ /dev/null @@ -1,185 +0,0 @@ -var proxyquire = require('proxyquire').noCallThru(); - -module.exports.tests = {}; - -module.exports.tests.sanitize = function(test, common) { - test('verify that all sanitizers were called as expected when `res` is undefined', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_addressit': function() { - called_sanitizers.push('_text_addressit'); - return { errors: [], warnings: [] }; - } - }); - - var expected_sanitizers = [ - '_text_addressit' - ]; - - var req = {}; - - search.middleware(req, undefined, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('verify that all sanitizers were called as expected when `res` has no `data` property', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_addressit': function() { - called_sanitizers.push('_text_addressit'); - return { errors: [], warnings: [] }; - } - }); - - var expected_sanitizers = [ - '_text_addressit' - ]; - - var req = {}; - var res = {}; - - search.middleware(req, res, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('verify that all sanitizers were called as expected when res.data is empty', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_addressit': function() { - called_sanitizers.push('_text_addressit'); - return { errors: [], warnings: [] }; - } - }); - - var expected_sanitizers = [ - '_text_addressit' - ]; - - var req = {}; - var res = { - data: [] - }; - - search.middleware(req, res, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('non-empty res.data should not call the _text_autocomplete sanitizer', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_autocomplete': function() { - throw new Error('_text_autocomplete sanitizer should not have been called'); - } - }); - - var expected_sanitizers = []; - - var req = {}; - var res = { - data: [{}] - }; - - search.middleware(req, res, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('req.clean.text should be logged when isDNT=false', (t) => { - const infoLog = []; - - const search = proxyquire('../../../sanitizer/search_fallback', { - 'pelias-logger': { - get: () => { - return { - info: (msg) => { - infoLog.push(msg); - } - }; - } - }, - '../helper/logging': { - isDNT: () => { return false; } - } - }); - - const req = { - path: '/v1/search', - clean: { - text: 'this is the query text' - } - }; - - search.middleware(req, undefined, () => { - t.deepEquals(infoLog, [`fallback queryText: ${req.clean.text}`]); - t.end(); - }); - - }); - - test('req.clean.text should not be logged when isDNT=true', (t) => { - const infoLog = []; - - const search = proxyquire('../../../sanitizer/search_fallback', { - 'pelias-logger': { - get: () => { - return { - info: (msg) => { - infoLog.push(msg); - } - }; - } - }, - '../helper/logging': { - isDNT: () => { return true; } - } - }); - - const req = { - path: '/v1/search', - clean: { - text: 'this is the query text' - } - }; - - search.middleware(req, undefined, () => { - t.deepEquals(infoLog, ['fallback queryText: [text removed]']); - t.end(); - }); - - }); - -}; - -module.exports.all = function (tape, common) { - - function test(name, testFunction) { - return tape('SANITIZE /search_fallback ' + name, testFunction); - } - - for( var testCase in module.exports.tests ){ - module.exports.tests[testCase](test, common); - } -}; From d55418177697cb1ec2349a4daace267679393741 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 16:14:24 -0400 Subject: [PATCH 27/52] use pelias-text-analyzer specific hash --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 251e8a37..b10d79fa 100644 --- a/package.json +++ b/package.json @@ -62,7 +62,7 @@ "pelias-model": "5.0.0", "pelias-query": "pelias/query#9673b57", "pelias-sorting": "1.0.1", - "pelias-text-analyzer": "1.8.3", + "pelias-text-analyzer": "pelias/text-analyzer#91196dd", "predicates": "^1.0.1", "retry": "^0.10.1", "stats-lite": "^2.0.4", From ce4c23f4a5489a31646ce01585dfd4ce56e9dc3b Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 16:15:37 -0400 Subject: [PATCH 28/52] add predicate that looks for a parameter in req.clean --- .../predicates/has_request_parameter.js | 3 + .../predicates/has_request_parameter.js | 63 +++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 controller/predicates/has_request_parameter.js create mode 100644 test/unit/controller/predicates/has_request_parameter.js diff --git a/controller/predicates/has_request_parameter.js b/controller/predicates/has_request_parameter.js new file mode 100644 index 00000000..707f0c1d --- /dev/null +++ b/controller/predicates/has_request_parameter.js @@ -0,0 +1,3 @@ +const _ = require('lodash'); + +module.exports = (parameter) => (req, res) => _.has(req, ['clean', parameter]); diff --git a/test/unit/controller/predicates/has_request_parameter.js b/test/unit/controller/predicates/has_request_parameter.js new file mode 100644 index 00000000..099ba39d --- /dev/null +++ b/test/unit/controller/predicates/has_request_parameter.js @@ -0,0 +1,63 @@ +'use strict'; + +const _ = require('lodash'); +const has_request_parameter = require('../../../../controller/predicates/has_request_parameter'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', t => { + t.equal(typeof has_request_parameter, 'function', 'has_request_parameter is a function'); + t.end(); + }); +}; + +module.exports.tests.true_conditions = (test, common) => { + test('request with specified parameter should return true', t => { + [[], {}, 'string value', 17].forEach(val => { + const req = { + clean: { + 'parameter name': val + } + }; + + t.ok(has_request_parameter('parameter name')(req)); + + }); + + t.end(); + + }); + +}; + +module.exports.tests.false_conditions = (test, common) => { + test('request with undefined clean should return false', t => { + const req = {}; + + t.notOk(has_request_parameter('parameter name')(req)); + t.end(); + + }); + + test('request.clean without specified parameter should return false', t => { + const req = { + clean: {} + }; + + t.notOk(has_request_parameter('parameter name')(req)); + t.end(); + + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /has_request_parameter ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; From 59fb445e684ec80b03efd9d3a8cbaea1267e039f Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 16:16:33 -0400 Subject: [PATCH 29/52] add predicate that detects non-admin layers parameter values --- .../predicates/is_only_non_admin_layers.js | 5 + .../predicates/is_only_non_admin_layers.js | 111 ++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 controller/predicates/is_only_non_admin_layers.js create mode 100644 test/unit/controller/predicates/is_only_non_admin_layers.js diff --git a/controller/predicates/is_only_non_admin_layers.js b/controller/predicates/is_only_non_admin_layers.js new file mode 100644 index 00000000..03d15fd6 --- /dev/null +++ b/controller/predicates/is_only_non_admin_layers.js @@ -0,0 +1,5 @@ +const _ = require('lodash'); + +// return IFF req.clean.layers is empty OR there are non-venue/address/street layers +module.exports = (req, res) => !_.isEmpty(_.get(req, 'clean.layers', [])) && + _.isEmpty(_.difference(req.clean.layers, ['venue', 'address', 'street'])); diff --git a/test/unit/controller/predicates/is_only_non_admin_layers.js b/test/unit/controller/predicates/is_only_non_admin_layers.js new file mode 100644 index 00000000..51e1b796 --- /dev/null +++ b/test/unit/controller/predicates/is_only_non_admin_layers.js @@ -0,0 +1,111 @@ +'use strict'; + +const _ = require('lodash'); +const is_only_non_admin_layers = require('../../../../controller/predicates/is_only_non_admin_layers'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', t => { + t.equal(typeof is_only_non_admin_layers, 'function', 'is_only_non_admin_layers is a function'); + t.end(); + }); +}; + +module.exports.tests.true_conditions = (test, common) => { + test('request with specified parameter should return true', t => { + [ + ['venue', 'address', 'street'], + ['venue', 'address'], + ['venue', 'street'], + ['address', 'street'], + ['venue'], + ['address'], + ['street'] + ].forEach(layers => { + const req = { + clean: { + layers: layers + } + }; + + t.ok(is_only_non_admin_layers(req)); + + }); + + t.end(); + + }); + +}; + +module.exports.tests.false_conditions = (test, common) => { + test('request with undefined clean should return false', t => { + const req = {}; + + t.notOk(is_only_non_admin_layers(req)); + t.end(); + + }); + + test('request.clean without layers parameter should return false', t => { + const req = { + clean: {} + }; + + t.notOk(is_only_non_admin_layers(req)); + t.end(); + + }); + + test('request with empty layers should return false', t => { + const req = { + clean: { + layers: [] + } + }; + + t.notOk(is_only_non_admin_layers(req)); + t.end(); + + }); + + test('request.clean.layers without venue, address, or street should return false', t => { + const req = { + clean: { + layers: ['locality'] + } + }; + + t.notOk(is_only_non_admin_layers(req)); + t.end(); + + }); + + test('request.clean.layers with other layers besides venue, address, or street should return false', t => { + ['venue', 'address', 'street'].forEach(non_admin_layer => { + const req = { + clean: { + layers: ['locality', non_admin_layer] + } + }; + + t.notOk(is_only_non_admin_layers(req)); + + }); + + t.end(); + + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /is_only_non_admin_layers ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; From 44896d7569a97313ca2ddab79d9bde3240d0c893 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 16:17:47 -0400 Subject: [PATCH 30/52] add predicate that checks for sources=['whosonfirst'] --- .../is_request_sources_only_whosonfirst.js | 3 + .../is_request_sources_only_whosonfirst.js | 115 ++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 controller/predicates/is_request_sources_only_whosonfirst.js create mode 100644 test/unit/controller/predicates/is_request_sources_only_whosonfirst.js diff --git a/controller/predicates/is_request_sources_only_whosonfirst.js b/controller/predicates/is_request_sources_only_whosonfirst.js new file mode 100644 index 00000000..855c1197 --- /dev/null +++ b/controller/predicates/is_request_sources_only_whosonfirst.js @@ -0,0 +1,3 @@ +const _ = require('lodash'); + +module.exports = (req, res) => _.isEqual(_.get(req, 'clean.sources', []), ['whosonfirst']); diff --git a/test/unit/controller/predicates/is_request_sources_only_whosonfirst.js b/test/unit/controller/predicates/is_request_sources_only_whosonfirst.js new file mode 100644 index 00000000..bbf1aa44 --- /dev/null +++ b/test/unit/controller/predicates/is_request_sources_only_whosonfirst.js @@ -0,0 +1,115 @@ +'use strict'; + +const _ = require('lodash'); +const is_request_sources_only_whosonfirst = require('../../../../controller/predicates/is_request_sources_only_whosonfirst'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', (t) => { + t.ok(_.isFunction(is_request_sources_only_whosonfirst), 'is_request_sources_only_whosonfirst is a function'); + t.end(); + }); +}; + +module.exports.tests.true_conditions = (test, common) => { + test('sources only \'whosonfirst\' should return true', (t) => { + const req = { + clean: { + sources: [ + 'whosonfirst' + ] + } + }; + + t.ok(is_request_sources_only_whosonfirst(req)); + t.end(); + + }); + +}; + +module.exports.tests.false_conditions = (test, common) => { + test('undefined req should return false', (t) => { + const req = { + clean: { + sources: [ + 'not whosonfirst' + ] + } + }; + + t.notOk(is_request_sources_only_whosonfirst(req)); + t.end(); + + }); + + test('undefined req.clean should return false', (t) => { + const req = {}; + + t.notOk(is_request_sources_only_whosonfirst(req)); + t.end(); + + }); + + test('undefined req.clean.sources should return false', (t) => { + const req = { + clean: {} + }; + + t.notOk(is_request_sources_only_whosonfirst(req)); + t.end(); + + }); + + test('empty req.clean.sources should return false', (t) => { + const req = { + clean: { + sources: [] + } + }; + + t.notOk(is_request_sources_only_whosonfirst(req)); + t.end(); + + }); + + test('sources not \'whosonfirst\' should return false', (t) => { + const req = { + clean: { + sources: [ + 'not whosonfirst' + ] + } + }; + + t.notOk(is_request_sources_only_whosonfirst(req)); + t.end(); + + }); + + test('sources other than \'whosonfirst\' should return false', (t) => { + const req = { + clean: { + sources: [ + 'whosonfirst', 'not whosonfirst' + ] + } + }; + + t.notOk(is_request_sources_only_whosonfirst(req)); + t.end(); + + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /is_request_sources_only_whosonfirst ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; From b9e86dbaaab8996adb6a4aa1da5d440124848802 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 16:21:30 -0400 Subject: [PATCH 31/52] add new predicates to test driver --- test/unit/run.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unit/run.js b/test/unit/run.js index eb6f4de9..603d07d6 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -20,10 +20,13 @@ var tests = [ require('./controller/predicates/has_any_parsed_text_property'), require('./controller/predicates/has_response_data'), require('./controller/predicates/has_results_at_layers'), + require('./controller/predicates/has_request_parameter'), require('./controller/predicates/has_request_errors'), require('./controller/predicates/is_addressit_parse'), require('./controller/predicates/is_admin_only_analysis'), require('./controller/predicates/is_coarse_reverse'), + require('./controller/predicates/is_only_non_admin_layers'), + require('./controller/predicates/is_request_sources_only_whosonfirst'), require('./controller/predicates/is_service_enabled'), require('./controller/predicates/is_single_field_analysis'), require('./helper/diffPlaces'), From 267d0d31e109e24802653ecdc460806f05affc22 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 16:42:57 -0400 Subject: [PATCH 32/52] cleaned up predicates to be more readable, added new ones --- routes/v1.js | 53 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/routes/v1.js b/routes/v1.js index a07b966a..5c8c82be 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -76,6 +76,10 @@ const isAdminOnlyAnalysis = require('../controller/predicates/is_admin_only_anal const hasResultsAtLayers = require('../controller/predicates/has_results_at_layers'); const isSingleFieldAnalysis = require('../controller/predicates/is_single_field_analysis'); const isAddressItParse = require('../controller/predicates/is_addressit_parse'); +const hasRequestCategories = require('../controller/predicates/has_request_parameter')('categories'); +const isOnlyNonAdminLayers = require('../controller/predicates/is_only_non_admin_layers'); +// this can probably be more generalized +const isRequestSourcesOnlyWhosOnFirst = require('../controller/predicates/is_request_sources_only_whosonfirst'); // shorthand for standard early-exit conditions const hasResponseDataOrRequestErrors = any(hasResponseData, hasRequestErrors); @@ -117,7 +121,20 @@ function addRoutes(app, peliasConfig) { const placeholderGeodisambiguationShouldExecute = all( not(hasResponseDataOrRequestErrors), isPlaceholderServiceEnabled, - isAdminOnlyAnalysis + // check request.clean for several conditions first + not( + any( + // layers only contains venue, address, or street + isOnlyNonAdminLayers, + // don't geodisambiguate if categories were requested + hasRequestCategories + ) + ), + any( + // only geodisambiguate if libpostal returned only admin areas or libpostal was skipped + isAdminOnlyAnalysis, + isRequestSourcesOnlyWhosOnFirst + ) ); // execute placeholder if libpostal identified address parts but ids need to @@ -125,14 +142,23 @@ function addRoutes(app, peliasConfig) { const placeholderIdsLookupShouldExecute = all( not(hasResponseDataOrRequestErrors), isPlaceholderServiceEnabled, - // don't run placeholder if there's a number but no street - not(hasNumberButNotStreet), - // don't run placeholder if there's a query or category + // check clean.parsed_text for several conditions that must all be true + all( + // run placeholder if clean.parsed_text has 'street' + hasAnyParsedTextProperty('street'), + // don't run placeholder if there's a query or category + not(hasAnyParsedTextProperty('query', 'category')), + // run placeholder if there are any adminareas identified + hasAnyParsedTextProperty('neighbourhood', 'borough', 'city', 'county', 'state', 'country') + ) + ); + + const searchWithIdsShouldExecute = all( + not(hasRequestErrors), + // don't search-with-ids if there's a query or category not(hasAnyParsedTextProperty('query', 'category')), - // run placeholder if there are any adminareas identified - // hasAnyParsedTextProperty('neighbourhood', 'borough', 'city', 'county', 'state', 'country'), - // don't run placeholder if only postalcode was identified - not(isSingleFieldAnalysis('postalcode')) + // there must be a street + hasAnyParsedTextProperty('street') ); // placeholder should have executed, useful for determining whether to actually @@ -143,14 +169,6 @@ function addRoutes(app, peliasConfig) { placeholderIdsLookupShouldExecute ); - const searchWithIdsShouldExecute = all( - not(hasRequestErrors), - // don't search-with-ids if there's a query or category - not(hasAnyParsedTextProperty('query', 'category')), - // there must be a street - hasAnyParsedTextProperty('street') - ); - // don't execute the cascading fallback query IF placeholder should have executed // that way, if placeholder didn't return anything, don't try to find more things the old way const fallbackQueryShouldExecute = all( @@ -159,15 +177,16 @@ function addRoutes(app, peliasConfig) { not(placeholderShouldHaveExecuted) ); + // defer to addressit for analysis IF there's no response AND placeholder should not have executed const shouldDeferToAddressIt = all( not(hasRequestErrors), not(hasResponseData), not(placeholderShouldHaveExecuted) ); + // call very old prod query if addressit was the parser const oldProdQueryShouldExecute = all( not(hasRequestErrors), - not(hasResponseData), isAddressItParse ); From 5b9275d7fd75d317c2955b2134f0dfd40a9ced28 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 18:05:03 -0400 Subject: [PATCH 33/52] added libpostal controller --- controller/libpostal.js | 30 ++++ test/unit/controller/libpostal.js | 290 ++++++++++++++++++++++++++++++ 2 files changed, 320 insertions(+) create mode 100644 controller/libpostal.js create mode 100644 test/unit/controller/libpostal.js diff --git a/controller/libpostal.js b/controller/libpostal.js new file mode 100644 index 00000000..5e567665 --- /dev/null +++ b/controller/libpostal.js @@ -0,0 +1,30 @@ +const text_analyzer = require('pelias-text-analyzer'); +const _ = require('lodash'); +const iso3166 = require('iso3166-1'); + +function setup(should_execute) { + function controller( req, res, next ){ + // bail early if req/res don't pass conditions for execution + if (!should_execute(req, res)) { + return next(); + } + + // parse text with query parser + const parsed_text = text_analyzer.parse(req.clean.text); + + if (!_.isUndefined(parsed_text)) { + if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) { + parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country)); + } + + req.clean.parsed_text = parsed_text; + } + + return next(); + + } + + return controller; +} + +module.exports = setup; diff --git a/test/unit/controller/libpostal.js b/test/unit/controller/libpostal.js new file mode 100644 index 00000000..e0d012a9 --- /dev/null +++ b/test/unit/controller/libpostal.js @@ -0,0 +1,290 @@ +'use strict'; + +const proxyquire = require('proxyquire').noCallThru(); +const _ = require('lodash'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', t => { + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: () => undefined + } + }); + + t.equal(typeof controller, 'function', 'libpostal is a function'); + t.equal(typeof controller(), 'function', 'libpostal returns a controller'); + t.end(); + + }); + +}; + +module.exports.tests.should_execute = (test, common) => { + test('should_execute returning false should not call text-analyzer', t => { + const should_execute = (req, res) => { + // req and res should be passed to should_execute + t.deepEquals(req, { + clean: { + text: 'original query' + } + }); + t.deepEquals(res, { b: 2 }); + return false; + }; + + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: () => { + t.fail('parse should not have been called'); + } + } + })(should_execute); + + const req = { + clean: { + text: 'original query' + } + }; + const res = { b: 2 }; + + controller(req, res, () => { + t.deepEquals(req, { + clean: { + text: 'original query' + } + }, 'req should not have been modified'); + t.deepEquals(res, { b: 2 }); + t.end(); + }); + + }); + + test('should_execute returning false should not call text-analyzer', t => { + t.plan(5); + + const should_execute = (req, res) => { + // req and res should be passed to should_execute + t.deepEquals(req, { + clean: { + text: 'original query' + } + }); + t.deepEquals(res, { b: 2 }); + return true; + }; + + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: (query) => { + t.equals(query, 'original query'); + return undefined; + } + } + })(should_execute); + + const req = { + clean: { + text: 'original query' + } + }; + const res = { b: 2 }; + + controller(req, res, () => { + t.deepEquals(req, { + clean: { + text: 'original query' + } + }, 'req should not have been modified'); + t.deepEquals(res, { b: 2 }); + t.end(); + }); + + }); + +}; + +module.exports.tests.parse_is_called = (test, common) => { + test('parse returning undefined should not overwrite clean.parsed_text', t => { + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: () => undefined + } + })(() => true); + + const req = { + clean: { + parsed_text: 'original parsed_text' + } + }; + const res = 'this is the response'; + + controller(req, res, () => { + t.deepEquals(req, { + clean: { + parsed_text: 'original parsed_text' + } + }); + t.deepEquals(res, 'this is the response'); + t.end(); + }); + + }); + + test('parse returning something should overwrite clean.parsed_text', t => { + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: () => 'replacement parsed_text' + } + })(() => true); + + const req = { + clean: { + parsed_text: 'original parsed_text' + } + }; + const res = 'this is the response'; + + controller(req, res, () => { + t.deepEquals(req, { + clean: { + parsed_text: 'replacement parsed_text' + } + }); + t.deepEquals(res, 'this is the response'); + t.end(); + }); + + }); + +}; + +module.exports.tests.iso2_conversion = (test, common) => { + test('no country in parse response should not leave country unset', t => { + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: () => ({ + locality: 'this is the locality' + }) + }, + 'iso3166-1': { + is2: () => t.fail('should not have been called'), + to3: () => t.fail('should not have been called') + } + })(() => true); + + const req = { + clean: { + parsed_text: 'original parsed_text' + } + }; + const res = 'this is the response'; + + controller(req, res, () => { + t.deepEquals(req, { + clean: { + parsed_text: { + locality: 'this is the locality' + } + } + }); + t.deepEquals(res, 'this is the response'); + t.end(); + }); + + }); + + test('unknown country should not be converted', t => { + t.plan(3); + + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: () => ({ + country: 'unknown country code' + }) + }, + 'iso3166-1': { + is2: country => { + t.equals(country, 'UNKNOWN COUNTRY CODE'); + return false; + }, + to3: () => t.fail('should not have been called') + } + })(() => true); + + const req = { + clean: { + parsed_text: 'original parsed_text' + } + }; + const res = 'this is the response'; + + controller(req, res, () => { + t.deepEquals(req, { + clean: { + parsed_text: { + country: 'unknown country code' + } + } + }); + t.deepEquals(res, 'this is the response'); + t.end(); + }); + + }); + + test('ISO2 country should be converted to ISO3', t => { + t.plan(4); + + const controller = proxyquire('../../../controller/libpostal', { + 'pelias-text-analyzer': { + parse: () => ({ + country: 'ISO2 COUNTRY CODE' + }) + }, + 'iso3166-1': { + is2: country => { + t.equals(country, 'ISO2 COUNTRY CODE'); + return true; + }, + to3: country => { + t.equals(country, 'ISO2 COUNTRY CODE'); + return 'ISO3 COUNTRY CODE'; + } + } + })(() => true); + + const req = { + clean: { + parsed_text: 'original parsed_text' + } + }; + const res = 'this is the response'; + + controller(req, res, () => { + t.deepEquals(req, { + clean: { + parsed_text: { + country: 'ISO3 COUNTRY CODE' + } + } + }); + t.deepEquals(res, 'this is the response'); + t.end(); + }); + + }); + +}; + +module.exports.all = (tape, common) => { + + function test(name, testFunction) { + return tape(`GET /libpostal ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; From 5dc9737caa2c01114e0fd2331fd7d691f849be78 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 18:05:47 -0400 Subject: [PATCH 34/52] removed text-analyzer call from _text sanitizer --- sanitizer/_text.js | 11 -- test/unit/sanitizer/_text.js | 201 ++++------------------------------- 2 files changed, 22 insertions(+), 190 deletions(-) diff --git a/sanitizer/_text.js b/sanitizer/_text.js index d7d1227b..82662473 100644 --- a/sanitizer/_text.js +++ b/sanitizer/_text.js @@ -1,5 +1,4 @@ const check = require('check-types'); -const text_analyzer = require('pelias-text-analyzer'); const _ = require('lodash'); // validate texts, convert types and apply defaults @@ -16,16 +15,6 @@ function sanitize( raw, clean ){ } else { clean.text = raw.text; - // only call libpostal if there are other sources besides whosonfirst - // since placeholder will take care of it later - if (!_.isEqual(clean.sources, ['whosonfirst'])) { - // parse text with query parser - const parsed_text = text_analyzer.parse(clean.text); - if (check.assigned(parsed_text)) { - clean.parsed_text = parsed_text; - } - } - } return messages; diff --git a/test/unit/sanitizer/_text.js b/test/unit/sanitizer/_text.js index a8a46137..78b9a676 100644 --- a/test/unit/sanitizer/_text.js +++ b/test/unit/sanitizer/_text.js @@ -1,213 +1,56 @@ -var type_mapping = require('../../../helper/type_mapping'); -var proxyquire = require('proxyquire').noCallThru(); +const sanitizer = require('../../../sanitizer/_text'); module.exports.tests = {}; module.exports.tests.text_parser = function(test, common) { - test('non-empty raw.text should call analyzer and set clean.text and clean.parsed_text', function(t) { - var mock_analyzer_response = { - key1: 'value 1', - key2: 'value 2' - }; - - var sanitizer = proxyquire('../../../sanitizer/_text', { - 'pelias-text-analyzer': { parse: function(query) { - return mock_analyzer_response; - } - }}); - - var raw = { - text: 'raw input' - }; - var clean = { - }; - - var expected_clean = { - text: raw.text, - parsed_text: mock_analyzer_response - }; - - var messages = sanitizer(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('empty raw.text should add error message', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_text', { - 'pelias-text-analyzer': { parse: function(query) { - throw new Error('analyzer should not have been called'); - } - }}); - - var raw = { - text: '' - }; - var clean = { - }; - - var expected_clean = { - }; - - var messages = sanitizer(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0'], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('undefined raw.text should add error message', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_text', { - 'pelias-text-analyzer': { parse: function(query) { - throw new Error('analyzer should not have been called'); - } - }}); - - var raw = { - text: undefined - }; - var clean = { - }; - - var expected_clean = { - }; - - var messages = sanitizer(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0'], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer.parse returning undefined should not overwrite clean.parsed_text', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_text', { - 'pelias-text-analyzer': { parse: function(query) { - return undefined; - } - }}); - - var raw = { - text: 'raw input' - }; - var clean = { - parsed_text: 'original clean.parsed_text' - }; - - var expected_clean = { - text: raw.text, - parsed_text: 'original clean.parsed_text' - }; - - var messages = sanitizer(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer.parse returning null should not overwrite clean.parsed_text', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_text', { - 'pelias-text-analyzer': { parse: function(query) { - return null; - } - }}); - - var raw = { - text: 'raw input' - }; - var clean = { - parsed_text: 'original clean.parsed_text' - }; - - var expected_clean = { - text: raw.text, - parsed_text: 'original clean.parsed_text' - }; - - var messages = sanitizer(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('sources=whosonfirst should not call text_analyzer and set clean.text from raw.text', (t) => { - const sanitizer = proxyquire('../../../sanitizer/_text', { - 'pelias-text-analyzer': { parse: query => t.fail('should not have been called') } - }); - + test('non-empty raw.text should call analyzer and set clean.text and not clean.parsed_text', t => { const raw = { - text: 'raw clean.text' + text: 'raw input' }; const clean = { - sources: ['whosonfirst'], text: 'original clean.text' }; const expected_clean = { - sources: ['whosonfirst'], - text: 'raw clean.text' + text: raw.text }; const messages = sanitizer(raw, clean); t.deepEquals(clean, expected_clean); - t.deepEquals(messages, { errors: [], warnings: [] }); + t.deepEquals(messages, { warnings: [], errors: [] }, 'no errors/warnings'); t.end(); }); - test('sources with whosonfirst + others should call analyzer', (t) => { - const sanitizer = proxyquire('../../../sanitizer/_text', { - 'pelias-text-analyzer': { parse: function(query) { - return { - key1: 'value 1', - key2: 'value 2' - }; - } - }}); + test('undefined/empty raw.text should add error message', t => { + [undefined, ''].forEach(val => { + const raw = { + text: val + }; + const clean = { + }; - const raw = { - text: 'raw text' - }; - const clean = { - sources: ['whosonfirst', 'another source'], - text: 'clean text' - }; + const expected_clean = { + }; - const expected_clean = { - sources: ['whosonfirst', 'another source'], - text: 'raw text', - parsed_text: { - key1: 'value 1', - key2: 'value 2' - } - }; + const messages = sanitizer(raw, clean); - const messages = sanitizer(raw, clean); + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0'], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + }); - t.deepEquals(clean, expected_clean); - t.deepEquals(messages, { errors: [], warnings: [] }); t.end(); }); }; -module.exports.all = function (tape, common) { +module.exports.all = (tape, common) => { function test(name, testFunction) { - return tape('sanitizeR _text: ' + name, testFunction); + return tape(`sanitizer _text: ${name}`, testFunction); } for( var testCase in module.exports.tests ){ From 85b831b1610ec956ec9f080ea69c78ce8ae06b72 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 18:06:38 -0400 Subject: [PATCH 35/52] removed iso2_to_iso3 and city_name_standardizer from wrapper --- test/unit/sanitizer/search.js | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/test/unit/sanitizer/search.js b/test/unit/sanitizer/search.js index bbe85713..6c62caf1 100644 --- a/test/unit/sanitizer/search.js +++ b/test/unit/sanitizer/search.js @@ -22,14 +22,6 @@ module.exports.tests.sanitize = (test, common) => { called_sanitizers.push('_text'); return { errors: [], warnings: [] }; }, - '../sanitizer/_iso2_to_iso3': () => { - called_sanitizers.push('_iso2_to_iso3'); - return { errors: [], warnings: [] }; - }, - '../sanitizer/_city_name_standardizer': () => { - called_sanitizers.push('_city_name_standardizer'); - return { errors: [], warnings: [] }; - }, '../sanitizer/_size': function() { if (_.isEmpty(arguments)) { return () => { @@ -114,8 +106,6 @@ module.exports.tests.sanitize = (test, common) => { '_boundary_country', '_categories', '_text', - '_iso2_to_iso3', - '_city_name_standardizer', '_geonames_warnings' ]; From a39d760bb19d9cbc4e9cf0ed9e84644cb272ab88 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 18:07:33 -0400 Subject: [PATCH 36/52] added libpostal controller test --- test/unit/run.js | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/run.js b/test/unit/run.js index 603d07d6..b49a3ac7 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -13,6 +13,7 @@ var tests = [ require('./schema'), require('./controller/coarse_reverse'), require('./controller/index'), + require('./controller/libpostal'), require('./controller/place'), require('./controller/placeholder'), require('./controller/search'), From 63c8eef0df34c423f8712d4fd385a952d4f51e91 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 18:08:19 -0400 Subject: [PATCH 37/52] added libpostal as controller into v1 /search flow --- routes/v1.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/routes/v1.js b/routes/v1.js index 5c8c82be..6ad31202 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -28,6 +28,7 @@ var middleware = { var controllers = { coarse_reverse: require('../controller/coarse_reverse'), mdToHTML: require('../controller/markdownToHtml'), + libpostal: require('../controller/libpostal'), place: require('../controller/place'), placeholder: require('../controller/placeholder'), search: require('../controller/search'), @@ -116,6 +117,11 @@ function addRoutes(app, peliasConfig) { isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData) ); + const libpostalShouldExecute = all( + not(hasRequestErrors), + not(isRequestSourcesOnlyWhosOnFirst) + ); + // execute placeholder if libpostal only parsed as admin-only and needs to // be geodisambiguated const placeholderGeodisambiguationShouldExecute = all( @@ -220,6 +226,7 @@ function addRoutes(app, peliasConfig) { sanitizers.search.middleware(peliasConfig.api), middleware.requestLanguage, middleware.calcSize(), + controllers.libpostal(libpostalShouldExecute), controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute), controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute), controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute), From 2b70b41a017a7f79d32b5e507114b30cd9cec789 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 13 Jul 2017 22:12:58 -0400 Subject: [PATCH 38/52] move _text sanitizer --- test/unit/sanitizer/search.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/sanitizer/search.js b/test/unit/sanitizer/search.js index 6c62caf1..e887280a 100644 --- a/test/unit/sanitizer/search.js +++ b/test/unit/sanitizer/search.js @@ -96,6 +96,7 @@ module.exports.tests.sanitize = (test, common) => { const expected_sanitizers = [ '_single_scalar_parameters', '_deprecate_quattroshapes', + '_text', '_size', '_targets/layers', '_targets/sources', @@ -105,7 +106,6 @@ module.exports.tests.sanitize = (test, common) => { '_geo_search', '_boundary_country', '_categories', - '_text', '_geonames_warnings' ]; From 257ee8161f83fa6a216a80bf56fbefe130be3433 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 17 Jul 2017 14:17:18 -0400 Subject: [PATCH 39/52] removed unused predicate --- .../predicates/is_single_field_analysis.js | 5 - routes/v1.js | 1 - .../predicates/is_single_field_analysis.js | 94 ------------------- test/unit/run.js | 1 - 4 files changed, 101 deletions(-) delete mode 100644 controller/predicates/is_single_field_analysis.js delete mode 100644 test/unit/controller/predicates/is_single_field_analysis.js diff --git a/controller/predicates/is_single_field_analysis.js b/controller/predicates/is_single_field_analysis.js deleted file mode 100644 index 7e5f3c66..00000000 --- a/controller/predicates/is_single_field_analysis.js +++ /dev/null @@ -1,5 +0,0 @@ -const _ = require('lodash'); - -// predicate that determines if parsed_text contains only the supplied property -module.exports = property => (req, res) => - _.isEqual(_.keys(_.get(req, ['clean', 'parsed_text'])), [property]); diff --git a/routes/v1.js b/routes/v1.js index 6ad31202..2cc2eb01 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -75,7 +75,6 @@ const hasRequestErrors = require('../controller/predicates/has_request_errors'); const isCoarseReverse = require('../controller/predicates/is_coarse_reverse'); const isAdminOnlyAnalysis = require('../controller/predicates/is_admin_only_analysis'); const hasResultsAtLayers = require('../controller/predicates/has_results_at_layers'); -const isSingleFieldAnalysis = require('../controller/predicates/is_single_field_analysis'); const isAddressItParse = require('../controller/predicates/is_addressit_parse'); const hasRequestCategories = require('../controller/predicates/has_request_parameter')('categories'); const isOnlyNonAdminLayers = require('../controller/predicates/is_only_non_admin_layers'); diff --git a/test/unit/controller/predicates/is_single_field_analysis.js b/test/unit/controller/predicates/is_single_field_analysis.js deleted file mode 100644 index 06328fa0..00000000 --- a/test/unit/controller/predicates/is_single_field_analysis.js +++ /dev/null @@ -1,94 +0,0 @@ -'use strict'; - -const _ = require('lodash'); -const is_single_field_analysis = require('../../../../controller/predicates/is_single_field_analysis'); - -module.exports.tests = {}; - -module.exports.tests.interface = (test, common) => { - test('valid interface', (t) => { - t.ok(_.isFunction(is_single_field_analysis), 'is_single_field_analysis is a function'); - t.end(); - }); -}; - -module.exports.tests.true_conditions = (test, common) => { - test('defined request.clean.parsed_text.property should return true', (t) => { - const req = { - clean: { - parsed_text: { - property: 'value' - } - } - }; - - t.ok(is_single_field_analysis('property')(req)); - t.end(); - - }); - -}; - -module.exports.tests.false_conditions = (test, common) => { - test('undefined request should return false', (t) => { - t.notOk(is_single_field_analysis('property')()); - t.end(); - - }); - - test('undefined request.clean should return false', (t) => { - const req = {}; - - t.notOk(is_single_field_analysis('property')(req)); - t.end(); - - }); - - test('undefined request.clean.parsed_text should return false', (t) => { - const req = { - clean: {} - }; - - t.notOk(is_single_field_analysis('property')(req)); - t.end(); - - }); - - test('request.clean.parsed_text with none of the supplied properties should return false', (t) => { - const req = { - clean: { - parsed_text: {} - } - }; - - t.notOk(is_single_field_analysis('property')(req)); - t.end(); - - }); - - test('request.clean.parsed_text with none of the supplied properties should return false', (t) => { - const req = { - clean: { - parsed_text: { - property: 'value', - another_property: 'value' - } - } - }; - - t.notOk(is_single_field_analysis('property')(req)); - t.end(); - - }); - -}; - -module.exports.all = (tape, common) => { - function test(name, testFunction) { - return tape(`GET /is_single_field_analysis ${name}`, testFunction); - } - - for( const testCase in module.exports.tests ){ - module.exports.tests[testCase](test, common); - } -}; diff --git a/test/unit/run.js b/test/unit/run.js index b49a3ac7..d95c0e45 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -29,7 +29,6 @@ var tests = [ require('./controller/predicates/is_only_non_admin_layers'), require('./controller/predicates/is_request_sources_only_whosonfirst'), require('./controller/predicates/is_service_enabled'), - require('./controller/predicates/is_single_field_analysis'), require('./helper/diffPlaces'), require('./helper/geojsonify'), require('./helper/logging'), From e82993cb3fe3e91866a5072028dc41e42e082fe6 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 09:50:16 -0400 Subject: [PATCH 40/52] added comments and syntax updates --- controller/libpostal.js | 1 + controller/placeholder.js | 8 ++++++++ controller/predicates/has_any_parsed_text_property.js | 1 + controller/predicates/has_request_parameter.js | 2 +- controller/predicates/is_addressit_parse.js | 2 +- controller/predicates/is_only_non_admin_layers.js | 6 ++++-- .../predicates/is_request_sources_only_whosonfirst.js | 8 +++++++- 7 files changed, 23 insertions(+), 5 deletions(-) diff --git a/controller/libpostal.js b/controller/libpostal.js index 5e567665..d7a5ab2d 100644 --- a/controller/libpostal.js +++ b/controller/libpostal.js @@ -13,6 +13,7 @@ function setup(should_execute) { const parsed_text = text_analyzer.parse(req.clean.text); if (!_.isUndefined(parsed_text)) { + // if a known ISO2 country was parsed, convert it to ISO3 if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) { parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country)); } diff --git a/controller/placeholder.js b/controller/placeholder.js index 8e97765b..efc62a7b 100644 --- a/controller/placeholder.js +++ b/controller/placeholder.js @@ -98,6 +98,7 @@ function getBoundaryCountryFilter(clean, geometric_filters_apply) { // return a function that detects if a result is inside a bbox if a bbox is available // if there's no bbox, return a function that always returns true function getBoundaryRectangleFilter(clean, geometric_filters_apply) { + // check to see if boundary.rect.min_lat/min_lon/max_lat/max_lon are all available if (geometric_filters_apply && ['min_lat', 'min_lon', 'max_lat', 'max_lon'].every((f) => { return _.has(clean, `boundary.rect.${f}`); })) { @@ -107,12 +108,14 @@ function getBoundaryRectangleFilter(clean, geometric_filters_apply) { { latitude: clean['boundary.rect.max_lat'], longitude: clean['boundary.rect.max_lon'] }, { latitude: clean['boundary.rect.min_lat'], longitude: clean['boundary.rect.max_lon'] } ]; + // isPointInside takes polygon last, so create a function that has it pre-populated const isPointInsidePolygon = _.partialRight(geolib.isPointInside, polygon); return _.partial(isInsideGeometry, isPointInsidePolygon); } + // there's no bbox filter, so return a function that always returns true return () => true; } @@ -120,6 +123,7 @@ function getBoundaryRectangleFilter(clean, geometric_filters_apply) { // return a function that detects if a result is inside a circle if a circle is available // if there's no circle, return a function that always returns true function getBoundaryCircleFilter(clean, geometric_filters_apply) { + // check to see if boundary.circle.lat/lon/radius are all available if (geometric_filters_apply && ['lat', 'lon', 'radius'].every((f) => { return _.has(clean, `boundary.circle.${f}`); })) { @@ -128,12 +132,15 @@ function getBoundaryCircleFilter(clean, geometric_filters_apply) { longitude: clean['boundary.circle.lon'] }; const radiusInMeters = clean['boundary.circle.radius'] * 1000; + + // isPointInCircle takes circle/radius last, so create a function that has them pre-populated const isPointInCircle = _.partialRight(geolib.isPointInCircle, center, radiusInMeters); return _.partial(isInsideGeometry, isPointInCircle); } + // there's no circle filter, so return a function that always returns true return () => true; } @@ -143,6 +150,7 @@ function isInsideGeometry(f, result) { return hasLatLon(result) ? f(getLatLon(result)) : false; } +// returns true if hierarchyElement has both name and id function placetypeHasNameAndId(hierarchyElement) { return !_.isEmpty(_.trim(hierarchyElement.name)) && !_.isEmpty(_.trim(hierarchyElement.id)); diff --git a/controller/predicates/has_any_parsed_text_property.js b/controller/predicates/has_any_parsed_text_property.js index 8f1be8b2..66f3a8ac 100644 --- a/controller/predicates/has_any_parsed_text_property.js +++ b/controller/predicates/has_any_parsed_text_property.js @@ -7,6 +7,7 @@ module.exports = function() { // save off requested properties since arguments can't be referenced later const properties = _.values(arguments); + // return true if any of the supplied properties are in clean.parsed_text return (request, response) => !_.isEmpty( _.intersection( properties, diff --git a/controller/predicates/has_request_parameter.js b/controller/predicates/has_request_parameter.js index 707f0c1d..921dfc00 100644 --- a/controller/predicates/has_request_parameter.js +++ b/controller/predicates/has_request_parameter.js @@ -1,3 +1,3 @@ const _ = require('lodash'); -module.exports = (parameter) => (req, res) => _.has(req, ['clean', parameter]); +module.exports = (parameter) => (req, res) => (_.has(req, ['clean', parameter])); diff --git a/controller/predicates/is_addressit_parse.js b/controller/predicates/is_addressit_parse.js index dac3bcfc..d52d62d5 100644 --- a/controller/predicates/is_addressit_parse.js +++ b/controller/predicates/is_addressit_parse.js @@ -1,4 +1,4 @@ const _ = require('lodash'); // returns true iff req.clean.parser is addressit -module.exports = (req, res) => _.get(req, 'clean.parser') === 'addressit'; +module.exports = (req, res) => (_.get(req, 'clean.parser') === 'addressit'); diff --git a/controller/predicates/is_only_non_admin_layers.js b/controller/predicates/is_only_non_admin_layers.js index 03d15fd6..03052571 100644 --- a/controller/predicates/is_only_non_admin_layers.js +++ b/controller/predicates/is_only_non_admin_layers.js @@ -1,5 +1,7 @@ const _ = require('lodash'); // return IFF req.clean.layers is empty OR there are non-venue/address/street layers -module.exports = (req, res) => !_.isEmpty(_.get(req, 'clean.layers', [])) && - _.isEmpty(_.difference(req.clean.layers, ['venue', 'address', 'street'])); +module.exports = (req, res) => ( + !_.isEmpty(_.get(req, 'clean.layers', [])) && + _.isEmpty(_.difference(req.clean.layers, ['venue', 'address', 'street'])) +); diff --git a/controller/predicates/is_request_sources_only_whosonfirst.js b/controller/predicates/is_request_sources_only_whosonfirst.js index 855c1197..58644b27 100644 --- a/controller/predicates/is_request_sources_only_whosonfirst.js +++ b/controller/predicates/is_request_sources_only_whosonfirst.js @@ -1,3 +1,9 @@ const _ = require('lodash'); -module.exports = (req, res) => _.isEqual(_.get(req, 'clean.sources', []), ['whosonfirst']); +// returns true IFF clean.sources only contains 'whosonfirst' +module.exports = (req, res) => ( + _.isEqual( + _.get(req, 'clean.sources', []), + ['whosonfirst'] + ) +); From 9aca556dd3c4dbea3f146532cbf97df8dfa17885 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 15:54:10 -0400 Subject: [PATCH 41/52] switch to simpler check since we know what's coming back --- controller/libpostal.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/libpostal.js b/controller/libpostal.js index d7a5ab2d..6c913cd7 100644 --- a/controller/libpostal.js +++ b/controller/libpostal.js @@ -12,7 +12,7 @@ function setup(should_execute) { // parse text with query parser const parsed_text = text_analyzer.parse(req.clean.text); - if (!_.isUndefined(parsed_text)) { + if (parsed_text !== undefined) { // if a known ISO2 country was parsed, convert it to ISO3 if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) { parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country)); From 5dcdf60a95212fb1e814e79c236f1801d48a2bcf Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 15:56:36 -0400 Subject: [PATCH 42/52] added comment --- controller/predicates/has_request_parameter.js | 1 + 1 file changed, 1 insertion(+) diff --git a/controller/predicates/has_request_parameter.js b/controller/predicates/has_request_parameter.js index 921dfc00..4567f4df 100644 --- a/controller/predicates/has_request_parameter.js +++ b/controller/predicates/has_request_parameter.js @@ -1,3 +1,4 @@ const _ = require('lodash'); +// returns true IFF req.clean has a key with the supplied name module.exports = (parameter) => (req, res) => (_.has(req, ['clean', parameter])); From 15b07cfbf867289b89d4f408beccd0fc57b0976d Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 16:06:26 -0400 Subject: [PATCH 43/52] clarified parameter names, added comments --- controller/placeholder.js | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/controller/placeholder.js b/controller/placeholder.js index efc62a7b..48e7dfd5 100644 --- a/controller/placeholder.js +++ b/controller/placeholder.js @@ -85,21 +85,20 @@ function atLeastOneLineageMatchesBoundaryCountry(boundaryCountry, result) { } // return a function that detects if a result has at least one lineage in boundary.country -// if there's no boundary.country, return a function that always returns true -function getBoundaryCountryFilter(clean, geometric_filters_apply) { - if (_.has(clean, 'boundary.country') && geometric_filters_apply) { +function getBoundaryCountryFilter(clean, do_geometric_filters_apply) { + if ( do_geometric_filters_apply && _.has(clean, 'boundary.country') ) { return _.partial(atLeastOneLineageMatchesBoundaryCountry, clean['boundary.country']); } + // there's no boundary.country filter, so return a function that always returns true return () => true; } // return a function that detects if a result is inside a bbox if a bbox is available -// if there's no bbox, return a function that always returns true -function getBoundaryRectangleFilter(clean, geometric_filters_apply) { +function getBoundaryRectangleFilter(clean, do_geometric_filters_apply) { // check to see if boundary.rect.min_lat/min_lon/max_lat/max_lon are all available - if (geometric_filters_apply && ['min_lat', 'min_lon', 'max_lat', 'max_lon'].every((f) => { + if (do_geometric_filters_apply && ['min_lat', 'min_lon', 'max_lat', 'max_lon'].every((f) => { return _.has(clean, `boundary.rect.${f}`); })) { const polygon = [ @@ -121,10 +120,9 @@ function getBoundaryRectangleFilter(clean, geometric_filters_apply) { } // return a function that detects if a result is inside a circle if a circle is available -// if there's no circle, return a function that always returns true -function getBoundaryCircleFilter(clean, geometric_filters_apply) { +function getBoundaryCircleFilter(clean, do_geometric_filters_apply) { // check to see if boundary.circle.lat/lon/radius are all available - if (geometric_filters_apply && ['lat', 'lon', 'radius'].every((f) => { + if (do_geometric_filters_apply && ['lat', 'lon', 'radius'].every((f) => { return _.has(clean, `boundary.circle.${f}`); })) { const center = { @@ -168,7 +166,7 @@ function synthesizeDocs(boundaryCountry, result) { logger.error(`could not parse centroid for id ${result.id}`); } - // lodash conformsTo verifies that an object has a property with a certain format + // _.conformsTo verifies that an object property has a certain format if (_.conformsTo(result.geom, { 'bbox': is4CommaDelimitedNumbers } )) { const parsedBoundingBox = result.geom.bbox.split(',').map(_.toFinite); doc.setBoundingBox({ @@ -221,7 +219,7 @@ function buildESDoc(doc) { return _.extend(esDoc.data, { _id: esDoc._id, _type: esDoc._type }); } -function setup(placeholderService, geometric_filters_apply, should_execute) { +function setup(placeholderService, do_geometric_filters_apply, should_execute) { function controller( req, res, next ){ // bail early if req/res don't pass conditions for execution if (!should_execute(req, res)) { @@ -234,7 +232,7 @@ function setup(placeholderService, geometric_filters_apply, should_execute) { req.errors.push( _.get(err, 'message', err)); } else { - const boundaryCountry = geometric_filters_apply ? _.get(req, ['clean', 'boundary.country']) : undefined; + const boundaryCountry = do_geometric_filters_apply ? _.get(req, ['clean', 'boundary.country']) : undefined; // convert results to ES docs // boundary.country filter must happen after synthesis since multiple @@ -249,13 +247,13 @@ function setup(placeholderService, geometric_filters_apply, should_execute) { // filter out results that don't match on requested layer(s) .filter(getLayersFilter(req.clean)) // filter out results that don't match on any lineage country - .filter(getBoundaryCountryFilter(req.clean, geometric_filters_apply)) + .filter(getBoundaryCountryFilter(req.clean, do_geometric_filters_apply)) // clean up geom.lat/lon for boundary rect/circle checks .map(numberifyGeomLatLon) // filter out results that aren't in the boundary.rect - .filter(getBoundaryRectangleFilter(req.clean, geometric_filters_apply)) + .filter(getBoundaryRectangleFilter(req.clean, do_geometric_filters_apply)) // filter out results that aren't in the boundary.circle - .filter(getBoundaryCircleFilter(req.clean, geometric_filters_apply)) + .filter(getBoundaryCircleFilter(req.clean, do_geometric_filters_apply)) // convert results to ES docs .map(_.partial(synthesizeDocs, boundaryCountry)); From f9996cd6b0176f7177e451ad0ab889f41f3cb0f0 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 16:07:39 -0400 Subject: [PATCH 44/52] added word --- controller/predicates/is_only_non_admin_layers.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/predicates/is_only_non_admin_layers.js b/controller/predicates/is_only_non_admin_layers.js index 03052571..f36ab097 100644 --- a/controller/predicates/is_only_non_admin_layers.js +++ b/controller/predicates/is_only_non_admin_layers.js @@ -1,6 +1,6 @@ const _ = require('lodash'); -// return IFF req.clean.layers is empty OR there are non-venue/address/street layers +// return true IFF req.clean.layers is empty OR there are non-venue/address/street layers module.exports = (req, res) => ( !_.isEmpty(_.get(req, 'clean.layers', [])) && _.isEmpty(_.difference(req.clean.layers, ['venue', 'address', 'street'])) From aac1780db78a78282d98fc2b1af326c725b01273 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 16:08:52 -0400 Subject: [PATCH 45/52] reworded comment --- controller/predicates/is_request_sources_only_whosonfirst.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/predicates/is_request_sources_only_whosonfirst.js b/controller/predicates/is_request_sources_only_whosonfirst.js index 58644b27..c3071ffb 100644 --- a/controller/predicates/is_request_sources_only_whosonfirst.js +++ b/controller/predicates/is_request_sources_only_whosonfirst.js @@ -1,6 +1,6 @@ const _ = require('lodash'); -// returns true IFF clean.sources only contains 'whosonfirst' +// returns true IFF 'whosonfirst' is the only requested source module.exports = (req, res) => ( _.isEqual( _.get(req, 'clean.sources', []), From 7037573d8b5361a35ff19a21bedfcf05e901c6cc Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 16:13:48 -0400 Subject: [PATCH 46/52] removed task comment --- query/address_search_using_ids.js | 1 - 1 file changed, 1 deletion(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index 5a880890..413ab3ce 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -140,7 +140,6 @@ function generateQuery( clean, res ){ } // boundary circle - // @todo: change these to the correct request variable names if( check.number(clean['boundary.circle.lat']) && check.number(clean['boundary.circle.lon']) ){ vs.set({ From 321ba0d45860a7c132b94f969178a18a79a8fb54 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 16:28:42 -0400 Subject: [PATCH 47/52] fixed test --- .../predicates/is_request_sources_only_whosonfirst.js | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/test/unit/controller/predicates/is_request_sources_only_whosonfirst.js b/test/unit/controller/predicates/is_request_sources_only_whosonfirst.js index bbf1aa44..24f861ad 100644 --- a/test/unit/controller/predicates/is_request_sources_only_whosonfirst.js +++ b/test/unit/controller/predicates/is_request_sources_only_whosonfirst.js @@ -31,15 +31,7 @@ module.exports.tests.true_conditions = (test, common) => { module.exports.tests.false_conditions = (test, common) => { test('undefined req should return false', (t) => { - const req = { - clean: { - sources: [ - 'not whosonfirst' - ] - } - }; - - t.notOk(is_request_sources_only_whosonfirst(req)); + t.notOk(is_request_sources_only_whosonfirst(undefined)); t.end(); }); From b0d2e8c74a03d28d0792c136305ee6e1da5e39a5 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 16:47:02 -0400 Subject: [PATCH 48/52] capitalization --- controller/predicates/is_addressit_parse.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/predicates/is_addressit_parse.js b/controller/predicates/is_addressit_parse.js index d52d62d5..48060eed 100644 --- a/controller/predicates/is_addressit_parse.js +++ b/controller/predicates/is_addressit_parse.js @@ -1,4 +1,4 @@ const _ = require('lodash'); -// returns true iff req.clean.parser is addressit +// returns true IFF req.clean.parser is addressit module.exports = (req, res) => (_.get(req, 'clean.parser') === 'addressit'); From f176b8665780f70877a1a1216359b4596ab659c7 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 24 Jul 2017 21:43:13 -0400 Subject: [PATCH 49/52] drastically revised comments --- query/address_search_using_ids.js | 96 ++++++++++++++++++------------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index 413ab3ce..d7e2d1fd 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -20,65 +20,80 @@ addressUsingIdsQuery.filter( peliasQuery.view.boundary_rect ); addressUsingIdsQuery.filter( peliasQuery.view.sources ); // -------------------------------- - -// Red Lion, PA -- parsed as locality/state, localadmin/state, and neighbourhood/state -// Chelsea -- parsed as neighbourhood, localadmin, and locality -// Manhattan -- parsed as borough, locality, and localadmin -// Luxembourg -- parsed as country, locality, and region - -// if any placeholder results are at neighbourhood, borough, locality, or localadmin layers, filter by those ids at those layers -// fallback to county -// if any placeholder results are at county or macrocounty layers, filter by those ids at those layers -// fallback to region -// if any placeholder results are at region or macroregion layers, filter by those ids at those layers -// fallback to dependency/country -// if any placeholder results are at dependency or country layers, filter by those ids at those layers - - -// address in Red Lion, PA -- find results at layer=address -// neighbourhood_id in [85844063, 85844067] -// locality_id in [101717221] -// localadmin_id in [404487867] -// search all of the above - -// address in Chelsea -// neighbourhood_id in [85786511, 85810589, 85769021, 85890029, 85810579, 85810591, 85810575, 85772883, 420514219] -// locality_id in [85950359, 85914491, 101932747, 85951865, 101715289, 85943049, 101733697, 101722101, 101738587] -// localadmin_id in [404476575, 404508239, 404474971, 404527169, 404494675, 404503811, 404519887, 404488679, 404538119] - -// address in Manhattan +// This query is a departure from traditional Pelias queries where textual +// names of admin areas were looked up. This query uses the ids returned by +// placeholder for lookups which dramatically reduces the amount of information +// that ES has to store and allows us to have placeholder handle altnames on +// behalf of Pelias. +// +// For the happy path, an input like '30 West 26th Street, Manhattan' would result +// in: // neighbourhood_id in [] // borough_id in [421205771] // locality_id in [85945171, 85940551, 85972655] // localadmin_id in [404502889, 404499147, 404502891, 85972655] -// search all of the above - -// address in Luxembourg -// country_id in [85633275] -// region_id in [85681727, 85673875] -// locality_id in [101751765] -// search locality first, then region perhaps - - -// if there are locality/localadmin layers, return ['locality', 'localadmin'] -// if there are region/macroregion layers, return ['region', 'macroregion'] +// +// Where the ids are for all the various Manhattans. Each of those could +// conceivably be the Manhattan that the user was referring to so so all must be +// queried for at the same time. +// +// A counter example for this is '1 West Market Street, York, PA' where York, PA +// can be interpreted as a locality OR county. From experience, when there's +// ambiguity between locality and county for an input, the user is, with complete +// metaphysical certitude, referring to the city. If they were referring to the +// county, they would have entered 'York County, PA'. The point is that it's +// insufficient to just query for all ids because, in this case, '1 West Market Street' +// in other cities in York County, PA would be returned and would be both jarring +// to the user and almost certainly leads to incorrect results. For example, +// the following could be returned (all are towns in York County, PA): +// - 1 West Market Street, Dallastown, PA +// - 1 West Market Street, Fawn Grove, PA +// - 1 West Market Street, Shrewsbury, PA +// etc. +// +// To avoid this calamitous response, this query takes the approach of +// "granularity bands". That is, if there are any ids in the first set of any +// of these granularities: +// - neighbourhood +// - borough +// - locality +// - localadmin +// - region +// - macroregion +// - dependency +// - country +// +// then query for all ids in only those layers. Falling back, if there are +// no ids in those layers, query for the county/macrocounty layers. +// +// This methodology ensures that no happened-to-match-on-county results are returned. +// +// The decision was made to include all other layers in one to solve the issue +// where a country and city share a name, such as Mexico, which could be +// interpreted as a country AND city (in Missouri). The data itself will sort +// out which is correct. That is, it's unlikely that "11 Rock Springs Dr" exists +// in Mexico the country due to naming conventions and would be filtered out +// (though it could, but that's good because it's legitimate) const granularity_bands = [ ['neighbourhood', 'borough', 'locality', 'localadmin', 'region', 'macroregion', 'dependency', 'country'], ['county', 'macrocounty'] ]; +// returns IFF there are *any* results in the granularity band function anyResultsAtGranularityBand(results, band) { return results.some(result => _.includes(band, result.layer)); } +// returns the ids of results at the requested layer function getIdsAtLayer(results, layer) { return results.filter(result => result.layer === layer).map(_.property('source_id')); } /** map request variables to query variables for all inputs - provided by this HTTP request. + provided by this HTTP request. This function operates on res.data which is the + Document-ified placeholder repsonse. **/ function generateQuery( clean, res ){ const vs = new peliasQuery.Vars( defaults ); @@ -103,8 +118,11 @@ function generateQuery( clean, res ){ } vs.var( 'input:street', clean.parsed_text.street ); + // find the first granularity band for which there are results const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band)); + // if there's a granularity band, accumulate the ids from each layer in the band + // into an object mapping layer->ids of those layers if (granularity_band) { const layers_to_ids = granularity_band.reduce((acc, layer) => { acc[layer] = getIdsAtLayer(res.data, layer); From ab5ff1802d04a1aa103995793612e5d032a9f279 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Thu, 27 Jul 2017 18:27:02 -0400 Subject: [PATCH 50/52] fix pelias-text-analyzer version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index b10d79fa..ebd63d9d 100644 --- a/package.json +++ b/package.json @@ -62,7 +62,7 @@ "pelias-model": "5.0.0", "pelias-query": "pelias/query#9673b57", "pelias-sorting": "1.0.1", - "pelias-text-analyzer": "pelias/text-analyzer#91196dd", + "pelias-text-analyzer": "1.9.0", "predicates": "^1.0.1", "retry": "^0.10.1", "stats-lite": "^2.0.4", From 0ce54c2d1ec152e3516adb37ae0a32324956a6d9 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 31 Jul 2017 11:23:33 -0400 Subject: [PATCH 51/52] bumped pelias-query version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ebd63d9d..0adb9f93 100644 --- a/package.json +++ b/package.json @@ -60,7 +60,7 @@ "pelias-logger": "0.2.0", "pelias-microservice-wrapper": "1.1.3", "pelias-model": "5.0.0", - "pelias-query": "pelias/query#9673b57", + "pelias-query": "9.0.0", "pelias-sorting": "1.0.1", "pelias-text-analyzer": "1.9.0", "predicates": "^1.0.1", From 447d0f74b9921620d38601db766655616cc8d88d Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 31 Jul 2017 11:25:13 -0400 Subject: [PATCH 52/52] don't serialize input:layers when passing to query --- query/address_search_using_ids.js | 2 +- test/unit/query/address_search_using_ids.js | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index d7e2d1fd..a22cb917 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -131,7 +131,7 @@ function generateQuery( clean, res ){ // use an object here instead of calling `set` since that flattens out an // object into key/value pairs and makes identifying layers harder in query module - vs.var('input:layers', JSON.stringify(layers_to_ids)); + vs.var('input:layers', layers_to_ids); } diff --git a/test/unit/query/address_search_using_ids.js b/test/unit/query/address_search_using_ids.js index 665aa681..27ecb85f 100644 --- a/test/unit/query/address_search_using_ids.js +++ b/test/unit/query/address_search_using_ids.js @@ -251,7 +251,7 @@ module.exports.tests.granularity_bands = (test, common) => { const generatedQuery = generateQuery(clean, res); - t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + t.deepEquals(generatedQuery.body.vs.var('input:layers').$, { neighbourhood: [1, 11], borough: [2, 12], locality: [3, 13], @@ -296,7 +296,7 @@ module.exports.tests.granularity_bands = (test, common) => { const generatedQuery = generateQuery(clean, res); - t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + t.deepEquals(generatedQuery.body.vs.var('input:layers').$, { neighbourhood: [1], borough: [], locality: [], @@ -354,7 +354,7 @@ module.exports.tests.granularity_bands = (test, common) => { const generatedQuery = generateQuery(clean, res); - t.deepEquals(JSON.parse(generatedQuery.body.vs.var('input:layers')), { + t.deepEquals(generatedQuery.body.vs.var('input:layers').$, { county: [1, 4], macrocounty: [2, 5] });