From 40ddc93bbf2fa0362f452dd1c9411722ba7dd5b9 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 12 Jul 2017 15:54:12 -0400 Subject: [PATCH] added conditional fallback to addressit --- controller/predicates/is_addressit_parse.js | 4 + .../predicates/is_admin_only_analysis.js | 2 +- routes/v1.js | 29 ++- sanitizer/_text_addressit.js | 1 + sanitizer/defer_to_addressit.js | 32 +++ sanitizer/search_fallback.js | 30 --- .../predicates/is_addressit_parse.js | 73 +++++++ .../predicates/is_admin_only_analysis.js | 4 +- test/unit/run.js | 3 +- test/unit/sanitizer/_text_addressit.js | 13 ++ test/unit/sanitizer/defer_to_addressit.js | 132 +++++++++++++ test/unit/sanitizer/search_fallback.js | 185 ------------------ 12 files changed, 282 insertions(+), 226 deletions(-) create mode 100644 controller/predicates/is_addressit_parse.js create mode 100644 sanitizer/defer_to_addressit.js delete mode 100644 sanitizer/search_fallback.js create mode 100644 test/unit/controller/predicates/is_addressit_parse.js create mode 100644 test/unit/sanitizer/defer_to_addressit.js delete mode 100644 test/unit/sanitizer/search_fallback.js diff --git a/controller/predicates/is_addressit_parse.js b/controller/predicates/is_addressit_parse.js new file mode 100644 index 00000000..dac3bcfc --- /dev/null +++ b/controller/predicates/is_addressit_parse.js @@ -0,0 +1,4 @@ +const _ = require('lodash'); + +// returns true iff req.clean.parser is addressit +module.exports = (req, res) => _.get(req, 'clean.parser') === 'addressit'; diff --git a/controller/predicates/is_admin_only_analysis.js b/controller/predicates/is_admin_only_analysis.js index 4b3aef6a..d170f3fc 100644 --- a/controller/predicates/is_admin_only_analysis.js +++ b/controller/predicates/is_admin_only_analysis.js @@ -6,7 +6,7 @@ module.exports = (request, response) => { } // return true only if all non-admin properties of parsed_text are empty - return ['number', 'street', 'query', 'category'].every((prop) => { + return ['number', 'street', 'query', 'category', 'postalcode'].every((prop) => { return _.isEmpty(request.clean.parsed_text[prop]); }); diff --git a/routes/v1.js b/routes/v1.js index a232c92c..a07b966a 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -11,7 +11,7 @@ var sanitizers = { autocomplete: require('../sanitizer/autocomplete'), place: require('../sanitizer/place'), search: require('../sanitizer/search'), - search_fallback: require('../sanitizer/search_fallback'), + defer_to_addressit: require('../sanitizer/defer_to_addressit'), structured_geocoding: require('../sanitizer/structured_geocoding'), reverse: require('../sanitizer/reverse'), nearby: require('../sanitizer/nearby') @@ -75,6 +75,7 @@ const isCoarseReverse = require('../controller/predicates/is_coarse_reverse'); const isAdminOnlyAnalysis = require('../controller/predicates/is_admin_only_analysis'); const hasResultsAtLayers = require('../controller/predicates/has_results_at_layers'); const isSingleFieldAnalysis = require('../controller/predicates/is_single_field_analysis'); +const isAddressItParse = require('../controller/predicates/is_addressit_parse'); // shorthand for standard early-exit conditions const hasResponseDataOrRequestErrors = any(hasResponseData, hasRequestErrors); @@ -116,8 +117,7 @@ function addRoutes(app, peliasConfig) { const placeholderGeodisambiguationShouldExecute = all( not(hasResponseDataOrRequestErrors), isPlaceholderServiceEnabled, - isAdminOnlyAnalysis, - not(isSingleFieldAnalysis('postalcode')) + isAdminOnlyAnalysis ); // execute placeholder if libpostal identified address parts but ids need to @@ -129,6 +129,9 @@ function addRoutes(app, peliasConfig) { not(hasNumberButNotStreet), // don't run placeholder if there's a query or category not(hasAnyParsedTextProperty('query', 'category')), + // run placeholder if there are any adminareas identified + // hasAnyParsedTextProperty('neighbourhood', 'borough', 'city', 'county', 'state', 'country'), + // don't run placeholder if only postalcode was identified not(isSingleFieldAnalysis('postalcode')) ); @@ -156,6 +159,18 @@ function addRoutes(app, peliasConfig) { not(placeholderShouldHaveExecuted) ); + const shouldDeferToAddressIt = all( + not(hasRequestErrors), + not(hasResponseData), + not(placeholderShouldHaveExecuted) + ); + + const oldProdQueryShouldExecute = all( + not(hasRequestErrors), + not(hasResponseData), + isAddressItParse + ); + // execute under the following conditions: // - there are no errors or data // - request is not coarse OR pip service is disabled @@ -189,11 +204,11 @@ function addRoutes(app, peliasConfig) { controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute), controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute), controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute), - // 3rd parameter is which query module to use, use fallback - // first, then use original search strategy if first query didn't return anything + // 3rd parameter is which query module to use, use fallback first, then + // use original search strategy if first query didn't return anything controllers.search(peliasConfig.api, esclient, queries.cascading_fallback, fallbackQueryShouldExecute), - sanitizers.search_fallback.middleware, - controllers.search(peliasConfig.api, esclient, queries.very_old_prod, fallbackQueryShouldExecute), + sanitizers.defer_to_addressit(shouldDeferToAddressIt), + controllers.search(peliasConfig.api, esclient, queries.very_old_prod, oldProdQueryShouldExecute), postProc.trimByGranularity(), postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig.api), diff --git a/sanitizer/_text_addressit.js b/sanitizer/_text_addressit.js index 04fca21a..68a293bf 100644 --- a/sanitizer/_text_addressit.js +++ b/sanitizer/_text_addressit.js @@ -20,6 +20,7 @@ function sanitize( raw, clean ){ // valid text clean.text = raw.text; + clean.parser = 'addressit'; // remove anything that may have been parsed before delete clean.parsed_text; diff --git a/sanitizer/defer_to_addressit.js b/sanitizer/defer_to_addressit.js new file mode 100644 index 00000000..8822f72c --- /dev/null +++ b/sanitizer/defer_to_addressit.js @@ -0,0 +1,32 @@ +const sanitizeAll = require('../sanitizer/sanitizeAll'), + sanitizers = { + text: require('../sanitizer/_text_addressit') + }; + +const sanitize = function(req, cb) { sanitizeAll(req, sanitizers, cb); }; +const logger = require('pelias-logger').get('api'); +const logging = require( '../helper/logging' ); + +// middleware +module.exports = (should_execute) => { + return function(req, res, next) { + // if res.data already has results then don't call the _text_autocomplete sanitizer + // this has been put into place for when the libpostal integration way of querying + // ES doesn't return anything and we want to fallback to the old logic + if (!should_execute(req, res)) { + return next(); + } + + // log the query that caused a fallback since libpostal+new-queries didn't return anything + if (req.path === '/v1/search') { + const queryText = logging.isDNT(req) ? '[text removed]' : req.clean.text; + logger.info(`fallback queryText: ${queryText}`); + } + + sanitize( req, function( err, clean ){ + next(); + }); + + }; + +}; diff --git a/sanitizer/search_fallback.js b/sanitizer/search_fallback.js deleted file mode 100644 index 9bdb0c1e..00000000 --- a/sanitizer/search_fallback.js +++ /dev/null @@ -1,30 +0,0 @@ -var sanitizeAll = require('../sanitizer/sanitizeAll'), - sanitizers = { - text: require('../sanitizer/_text_addressit') - }; - -var sanitize = function(req, cb) { sanitizeAll(req, sanitizers, cb); }; -var logger = require('pelias-logger').get('api'); -var logging = require( '../helper/logging' ); -var _ = require('lodash'); - -// middleware -module.exports.middleware = function( req, res, next ){ - // if res.data already has results then don't call the _text_autocomplete sanitizer - // this has been put into place for when the libpostal integration way of querying - // ES doesn't return anything and we want to fallback to the old logic - if (_.get(res, 'data', []).length > 0) { - return next(); - } - - // log the query that caused a fallback since libpostal+new-queries didn't return anything - if (req.path === '/v1/search') { - const queryText = logging.isDNT(req) ? '[text removed]' : req.clean.text; - logger.info(`fallback queryText: ${queryText}`); - } - - sanitize( req, function( err, clean ){ - next(); - }); - -}; diff --git a/test/unit/controller/predicates/is_addressit_parse.js b/test/unit/controller/predicates/is_addressit_parse.js new file mode 100644 index 00000000..d11734ba --- /dev/null +++ b/test/unit/controller/predicates/is_addressit_parse.js @@ -0,0 +1,73 @@ +'use strict'; + +const _ = require('lodash'); +const is_addressit_parse = require('../../../../controller/predicates/is_addressit_parse'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', t => { + t.ok(_.isFunction(is_addressit_parse), 'is_addressit_parse is a function'); + t.end(); + }); +}; + +module.exports.tests.true_conditions = (test, common) => { + test('request.clean.parser=addressit should return true', t => { + const req = { + clean: { + parser: 'addressit' + } + }; + + t.ok(is_addressit_parse(req)); + t.end(); + + }); + +}; + +module.exports.tests.false_conditions = (test, common) => { + test('undefined request should return false', t => { + t.notOk(is_addressit_parse(undefined)); + t.end(); + }); + + test('undefined request.clean should return false', t => { + const req = {}; + + t.notOk(is_addressit_parse(req)); + t.end(); + }); + + test('undefined request.clean.parser should return false', t => { + const req = { + clean: {} + }; + + t.notOk(is_addressit_parse(req)); + t.end(); + }); + + test('non-\'addressit\' request.clean.parser should return false', t => { + const req = { + clean: { + parser: 'not addressit' + } + }; + + t.notOk(is_addressit_parse(req)); + t.end(); + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`GET /is_addressit_parse ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/controller/predicates/is_admin_only_analysis.js b/test/unit/controller/predicates/is_admin_only_analysis.js index eea8b567..4e90bf34 100644 --- a/test/unit/controller/predicates/is_admin_only_analysis.js +++ b/test/unit/controller/predicates/is_admin_only_analysis.js @@ -14,7 +14,7 @@ module.exports.tests.interface = (test, common) => { module.exports.tests.true_conditions = (test, common) => { test('parsed_text with admin-only properties should return true', (t) => { - ['neighbourhood', 'borough', 'city', 'county', 'state', 'postalcode', 'country'].forEach((property) => { + ['neighbourhood', 'borough', 'city', 'county', 'state', 'country'].forEach((property) => { const req = { clean: { parsed_text: {} @@ -47,7 +47,7 @@ module.exports.tests.false_conditions = (test, common) => { }); test('parsed_text with non-admin properties should return false', (t) => { - ['number', 'street', 'query', 'category'].forEach((property) => { + ['number', 'street', 'query', 'category', 'postalcode'].forEach((property) => { const req = { clean: { parsed_text: {} diff --git a/test/unit/run.js b/test/unit/run.js index 62924347..eb6f4de9 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -21,6 +21,7 @@ var tests = [ require('./controller/predicates/has_response_data'), require('./controller/predicates/has_results_at_layers'), require('./controller/predicates/has_request_errors'), + require('./controller/predicates/is_addressit_parse'), require('./controller/predicates/is_admin_only_analysis'), require('./controller/predicates/is_coarse_reverse'), require('./controller/predicates/is_service_enabled'), @@ -87,7 +88,7 @@ var tests = [ require('./sanitizer/reverse'), require('./sanitizer/sanitizeAll'), require('./sanitizer/search'), - require('./sanitizer/search_fallback'), + require('./sanitizer/defer_to_addressit'), require('./sanitizer/wrap'), require('./service/configurations/PlaceHolder'), require('./service/configurations/PointInPolygon'), diff --git a/test/unit/sanitizer/_text_addressit.js b/test/unit/sanitizer/_text_addressit.js index c81680c9..26989c18 100644 --- a/test/unit/sanitizer/_text_addressit.js +++ b/test/unit/sanitizer/_text_addressit.js @@ -33,6 +33,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ', ' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name ], @@ -57,6 +58,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ',' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name ], @@ -88,6 +90,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ', ' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name, query.admin_parts ], @@ -111,6 +114,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: query.name + ',' + query.admin_parts, + parser: 'addressit', parsed_text: { name: query.name, regions: [ query.name, query.admin_parts ], @@ -136,6 +140,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: 'yugolsavia' }; @@ -155,6 +160,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: 'small town' }; @@ -174,6 +180,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: '123 main' }; @@ -193,6 +200,7 @@ module.exports.tests.text_parser = function(test, common) { clean.parsed_text = 'this should be removed'; var expected_clean = { + parser: 'addressit', text: 'main 123' }; @@ -213,6 +221,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: 'main particle new york', + parser: 'addressit', parsed_text: { regions: [ 'main particle' ], state: 'NY' @@ -235,6 +244,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '123 main st new york ny', + parser: 'addressit', parsed_text: { number: '123', street: 'main st', @@ -259,6 +269,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '123 main st new york ny 10010', + parser: 'addressit', parsed_text: { number: '123', street: 'main st', @@ -283,6 +294,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '339 W Main St, Cheshire, 06410', + parser: 'addressit', parsed_text: { name: '339 W Main St', number: '339', @@ -308,6 +320,7 @@ module.exports.tests.text_parser = function(test, common) { var expected_clean = { text: '339 W Main St,Lancaster,PA', + parser: 'addressit', parsed_text: { name: '339 W Main St', number: '339', diff --git a/test/unit/sanitizer/defer_to_addressit.js b/test/unit/sanitizer/defer_to_addressit.js new file mode 100644 index 00000000..9fe4675e --- /dev/null +++ b/test/unit/sanitizer/defer_to_addressit.js @@ -0,0 +1,132 @@ +const proxyquire = require('proxyquire').noCallThru(); +const mock_logger = require('pelias-mock-logger'); + +module.exports.tests = {}; + +module.exports.tests.sanitize = (test, common) => { + test('verify that no sanitizers were called when should_execute returns false', (t) => { + t.plan(1); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.fail('_text_addressit should not have been called'); + }, + 'pelias-logger': logger + })(() => false); + + defer_to_addressit({}, {}, () => { + t.equals(logger.getInfoMessages().length, 0); + t.end(); + }); + + }); + + test('verify that _text_addressit sanitizer was called when should_execute returns true', (t) => { + t.plan(2); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.pass('_text_addressit should have been called'); + return { errors: [], warnings: [] }; + }, + 'pelias-logger': logger, + '../helper/logging': { + isDNT: () => false + } + })(() => true); + + const req = { + path: '/v1/search', + clean: { + text: 'this is the query text' + } + }; + + defer_to_addressit(req, {}, () => { + t.deepEquals(logger.getInfoMessages(), ['fallback queryText: this is the query text']); + t.end(); + }); + + }); + + test('query should not be logged if path != \'/v1/search\'', (t) => { + t.plan(2); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.pass('_text_addressit should have been called'); + return { errors: [], warnings: [] }; + }, + 'pelias-logger': logger + })(() => true); + + const req = { + path: 'not /v1/search', + clean: { + text: 'this is the query text' + } + }; + + defer_to_addressit(req, {}, () => { + t.deepEquals(logger.getInfoMessages(), []); + t.end(); + }); + + }); + + test('query should be logged as [text removed] if private', (t) => { + t.plan(2); + + const logger = mock_logger(); + + // rather than re-verify the functionality of all the sanitizers, this test just verifies that they + // were all called correctly + const defer_to_addressit = proxyquire('../../../sanitizer/defer_to_addressit', { + '../sanitizer/_text_addressit': () => { + t.pass('_text_addressit should have been called'); + return { errors: [], warnings: [] }; + }, + 'pelias-logger': logger, + '../helper/logging': { + isDNT: () => true + } + })(() => true); + + const req = { + path: '/v1/search', + clean: { + text: 'this is the query text' + } + }; + + defer_to_addressit(req, {}, () => { + t.deepEquals(logger.getInfoMessages(), ['fallback queryText: [text removed]']); + t.end(); + }); + + }); + +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape(`SANITIZE /defer_to_addressit ${name}`, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/sanitizer/search_fallback.js b/test/unit/sanitizer/search_fallback.js deleted file mode 100644 index 69e35fcd..00000000 --- a/test/unit/sanitizer/search_fallback.js +++ /dev/null @@ -1,185 +0,0 @@ -var proxyquire = require('proxyquire').noCallThru(); - -module.exports.tests = {}; - -module.exports.tests.sanitize = function(test, common) { - test('verify that all sanitizers were called as expected when `res` is undefined', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_addressit': function() { - called_sanitizers.push('_text_addressit'); - return { errors: [], warnings: [] }; - } - }); - - var expected_sanitizers = [ - '_text_addressit' - ]; - - var req = {}; - - search.middleware(req, undefined, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('verify that all sanitizers were called as expected when `res` has no `data` property', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_addressit': function() { - called_sanitizers.push('_text_addressit'); - return { errors: [], warnings: [] }; - } - }); - - var expected_sanitizers = [ - '_text_addressit' - ]; - - var req = {}; - var res = {}; - - search.middleware(req, res, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('verify that all sanitizers were called as expected when res.data is empty', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_addressit': function() { - called_sanitizers.push('_text_addressit'); - return { errors: [], warnings: [] }; - } - }); - - var expected_sanitizers = [ - '_text_addressit' - ]; - - var req = {}; - var res = { - data: [] - }; - - search.middleware(req, res, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('non-empty res.data should not call the _text_autocomplete sanitizer', function(t) { - var called_sanitizers = []; - - // rather than re-verify the functionality of all the sanitizers, this test just verifies that they - // were all called correctly - var search = proxyquire('../../../sanitizer/search_fallback', { - '../sanitizer/_text_autocomplete': function() { - throw new Error('_text_autocomplete sanitizer should not have been called'); - } - }); - - var expected_sanitizers = []; - - var req = {}; - var res = { - data: [{}] - }; - - search.middleware(req, res, function(){ - t.deepEquals(called_sanitizers, expected_sanitizers); - t.end(); - }); - - }); - - test('req.clean.text should be logged when isDNT=false', (t) => { - const infoLog = []; - - const search = proxyquire('../../../sanitizer/search_fallback', { - 'pelias-logger': { - get: () => { - return { - info: (msg) => { - infoLog.push(msg); - } - }; - } - }, - '../helper/logging': { - isDNT: () => { return false; } - } - }); - - const req = { - path: '/v1/search', - clean: { - text: 'this is the query text' - } - }; - - search.middleware(req, undefined, () => { - t.deepEquals(infoLog, [`fallback queryText: ${req.clean.text}`]); - t.end(); - }); - - }); - - test('req.clean.text should not be logged when isDNT=true', (t) => { - const infoLog = []; - - const search = proxyquire('../../../sanitizer/search_fallback', { - 'pelias-logger': { - get: () => { - return { - info: (msg) => { - infoLog.push(msg); - } - }; - } - }, - '../helper/logging': { - isDNT: () => { return true; } - } - }); - - const req = { - path: '/v1/search', - clean: { - text: 'this is the query text' - } - }; - - search.middleware(req, undefined, () => { - t.deepEquals(infoLog, ['fallback queryText: [text removed]']); - t.end(); - }); - - }); - -}; - -module.exports.all = function (tape, common) { - - function test(name, testFunction) { - return tape('SANITIZE /search_fallback ' + name, testFunction); - } - - for( var testCase in module.exports.tests ){ - module.exports.tests[testCase](test, common); - } -};