From 6f4ec874fe6a290d10977e3fa48263c0eb3d1376 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 7 Sep 2016 15:45:01 -0400 Subject: [PATCH] added special override condition for libpostal query/housenumber mixup --- query/text_parser.js | 29 +++++++ test/unit/query/text_parser.js | 153 ++++++++++++++++++++++++++------- test/unit/run.js | 1 + 3 files changed, 150 insertions(+), 33 deletions(-) diff --git a/query/text_parser.js b/query/text_parser.js index c71e444a..293c021e 100644 --- a/query/text_parser.js +++ b/query/text_parser.js @@ -1,4 +1,5 @@ var logger = require('pelias-logger').get('api'); +var _ = require('lodash'); // all the address parsing logic function addParsedVariablesToQueryVariables( parsed_text, vs ){ @@ -61,6 +62,34 @@ function addParsedVariablesToQueryVariables( parsed_text, vs ){ vs.var( 'input:country', parsed_text.country ); } + // libpostal sometimes incorrectly parses addresses with prefix house numbers in + // places where the house number is normally postfix, for instance: + // ```> 1 Grolmanstraße, Berlin, Germany + // + // Result: + // + // { + // "house": "1", + // "road": "grolmanstrasse", + // "state": "berlin", + // "country": "germany" + // }``` + // + // In libpostal parlance, `house` is just a query term, not the house number. + // This special case handles that mixup: if there's a street, there's no house number, + // and the query consists only of digits, the query is used as the house number + // and the query itself is blanked out. 
+ if (shouldSetQueryIntoHouseNumber(vs)) { + vs.var( 'input:housenumber', vs.var('input:query').toString()); + vs.unset( 'input:query' ); + } + +} + +function shouldSetQueryIntoHouseNumber(vs) { + return !vs.isset('input:housenumber') && + vs.isset('input:street') && + /^[0-9]+$/.test(vs.var('input:query').toString()); } module.exports = addParsedVariablesToQueryVariables; diff --git a/test/unit/query/text_parser.js b/test/unit/query/text_parser.js index 5ffc5b76..274dc10c 100644 --- a/test/unit/query/text_parser.js +++ b/test/unit/query/text_parser.js @@ -10,57 +10,144 @@ module.exports.tests.interface = function(test, common) { }); }; -module.exports.tests.query = function(test, common) { - test('parsed_text without properties should leave vs properties unset', function(t) { - var parsed_text = {}; +// module.exports.tests.query = function(test, common) { +// test('parsed_text without properties should leave vs properties unset', function(t) { +// var parsed_text = {}; +// var vs = new VariableStore(); +// +// text_parser(parsed_text, vs); +// +// t.false(vs.isset('input:query')); +// t.false(vs.isset('input:category')); +// t.false(vs.isset('input:housenumber')); +// t.false(vs.isset('input:street')); +// t.false(vs.isset('input:neighbourhood')); +// t.false(vs.isset('input:borough')); +// t.false(vs.isset('input:postcode')); +// t.false(vs.isset('input:locality')); +// t.false(vs.isset('input:county')); +// t.false(vs.isset('input:region')); +// t.false(vs.isset('input:country')); +// t.end(); +// +// }); +// +// test('parsed_text without properties should leave vs properties unset', function(t) { +// var parsed_text = { +// query: 'query value', +// category: 'category value', +// number: 'number value', +// street: 'street value', +// neighbourhood: 'neighbourhood value', +// borough: 'borough value', +// postalcode: 'postalcode value', +// city: 'city value', +// county: 'county value', +// state: 'state value', +// country: 'country value' +// }; +// var vs 
= new VariableStore(); +// +// text_parser(parsed_text, vs); +// +// t.equals(vs.var('input:query').toString(), 'query value'); +// t.equals(vs.var('input:category').toString(), 'category value'); +// t.equals(vs.var('input:housenumber').toString(), 'number value'); +// t.equals(vs.var('input:street').toString(), 'street value'); +// t.equals(vs.var('input:neighbourhood').toString(), 'neighbourhood value'); +// t.equals(vs.var('input:borough').toString(), 'borough value'); +// t.equals(vs.var('input:postcode').toString(), 'postalcode value'); +// t.equals(vs.var('input:locality').toString(), 'city value'); +// t.equals(vs.var('input:county').toString(), 'county value'); +// t.equals(vs.var('input:region').toString(), 'state value'); +// t.equals(vs.var('input:country').toString(), 'country value'); +// t.end(); +// +// }); +// +// }; + +module.exports.tests.housenumber_special_cases = function(test, common) { + test('numeric query with street but no number should reassign query to housenumber', function(t) { + var parsed_text = { + query: '17', + // no house number set + street: 'street value' + }; var vs = new VariableStore(); text_parser(parsed_text, vs); t.false(vs.isset('input:query')); - t.false(vs.isset('input:category')); + t.equals(vs.var('input:housenumber').toString(), '17'); + t.equals(vs.var('input:street').toString(), 'street value'); + t.end(); + + }); + + test('numeric query with street but without number should not change anything', function(t) { + var parsed_text = { + query: '17', + number: 'housenumber value', + street: 'street value' + // both number and street are set + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), '17'); + t.equals(vs.var('input:housenumber').toString(), 'housenumber value'); + t.equals(vs.var('input:street').toString(), 'street value'); + t.end(); + + }); + + test('numeric query with number but without street should not change anything', function(t) { + var 
parsed_text = { + query: '17', + number: 'number value' + // number is set, but no street + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), '17'); + t.equals(vs.var('input:housenumber').toString(), 'number value'); + t.false(vs.isset('input:street')); + t.end(); + + }); + + test('numeric query without street or number should not change anything', function(t) { + var parsed_text = { + query: '17' + // no number or street + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), '17'); t.false(vs.isset('input:housenumber')); t.false(vs.isset('input:street')); - t.false(vs.isset('input:neighbourhood')); - t.false(vs.isset('input:borough')); - t.false(vs.isset('input:postcode')); - t.false(vs.isset('input:locality')); - t.false(vs.isset('input:county')); - t.false(vs.isset('input:region')); - t.false(vs.isset('input:country')); t.end(); }); - test('parsed_text without properties should leave vs properties unset', function(t) { + test('non-numeric query with street but no number should not change anything', function(t) { var parsed_text = { - query: 'query value', - category: 'category value', - number: 'number value', - street: 'street value', - neighbourhood: 'neighbourhood value', - borough: 'borough value', - postalcode: 'postalcode value', - city: 'city value', - county: 'county value', - state: 'state value', - country: 'country value' + query: '13 this is 15 not a number 17', + street: 'street value' }; var vs = new VariableStore(); text_parser(parsed_text, vs); - t.equals(vs.var('input:query').toString(), 'query value'); - t.equals(vs.var('input:category').toString(), 'category value'); - t.equals(vs.var('input:housenumber').toString(), 'number value'); + t.equals(vs.var('input:query').toString(), '13 this is 15 not a number 17'); + t.false(vs.isset('input:housenumber')); t.equals(vs.var('input:street').toString(), 'street value'); - 
t.equals(vs.var('input:neighbourhood').toString(), 'neighbourhood value'); - t.equals(vs.var('input:borough').toString(), 'borough value'); - t.equals(vs.var('input:postcode').toString(), 'postalcode value'); - t.equals(vs.var('input:locality').toString(), 'city value'); - t.equals(vs.var('input:county').toString(), 'county value'); - t.equals(vs.var('input:region').toString(), 'state value'); - t.equals(vs.var('input:country').toString(), 'country value'); t.end(); }); diff --git a/test/unit/run.js b/test/unit/run.js index 1cd8ab45..e93deb7d 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -41,6 +41,7 @@ var tests = [ require('./query/reverse'), require('./query/search'), require('./query/search_original'), + require('./query/text_parser'), require('./sanitiser/_boundary_country'), require('./sanitiser/_flag_bool'), require('./sanitiser/_geo_common'),