From 553f9780c57c5316f7e2e15025ae13aa8e5b1d67 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Mon, 21 Sep 2015 18:30:31 -0400 Subject: [PATCH 1/2] Remove admin matching when address is not parsed --- helper/query_parser.js | 9 + middleware/distance.js | 19 +- query/defaults.js | 2 +- routes/v1.js | 4 +- sanitiser/_text.js | 5 +- .../fixture/autocomplete_linguistic_focus.js | 2 +- ...tocomplete_linguistic_focus_null_island.js | 2 +- test/unit/fixture/search_linguistic_focus.js | 2 +- .../fixture/search_linguistic_focus_bbox.js | 2 +- .../search_linguistic_focus_null_island.js | 2 +- test/unit/helper/query_parser.js | 176 +++++++----------- 11 files changed, 97 insertions(+), 128 deletions(-) diff --git a/helper/query_parser.js b/helper/query_parser.js index b60ebcd8..aad58809 100644 --- a/helper/query_parser.js +++ b/helper/query_parser.js @@ -3,6 +3,8 @@ var parser = require('addressit'); var extend = require('extend'); var layers_map = require('../query/layers'); var delim = ','; +var check = require('check-types'); +var logger = require('pelias-logger').get('api'); module.exports = {}; @@ -61,5 +63,12 @@ module.exports.get_parsed_address = function get_parsed_address(query) { } }); + // if all we found was regions, ignore it as it is not enough information to make smarter decisions + if (Object.keys(parsed_text).length === 1 && !check.undefined(parsed_text.regions)) + { + logger.info('Ignoring address parser output, regions only'); + return null; + } + return parsed_text; }; diff --git a/middleware/distance.js b/middleware/distance.js index eb1d7c21..e333fba1 100644 --- a/middleware/distance.js +++ b/middleware/distance.js @@ -2,26 +2,31 @@ var geolib = require('geolib'); var check = require('check-types'); -function setup() { +function setup(prefix) { - return computeDistances; + return function (req, res, next) { + var opts = { + prefix: prefix || 'point.' + }; + return computeDistances(req, res, next, opts); + }; } -function computeDistances(req, res, next) { +function computeDistances(req, res, next, opts) { // do nothing if no result data set if (!res || !res.data) { return next(); } - if (!(check.number(req.clean['point.lat']) && - check.number(req.clean['point.lon']))) { + if (!(check.number(req.clean[opts.prefix + 'lat']) && + check.number(req.clean[opts.prefix + 'lon']))) { return next(); } var point = { - latitude: req.clean['point.lat'], - longitude: req.clean['point.lon'] + latitude: req.clean[opts.prefix + 'lat'], + longitude: req.clean[opts.prefix + 'lon'] }; res.data.forEach(function (place) { diff --git a/query/defaults.js b/query/defaults.js index cf9422d0..c9613d66 100644 --- a/query/defaults.js +++ b/query/defaults.js @@ -31,7 +31,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'focus:function': 'linear', 'focus:offset': '1km', - 'focus:scale': '50km', + 'focus:scale': '100km', 'focus:decay': 0.5, 'function_score:score_mode': 'avg', diff --git a/routes/v1.js b/routes/v1.js index 215f1752..352f7c82 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -58,6 +58,7 @@ function addRoutes(app, peliasConfig) { sanitisers.search.middleware, middleware.types, controllers.search(), + postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig), postProc.renamePlacenames(), postProc.geocodeJSON(peliasConfig, base), @@ -67,6 +68,7 @@ function addRoutes(app, peliasConfig) { sanitisers.autocomplete.middleware, middleware.types, controllers.search(null, require('../query/autocomplete')), + postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig), postProc.renamePlacenames(), postProc.geocodeJSON(peliasConfig, base), @@ -76,7 +78,7 @@ function addRoutes(app, peliasConfig) { sanitisers.reverse.middleware, middleware.types, controllers.search(undefined, reverseQuery), - postProc.distances(), + postProc.distances('point.'), // reverse confidence scoring depends on distance from origin // so it must be calculated first postProc.confidenceScoresReverse(), diff --git a/sanitiser/_text.js b/sanitiser/_text.js index 5df79225..d0cc0986 100644 --- a/sanitiser/_text.js +++ b/sanitiser/_text.js @@ -19,7 +19,10 @@ function sanitize( raw, clean ){ clean.text = raw.text; // parse text with query parser - clean.parsed_text = query_parser.get_parsed_address(clean.text); + var parsed_text = query_parser.get_parsed_address(clean.text); + if (check.assigned(parsed_text)) { + clean.parsed_text = parsed_text; + } // try to set layers from query parser results clean.types = clean.layers || {}; diff --git a/test/unit/fixture/autocomplete_linguistic_focus.js b/test/unit/fixture/autocomplete_linguistic_focus.js index 76e844e3..a1eb6d45 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus.js +++ b/test/unit/fixture/autocomplete_linguistic_focus.js @@ -44,7 +44,7 @@ module.exports = { 'lon': -82.50622 }, 'offset': '1km', - 'scale': '50km', + 'scale': '100km', 'decay': 0.5 } } diff --git a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js index ded463bf..7e105bb3 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js +++ b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js @@ -44,7 +44,7 @@ module.exports = { 'lon': 0 }, 'offset': '1km', - 'scale': '50km', + 'scale': '100km', 'decay': 0.5 } } diff --git a/test/unit/fixture/search_linguistic_focus.js b/test/unit/fixture/search_linguistic_focus.js index eac92086..b9878dec 100644 --- a/test/unit/fixture/search_linguistic_focus.js +++ b/test/unit/fixture/search_linguistic_focus.js @@ -44,7 +44,7 @@ module.exports = { 'lon': -82.50622 }, 'offset': '1km', - 'scale': '50km', + 'scale': '100km', 'decay': 0.5 } } diff --git a/test/unit/fixture/search_linguistic_focus_bbox.js b/test/unit/fixture/search_linguistic_focus_bbox.js index d7aa6544..b6e642d1 100644 --- a/test/unit/fixture/search_linguistic_focus_bbox.js +++ b/test/unit/fixture/search_linguistic_focus_bbox.js @@ -44,7 +44,7 @@ module.exports = { 'lon': -82.50622 }, 'offset': '1km', - 'scale': '50km', + 'scale': '100km', 'decay': 0.5 } } diff --git a/test/unit/fixture/search_linguistic_focus_null_island.js b/test/unit/fixture/search_linguistic_focus_null_island.js index 506dcea9..4ba2adf0 100644 --- a/test/unit/fixture/search_linguistic_focus_null_island.js +++ b/test/unit/fixture/search_linguistic_focus_null_island.js @@ -44,7 +44,7 @@ module.exports = { 'lon': 0 }, 'offset': '1km', - 'scale': '50km', + 'scale': '100km', 'decay': 0.5 } } diff --git a/test/unit/helper/query_parser.js b/test/unit/helper/query_parser.js index ad669ece..2d3b462c 100644 --- a/test/unit/helper/query_parser.js +++ b/test/unit/helper/query_parser.js @@ -13,26 +13,22 @@ module.exports.tests.interface = function(test, common) { }; module.exports.tests.split_on_comma = function(test, common) { - var queries = ['soho, new york', 'chelsea, london', '123 main, new york']; - var delim = ','; + var queries = [ + { name: 'soho', admin_parts: 'new york' }, + { name: 'chelsea', admin_parts: 'london' }, + { name: '123 main', admin_parts: 'new york' } + ]; - var testParse = function(query) { + queries.forEach(function (query) { test('naive parsing ' + query, function(t) { - var address = parser.get_parsed_address(query); - var delimIndex = query.indexOf(delim); - var name = query.substring(0, delimIndex); - var admin_parts = query.substring(delimIndex + 1).trim(); + var address = parser.get_parsed_address(query.name + ', ' + query.admin_parts); t.equal(typeof address, 'object', 'valid object'); - t.equal(address.name, name, 'name set correctly to ' + address.name); - t.equal(address.admin_parts, admin_parts, 'admin_parts set correctly to ' + address.admin_parts); + t.equal(address.name, query.name, 'name set correctly to ' + address.name); + t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts); t.end(); }); - }; - - for (var key in queries) { - testParse( queries[key] ); - } + }); }; module.exports.tests.parse_three_chars_or_less = function(test, common) { @@ -40,7 +36,8 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) { var num_queries = ['1', '12', '123']; var alphanum_q = ['a1', '1a2', '12c']; - var testParse = function(query) { + var queries = chars_queries.concat(num_queries).concat(alphanum_q); + queries.forEach(function(query) { test('query length < 3 (' + query + ')', function(t) { var address = parser.get_parsed_address(query); var target_layer = layers_map.coarse; @@ -50,111 +47,64 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) { t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); t.end(); }); - }; - - var queries = chars_queries.concat(num_queries).concat(alphanum_q); - for (var key in queries) { - testParse( queries[key] ); - } + }); }; -module.exports.tests.parse_one_or_more_tokens = function(test, common) { - var one_token_queries = ['hyderbad', 'yugoslavia', 'somethingreallybigbutjustonetokenstill']; - var two_tokens_nonum = ['small town', 'biggg city', 'another empire']; - var two_tokens_withnum= ['123 main', 'sixty 1', '123-980 house']; - - // parse address is now always true to fix pelias/api#194 - var testParse = function(query, parse_address) { - test('query with one or more tokens (' + query + ')', function(t) { - var address = parser.get_parsed_address(query); - var target_layer = layers_map.coarse.concat(layers_map.venue); - var layers = parser.get_layers(query); - - t.equal(typeof address, 'object', 'valid object'); - - if (parse_address) { - t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing'); - } else { - t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); - } - - t.end(); - }); - }; - - var queries = one_token_queries.concat(two_tokens_nonum); - for (var key in queries) { - testParse( queries[key], true ); - } - for (key in two_tokens_withnum) { - testParse( two_tokens_withnum[key], true ); - } +module.exports.tests.parse_one_token = function(test, common) { + test('query with one token', function (t) { + var address = parser.get_parsed_address('yugolsavia'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with two tokens, no numbers', function (t) { + var address = parser.get_parsed_address('small town'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with two tokens, number first', function (t) { + var address = parser.get_parsed_address('123 main'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with two tokens, number second', function (t) { + var address = parser.get_parsed_address('main 123'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with many tokens', function(t) { + var address = parser.get_parsed_address('main particle new york'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); }; module.exports.tests.parse_address = function(test, common) { - var addresses_nonum = [{ non_street: 'main particle', city: 'new york'}, - { non_street: 'biggg city block' }, - { non_street: 'the empire state building' } - ]; - var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'}, - { number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'}, - { number: 1980, street: 'house st', city: 'hoboken', state: 'NY'} - ]; - var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010}, - { number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970}, - { number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089} - ]; - - var testParse = function(query, hasNumber, hasZip) { - var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number '); - testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip '); - - test(testcase, function(t) { - var query_string = ''; - for (var k in query) { - query_string += ' ' + query[k]; - } - - // remove leading whitespace - query_string = query_string.substring(1); - - var address = parser.get_parsed_address(query_string); - - t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')'); - - if (!hasNumber && !hasZip && query.non_street) { - t.equal(address.regions.join(''), query_string, 'expected parsing result'); - } else { - t.equal(address.regions.join(''), query.city, 'city in regions (' + query.city +')'); - } - - if ((hasNumber || hasZip) && query.street) { - t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')'); - t.equal(address.number, query.number, 'correct house number (' + query.number + ')'); - t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')'); - t.equal(address.street, query.street, 'correct street name (' + query.street + ')'); - } - - if (hasZip) { - t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')'); - t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')'); - } - - t.end(); - }); - }; - - for (var key in addresses_nonum) { - testParse( addresses_nonum[key] ); - } - for (key in address_with_num) { - testParse( address_with_num[key], true ); - } - for (key in address_with_zip) { - testParse( address_with_zip[key], true, true ); - } + test('valid address, house number', function(t) { + var query_string = '123 main st new york ny'; + var address = parser.get_parsed_address(query_string); + + t.equal(typeof address, 'object', 'valid object for the address'); + t.equal(address.number, 123, 'parsed house number'); + t.equal(address.street, 'main st', 'parsed street'); + t.deepEqual(address.regions, ['new york'], 'parsed city'); + t.equal(address.state , 'NY', 'parsed state'); + t.end(); + }); + test('valid address, zipcode', function(t) { + var query_string = '123 main st new york ny 10010'; + var address = parser.get_parsed_address(query_string); + + t.equal(typeof address, 'object', 'valid object for the address'); + t.equal(address.number, 123, 'parsed house number'); + t.equal(address.street, 'main st', 'parsed street'); + t.deepEqual(address.regions, ['new york'], 'parsed city'); + t.equal(address.state , 'NY', 'parsed state'); + t.equal(address.postalcode, 10010, 'parsed zip'); + t.end(); + }); }; + module.exports.all = function (tape, common) { function test(name, testFunction) { From 62315a1775ba655cf70222445aecf78b6ea42d03 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Tue, 22 Sep 2015 16:15:46 -0400 Subject: [PATCH 2/2] Revert defaults.json change --- query/defaults.js | 2 +- test/unit/fixture/autocomplete_linguistic_focus.js | 2 +- test/unit/fixture/autocomplete_linguistic_focus_null_island.js | 2 +- test/unit/fixture/search_linguistic_focus.js | 2 +- test/unit/fixture/search_linguistic_focus_bbox.js | 2 +- test/unit/fixture/search_linguistic_focus_null_island.js | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/query/defaults.js b/query/defaults.js index c9613d66..cf9422d0 100644 --- a/query/defaults.js +++ b/query/defaults.js @@ -31,7 +31,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'focus:function': 'linear', 'focus:offset': '1km', - 'focus:scale': '100km', + 'focus:scale': '50km', 'focus:decay': 0.5, 'function_score:score_mode': 'avg', diff --git a/test/unit/fixture/autocomplete_linguistic_focus.js b/test/unit/fixture/autocomplete_linguistic_focus.js index a1eb6d45..76e844e3 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus.js +++ b/test/unit/fixture/autocomplete_linguistic_focus.js @@ -44,7 +44,7 @@ module.exports = { 'lon': -82.50622 }, 'offset': '1km', - 'scale': '100km', + 'scale': '50km', 'decay': 0.5 } } diff --git a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js index 7e105bb3..ded463bf 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js +++ b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js @@ -44,7 +44,7 @@ module.exports = { 'lon': 0 }, 'offset': '1km', - 'scale': '100km', + 'scale': '50km', 'decay': 0.5 } } diff --git a/test/unit/fixture/search_linguistic_focus.js b/test/unit/fixture/search_linguistic_focus.js index b9878dec..eac92086 100644 --- a/test/unit/fixture/search_linguistic_focus.js +++ b/test/unit/fixture/search_linguistic_focus.js @@ -44,7 +44,7 @@ module.exports = { 'lon': -82.50622 }, 'offset': '1km', - 'scale': '100km', + 'scale': '50km', 'decay': 0.5 } } diff --git a/test/unit/fixture/search_linguistic_focus_bbox.js b/test/unit/fixture/search_linguistic_focus_bbox.js index b6e642d1..d7aa6544 100644 --- a/test/unit/fixture/search_linguistic_focus_bbox.js +++ b/test/unit/fixture/search_linguistic_focus_bbox.js @@ -44,7 +44,7 @@ module.exports = { 'lon': -82.50622 }, 'offset': '1km', - 'scale': '100km', + 'scale': '50km', 'decay': 0.5 } } diff --git a/test/unit/fixture/search_linguistic_focus_null_island.js b/test/unit/fixture/search_linguistic_focus_null_island.js index 4ba2adf0..506dcea9 100644 --- a/test/unit/fixture/search_linguistic_focus_null_island.js +++ b/test/unit/fixture/search_linguistic_focus_null_island.js @@ -44,7 +44,7 @@ module.exports = { 'lon': 0 }, 'offset': '1km', - 'scale': '100km', + 'scale': '50km', 'decay': 0.5 } }