diff --git a/helper/query_parser.js b/helper/query_parser.js index 4af009aa..cb5dc7a7 100644 --- a/helper/query_parser.js +++ b/helper/query_parser.js @@ -3,6 +3,8 @@ var parser = require('addressit'); var extend = require('extend'); var type_mapping = require('../helper/type_mapping'); var delim = ','; +var check = require('check-types'); +var logger = require('pelias-logger').get('api'); module.exports = {}; @@ -61,5 +63,12 @@ module.exports.get_parsed_address = function get_parsed_address(query) { } }); + // if all we found was regions, ignore it as it is not enough information to make smarter decisions + if (Object.keys(parsed_text).length === 1 && !check.undefined(parsed_text.regions)) + { + logger.info('Ignoring address parser output, regions only'); + return null; + } + return parsed_text; }; diff --git a/middleware/distance.js b/middleware/distance.js index eb1d7c21..e333fba1 100644 --- a/middleware/distance.js +++ b/middleware/distance.js @@ -2,26 +2,31 @@ var geolib = require('geolib'); var check = require('check-types'); -function setup() { +function setup(prefix) { - return computeDistances; + return function (req, res, next) { + var opts = { + prefix: prefix || 'point.' + }; + return computeDistances(req, res, next, opts); + }; } -function computeDistances(req, res, next) { +function computeDistances(req, res, next, opts) { // do nothing if no result data set if (!res || !res.data) { return next(); } - if (!(check.number(req.clean['point.lat']) && - check.number(req.clean['point.lon']))) { + if (!(check.number(req.clean[opts.prefix + 'lat']) && + check.number(req.clean[opts.prefix + 'lon']))) { return next(); } var point = { - latitude: req.clean['point.lat'], - longitude: req.clean['point.lon'] + latitude: req.clean[opts.prefix + 'lat'], + longitude: req.clean[opts.prefix + 'lon'] }; res.data.forEach(function (place) { diff --git a/routes/v1.js b/routes/v1.js index 215f1752..352f7c82 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -58,6 +58,7 @@ function addRoutes(app, peliasConfig) { sanitisers.search.middleware, middleware.types, controllers.search(), + postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig), postProc.renamePlacenames(), postProc.geocodeJSON(peliasConfig, base), @@ -67,6 +68,7 @@ function addRoutes(app, peliasConfig) { sanitisers.autocomplete.middleware, middleware.types, controllers.search(null, require('../query/autocomplete')), + postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig), postProc.renamePlacenames(), postProc.geocodeJSON(peliasConfig, base), @@ -76,7 +78,7 @@ function addRoutes(app, peliasConfig) { sanitisers.reverse.middleware, middleware.types, controllers.search(undefined, reverseQuery), - postProc.distances(), + postProc.distances('point.'), // reverse confidence scoring depends on distance from origin // so it must be calculated first postProc.confidenceScoresReverse(), diff --git a/sanitiser/_text.js b/sanitiser/_text.js index 5df79225..d0cc0986 100644 --- a/sanitiser/_text.js +++ b/sanitiser/_text.js @@ -19,7 +19,10 @@ function sanitize( raw, clean ){ clean.text = raw.text; // parse text with query parser - clean.parsed_text = query_parser.get_parsed_address(clean.text); + var parsed_text = query_parser.get_parsed_address(clean.text); + if (check.assigned(parsed_text)) { + clean.parsed_text = parsed_text; + } // try to set layers from query parser results clean.types = clean.layers || {}; diff --git a/test/unit/helper/query_parser.js b/test/unit/helper/query_parser.js index 02c96531..e7771ca4 100644 --- a/test/unit/helper/query_parser.js +++ b/test/unit/helper/query_parser.js @@ -14,26 +14,22 @@ module.exports.tests.interface = function(test, common) { }; module.exports.tests.split_on_comma = function(test, common) { - var queries = ['soho, new york', 'chelsea, london', '123 main, new york']; - var delim = ','; + var queries = [ + { name: 'soho', admin_parts: 'new york' }, + { name: 'chelsea', admin_parts: 'london' }, + { name: '123 main', admin_parts: 'new york' } + ]; - var testParse = function(query) { + queries.forEach(function (query) { test('naive parsing ' + query, function(t) { - var address = parser.get_parsed_address(query); - var delimIndex = query.indexOf(delim); - var name = query.substring(0, delimIndex); - var admin_parts = query.substring(delimIndex + 1).trim(); + var address = parser.get_parsed_address(query.name + ', ' + query.admin_parts); t.equal(typeof address, 'object', 'valid object'); - t.equal(address.name, name, 'name set correctly to ' + address.name); - t.equal(address.admin_parts, admin_parts, 'admin_parts set correctly to ' + address.admin_parts); + t.equal(address.name, query.name, 'name set correctly to ' + address.name); + t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts); t.end(); }); - }; - - for (var key in queries) { - testParse( queries[key] ); - } + }); }; module.exports.tests.parse_three_chars_or_less = function(test, common) { @@ -41,7 +37,8 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) { var num_queries = ['1', '12', '123']; var alphanum_q = ['a1', '1a2', '12c']; - var testParse = function(query) { + var queries = chars_queries.concat(num_queries).concat(alphanum_q); + queries.forEach(function(query) { test('query length < 3 (' + query + ')', function(t) { var address = parser.get_parsed_address(query); var target_layer = layers_map.coarse; @@ -51,111 +48,64 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) { t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); t.end(); }); - }; - - var queries = chars_queries.concat(num_queries).concat(alphanum_q); - for (var key in queries) { - testParse( queries[key] ); - } + }); }; -module.exports.tests.parse_one_or_more_tokens = function(test, common) { - var one_token_queries = ['hyderbad', 'yugoslavia', 'somethingreallybigbutjustonetokenstill']; - var two_tokens_nonum = ['small town', 'biggg city', 'another empire']; - var two_tokens_withnum= ['123 main', 'sixty 1', '123-980 house']; - - // parse address is now always true to fix pelias/api#194 - var testParse = function(query, parse_address) { - test('query with one or more tokens (' + query + ')', function(t) { - var address = parser.get_parsed_address(query); - var target_layer = layers_map.coarse.concat(layers_map.venue); - var layers = parser.get_layers(query); - - t.equal(typeof address, 'object', 'valid object'); - - if (parse_address) { - t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing'); - } else { - t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); - } - - t.end(); - }); - }; - - var queries = one_token_queries.concat(two_tokens_nonum); - for (var key in queries) { - testParse( queries[key], true ); - } - for (key in two_tokens_withnum) { - testParse( two_tokens_withnum[key], true ); - } +module.exports.tests.parse_one_token = function(test, common) { + test('query with one token', function (t) { + var address = parser.get_parsed_address('yugolsavia'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with two tokens, no numbers', function (t) { + var address = parser.get_parsed_address('small town'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with two tokens, number first', function (t) { + var address = parser.get_parsed_address('123 main'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with two tokens, number second', function (t) { + var address = parser.get_parsed_address('main 123'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); + test('query with many tokens', function(t) { + var address = parser.get_parsed_address('main particle new york'); + t.equal(address, null, 'nothing address specific detected'); + t.end(); + }); }; module.exports.tests.parse_address = function(test, common) { - var addresses_nonum = [{ non_street: 'main particle', city: 'new york'}, - { non_street: 'biggg city block' }, - { non_street: 'the empire state building' } - ]; - var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'}, - { number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'}, - { number: 1980, street: 'house st', city: 'hoboken', state: 'NY'} - ]; - var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010}, - { number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970}, - { number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089} - ]; - - var testParse = function(query, hasNumber, hasZip) { - var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number '); - testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip '); - - test(testcase, function(t) { - var query_string = ''; - for (var k in query) { - query_string += ' ' + query[k]; - } - - // remove leading whitespace - query_string = query_string.substring(1); - - var address = parser.get_parsed_address(query_string); - - t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')'); - - if (!hasNumber && !hasZip && query.non_street) { - t.equal(address.regions.join(''), query_string, 'expected parsing result'); - } else { - t.equal(address.regions.join(''), query.city, 'city in regions (' + query.city +')'); - } - - if ((hasNumber || hasZip) && query.street) { - t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')'); - t.equal(address.number, query.number, 'correct house number (' + query.number + ')'); - t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')'); - t.equal(address.street, query.street, 'correct street name (' + query.street + ')'); - } - - if (hasZip) { - t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')'); - t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')'); - } - - t.end(); - }); - }; - - for (var key in addresses_nonum) { - testParse( addresses_nonum[key] ); - } - for (key in address_with_num) { - testParse( address_with_num[key], true ); - } - for (key in address_with_zip) { - testParse( address_with_zip[key], true, true ); - } + test('valid address, house number', function(t) { + var query_string = '123 main st new york ny'; + var address = parser.get_parsed_address(query_string); + + t.equal(typeof address, 'object', 'valid object for the address'); + t.equal(address.number, 123, 'parsed house number'); + t.equal(address.street, 'main st', 'parsed street'); + t.deepEqual(address.regions, ['new york'], 'parsed city'); + t.equal(address.state , 'NY', 'parsed state'); + t.end(); + }); + test('valid address, zipcode', function(t) { + var query_string = '123 main st new york ny 10010'; + var address = parser.get_parsed_address(query_string); + + t.equal(typeof address, 'object', 'valid object for the address'); + t.equal(address.number, 123, 'parsed house number'); + t.equal(address.street, 'main st', 'parsed street'); + t.deepEqual(address.regions, ['new york'], 'parsed city'); + t.equal(address.state , 'NY', 'parsed state'); + t.equal(address.postalcode, 10010, 'parsed zip'); + t.end(); + }); }; + module.exports.all = function (tape, common) { function test(name, testFunction) {