Browse Source

Merge pull request #291 from pelias/remove-regions-check

Remove admin matching when address is not parsed
pull/299/head
Diana Shkolnikov 9 years ago
parent
commit
ae50201c27
  1. 9
      helper/query_parser.js
  2. 19
      middleware/distance.js
  3. 4
      routes/v1.js
  4. 5
      sanitiser/_text.js
  5. 176
      test/unit/helper/query_parser.js

9
helper/query_parser.js

@ -3,6 +3,8 @@ var parser = require('addressit');
var extend = require('extend'); var extend = require('extend');
var type_mapping = require('../helper/type_mapping'); var type_mapping = require('../helper/type_mapping');
var delim = ','; var delim = ',';
var check = require('check-types');
var logger = require('pelias-logger').get('api');
module.exports = {}; module.exports = {};
@ -61,5 +63,12 @@ module.exports.get_parsed_address = function get_parsed_address(query) {
} }
}); });
// if all we found was regions, ignore it as it is not enough information to make smarter decisions
if (Object.keys(parsed_text).length === 1 && !check.undefined(parsed_text.regions))
{
logger.info('Ignoring address parser output, regions only');
return null;
}
return parsed_text; return parsed_text;
}; };

19
middleware/distance.js

@ -2,26 +2,31 @@ var geolib = require('geolib');
var check = require('check-types'); var check = require('check-types');
function setup() { function setup(prefix) {
return computeDistances; return function (req, res, next) {
var opts = {
prefix: prefix || 'point.'
};
return computeDistances(req, res, next, opts);
};
} }
function computeDistances(req, res, next) { function computeDistances(req, res, next, opts) {
// do nothing if no result data set // do nothing if no result data set
if (!res || !res.data) { if (!res || !res.data) {
return next(); return next();
} }
if (!(check.number(req.clean['point.lat']) && if (!(check.number(req.clean[opts.prefix + 'lat']) &&
check.number(req.clean['point.lon']))) { check.number(req.clean[opts.prefix + 'lon']))) {
return next(); return next();
} }
var point = { var point = {
latitude: req.clean['point.lat'], latitude: req.clean[opts.prefix + 'lat'],
longitude: req.clean['point.lon'] longitude: req.clean[opts.prefix + 'lon']
}; };
res.data.forEach(function (place) { res.data.forEach(function (place) {

4
routes/v1.js

@ -58,6 +58,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.search.middleware, sanitisers.search.middleware,
middleware.types, middleware.types,
controllers.search(), controllers.search(),
postProc.distances('focus.point.'),
postProc.confidenceScores(peliasConfig), postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(), postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig, base), postProc.geocodeJSON(peliasConfig, base),
@ -67,6 +68,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.autocomplete.middleware, sanitisers.autocomplete.middleware,
middleware.types, middleware.types,
controllers.search(null, require('../query/autocomplete')), controllers.search(null, require('../query/autocomplete')),
postProc.distances('focus.point.'),
postProc.confidenceScores(peliasConfig), postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(), postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig, base), postProc.geocodeJSON(peliasConfig, base),
@ -76,7 +78,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.reverse.middleware, sanitisers.reverse.middleware,
middleware.types, middleware.types,
controllers.search(undefined, reverseQuery), controllers.search(undefined, reverseQuery),
postProc.distances(), postProc.distances('point.'),
// reverse confidence scoring depends on distance from origin // reverse confidence scoring depends on distance from origin
// so it must be calculated first // so it must be calculated first
postProc.confidenceScoresReverse(), postProc.confidenceScoresReverse(),

5
sanitiser/_text.js

@ -19,7 +19,10 @@ function sanitize( raw, clean ){
clean.text = raw.text; clean.text = raw.text;
// parse text with query parser // parse text with query parser
clean.parsed_text = query_parser.get_parsed_address(clean.text); var parsed_text = query_parser.get_parsed_address(clean.text);
if (check.assigned(parsed_text)) {
clean.parsed_text = parsed_text;
}
// try to set layers from query parser results // try to set layers from query parser results
clean.types = clean.layers || {}; clean.types = clean.layers || {};

176
test/unit/helper/query_parser.js

@ -14,26 +14,22 @@ module.exports.tests.interface = function(test, common) {
}; };
module.exports.tests.split_on_comma = function(test, common) { module.exports.tests.split_on_comma = function(test, common) {
var queries = ['soho, new york', 'chelsea, london', '123 main, new york']; var queries = [
var delim = ','; { name: 'soho', admin_parts: 'new york' },
{ name: 'chelsea', admin_parts: 'london' },
{ name: '123 main', admin_parts: 'new york' }
];
var testParse = function(query) { queries.forEach(function (query) {
test('naive parsing ' + query, function(t) { test('naive parsing ' + query, function(t) {
var address = parser.get_parsed_address(query); var address = parser.get_parsed_address(query.name + ', ' + query.admin_parts);
var delimIndex = query.indexOf(delim);
var name = query.substring(0, delimIndex);
var admin_parts = query.substring(delimIndex + 1).trim();
t.equal(typeof address, 'object', 'valid object'); t.equal(typeof address, 'object', 'valid object');
t.equal(address.name, name, 'name set correctly to ' + address.name); t.equal(address.name, query.name, 'name set correctly to ' + address.name);
t.equal(address.admin_parts, admin_parts, 'admin_parts set correctly to ' + address.admin_parts); t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
t.end(); t.end();
}); });
}; });
for (var key in queries) {
testParse( queries[key] );
}
}; };
module.exports.tests.parse_three_chars_or_less = function(test, common) { module.exports.tests.parse_three_chars_or_less = function(test, common) {
@ -41,7 +37,8 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) {
var num_queries = ['1', '12', '123']; var num_queries = ['1', '12', '123'];
var alphanum_q = ['a1', '1a2', '12c']; var alphanum_q = ['a1', '1a2', '12c'];
var testParse = function(query) { var queries = chars_queries.concat(num_queries).concat(alphanum_q);
queries.forEach(function(query) {
test('query length < 3 (' + query + ')', function(t) { test('query length < 3 (' + query + ')', function(t) {
var address = parser.get_parsed_address(query); var address = parser.get_parsed_address(query);
var target_layer = layers_map.coarse; var target_layer = layers_map.coarse;
@ -51,111 +48,64 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) {
t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', '));
t.end(); t.end();
}); });
}; });
var queries = chars_queries.concat(num_queries).concat(alphanum_q);
for (var key in queries) {
testParse( queries[key] );
}
}; };
module.exports.tests.parse_one_or_more_tokens = function(test, common) { module.exports.tests.parse_one_token = function(test, common) {
var one_token_queries = ['hyderbad', 'yugoslavia', 'somethingreallybigbutjustonetokenstill']; test('query with one token', function (t) {
var two_tokens_nonum = ['small town', 'biggg city', 'another empire']; var address = parser.get_parsed_address('yugolsavia');
var two_tokens_withnum= ['123 main', 'sixty 1', '123-980 house']; t.equal(address, null, 'nothing address specific detected');
t.end();
// parse address is now always true to fix pelias/api#194 });
var testParse = function(query, parse_address) { test('query with two tokens, no numbers', function (t) {
test('query with one or more tokens (' + query + ')', function(t) { var address = parser.get_parsed_address('small town');
var address = parser.get_parsed_address(query); t.equal(address, null, 'nothing address specific detected');
var target_layer = layers_map.coarse.concat(layers_map.venue); t.end();
var layers = parser.get_layers(query); });
test('query with two tokens, number first', function (t) {
t.equal(typeof address, 'object', 'valid object'); var address = parser.get_parsed_address('123 main');
t.equal(address, null, 'nothing address specific detected');
if (parse_address) { t.end();
t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing'); });
} else { test('query with two tokens, number second', function (t) {
t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); var address = parser.get_parsed_address('main 123');
} t.equal(address, null, 'nothing address specific detected');
t.end();
t.end(); });
}); test('query with many tokens', function(t) {
}; var address = parser.get_parsed_address('main particle new york');
t.equal(address, null, 'nothing address specific detected');
var queries = one_token_queries.concat(two_tokens_nonum); t.end();
for (var key in queries) { });
testParse( queries[key], true );
}
for (key in two_tokens_withnum) {
testParse( two_tokens_withnum[key], true );
}
}; };
module.exports.tests.parse_address = function(test, common) { module.exports.tests.parse_address = function(test, common) {
var addresses_nonum = [{ non_street: 'main particle', city: 'new york'}, test('valid address, house number', function(t) {
{ non_street: 'biggg city block' }, var query_string = '123 main st new york ny';
{ non_street: 'the empire state building' } var address = parser.get_parsed_address(query_string);
];
var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'}, t.equal(typeof address, 'object', 'valid object for the address');
{ number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'}, t.equal(address.number, 123, 'parsed house number');
{ number: 1980, street: 'house st', city: 'hoboken', state: 'NY'} t.equal(address.street, 'main st', 'parsed street');
]; t.deepEqual(address.regions, ['new york'], 'parsed city');
var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010}, t.equal(address.state , 'NY', 'parsed state');
{ number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970}, t.end();
{ number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089} });
]; test('valid address, zipcode', function(t) {
var query_string = '123 main st new york ny 10010';
var testParse = function(query, hasNumber, hasZip) { var address = parser.get_parsed_address(query_string);
var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number ');
testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip '); t.equal(typeof address, 'object', 'valid object for the address');
t.equal(address.number, 123, 'parsed house number');
test(testcase, function(t) { t.equal(address.street, 'main st', 'parsed street');
var query_string = ''; t.deepEqual(address.regions, ['new york'], 'parsed city');
for (var k in query) { t.equal(address.state , 'NY', 'parsed state');
query_string += ' ' + query[k]; t.equal(address.postalcode, 10010, 'parsed zip');
} t.end();
});
// remove leading whitespace
query_string = query_string.substring(1);
var address = parser.get_parsed_address(query_string);
t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')');
if (!hasNumber && !hasZip && query.non_street) {
t.equal(address.regions.join(''), query_string, 'expected parsing result');
} else {
t.equal(address.regions.join(''), query.city, 'city in regions (' + query.city +')');
}
if ((hasNumber || hasZip) && query.street) {
t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')');
t.equal(address.number, query.number, 'correct house number (' + query.number + ')');
t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')');
t.equal(address.street, query.street, 'correct street name (' + query.street + ')');
}
if (hasZip) {
t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')');
t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')');
}
t.end();
});
};
for (var key in addresses_nonum) {
testParse( addresses_nonum[key] );
}
for (key in address_with_num) {
testParse( address_with_num[key], true );
}
for (key in address_with_zip) {
testParse( address_with_zip[key], true, true );
}
}; };
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {
function test(name, testFunction) { function test(name, testFunction) {

Loading…
Cancel
Save