Browse Source

Remove admin matching when address is not parsed

pull/291/head
Diana Shkolnikov 9 years ago
parent
commit
553f9780c5
  1. 9
      helper/query_parser.js
  2. 19
      middleware/distance.js
  3. 2
      query/defaults.js
  4. 4
      routes/v1.js
  5. 5
      sanitiser/_text.js
  6. 2
      test/unit/fixture/autocomplete_linguistic_focus.js
  7. 2
      test/unit/fixture/autocomplete_linguistic_focus_null_island.js
  8. 2
      test/unit/fixture/search_linguistic_focus.js
  9. 2
      test/unit/fixture/search_linguistic_focus_bbox.js
  10. 2
      test/unit/fixture/search_linguistic_focus_null_island.js
  11. 162
      test/unit/helper/query_parser.js

9
helper/query_parser.js

@ -3,6 +3,8 @@ var parser = require('addressit');
var extend = require('extend'); var extend = require('extend');
var layers_map = require('../query/layers'); var layers_map = require('../query/layers');
var delim = ','; var delim = ',';
var check = require('check-types');
var logger = require('pelias-logger').get('api');
module.exports = {}; module.exports = {};
@ -61,5 +63,12 @@ module.exports.get_parsed_address = function get_parsed_address(query) {
} }
}); });
// if all we found was regions, ignore it as it is not enough information to make smarter decisions
if (Object.keys(parsed_text).length === 1 && !check.undefined(parsed_text.regions))
{
logger.info('Ignoring address parser output, regions only');
return null;
}
return parsed_text; return parsed_text;
}; };

19
middleware/distance.js

@ -2,26 +2,31 @@ var geolib = require('geolib');
var check = require('check-types'); var check = require('check-types');
function setup() { function setup(prefix) {
return computeDistances; return function (req, res, next) {
var opts = {
prefix: prefix || 'point.'
};
return computeDistances(req, res, next, opts);
};
} }
function computeDistances(req, res, next) { function computeDistances(req, res, next, opts) {
// do nothing if no result data set // do nothing if no result data set
if (!res || !res.data) { if (!res || !res.data) {
return next(); return next();
} }
if (!(check.number(req.clean['point.lat']) && if (!(check.number(req.clean[opts.prefix + 'lat']) &&
check.number(req.clean['point.lon']))) { check.number(req.clean[opts.prefix + 'lon']))) {
return next(); return next();
} }
var point = { var point = {
latitude: req.clean['point.lat'], latitude: req.clean[opts.prefix + 'lat'],
longitude: req.clean['point.lon'] longitude: req.clean[opts.prefix + 'lon']
}; };
res.data.forEach(function (place) { res.data.forEach(function (place) {

2
query/defaults.js

@ -31,7 +31,7 @@ module.exports = extend( false, peliasQuery.defaults, {
'focus:function': 'linear', 'focus:function': 'linear',
'focus:offset': '1km', 'focus:offset': '1km',
'focus:scale': '50km', 'focus:scale': '100km',
'focus:decay': 0.5, 'focus:decay': 0.5,
'function_score:score_mode': 'avg', 'function_score:score_mode': 'avg',

4
routes/v1.js

@ -58,6 +58,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.search.middleware, sanitisers.search.middleware,
middleware.types, middleware.types,
controllers.search(), controllers.search(),
postProc.distances('focus.point.'),
postProc.confidenceScores(peliasConfig), postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(), postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig, base), postProc.geocodeJSON(peliasConfig, base),
@ -67,6 +68,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.autocomplete.middleware, sanitisers.autocomplete.middleware,
middleware.types, middleware.types,
controllers.search(null, require('../query/autocomplete')), controllers.search(null, require('../query/autocomplete')),
postProc.distances('focus.point.'),
postProc.confidenceScores(peliasConfig), postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(), postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig, base), postProc.geocodeJSON(peliasConfig, base),
@ -76,7 +78,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.reverse.middleware, sanitisers.reverse.middleware,
middleware.types, middleware.types,
controllers.search(undefined, reverseQuery), controllers.search(undefined, reverseQuery),
postProc.distances(), postProc.distances('point.'),
// reverse confidence scoring depends on distance from origin // reverse confidence scoring depends on distance from origin
// so it must be calculated first // so it must be calculated first
postProc.confidenceScoresReverse(), postProc.confidenceScoresReverse(),

5
sanitiser/_text.js

@ -19,7 +19,10 @@ function sanitize( raw, clean ){
clean.text = raw.text; clean.text = raw.text;
// parse text with query parser // parse text with query parser
clean.parsed_text = query_parser.get_parsed_address(clean.text); var parsed_text = query_parser.get_parsed_address(clean.text);
if (check.assigned(parsed_text)) {
clean.parsed_text = parsed_text;
}
// try to set layers from query parser results // try to set layers from query parser results
clean.types = clean.layers || {}; clean.types = clean.layers || {};

2
test/unit/fixture/autocomplete_linguistic_focus.js

@ -44,7 +44,7 @@ module.exports = {
'lon': -82.50622 'lon': -82.50622
}, },
'offset': '1km', 'offset': '1km',
'scale': '50km', 'scale': '100km',
'decay': 0.5 'decay': 0.5
} }
} }

2
test/unit/fixture/autocomplete_linguistic_focus_null_island.js

@ -44,7 +44,7 @@ module.exports = {
'lon': 0 'lon': 0
}, },
'offset': '1km', 'offset': '1km',
'scale': '50km', 'scale': '100km',
'decay': 0.5 'decay': 0.5
} }
} }

2
test/unit/fixture/search_linguistic_focus.js

@ -44,7 +44,7 @@ module.exports = {
'lon': -82.50622 'lon': -82.50622
}, },
'offset': '1km', 'offset': '1km',
'scale': '50km', 'scale': '100km',
'decay': 0.5 'decay': 0.5
} }
} }

2
test/unit/fixture/search_linguistic_focus_bbox.js

@ -44,7 +44,7 @@ module.exports = {
'lon': -82.50622 'lon': -82.50622
}, },
'offset': '1km', 'offset': '1km',
'scale': '50km', 'scale': '100km',
'decay': 0.5 'decay': 0.5
} }
} }

2
test/unit/fixture/search_linguistic_focus_null_island.js

@ -44,7 +44,7 @@ module.exports = {
'lon': 0 'lon': 0
}, },
'offset': '1km', 'offset': '1km',
'scale': '50km', 'scale': '100km',
'decay': 0.5 'decay': 0.5
} }
} }

162
test/unit/helper/query_parser.js

@ -13,26 +13,22 @@ module.exports.tests.interface = function(test, common) {
}; };
module.exports.tests.split_on_comma = function(test, common) { module.exports.tests.split_on_comma = function(test, common) {
var queries = ['soho, new york', 'chelsea, london', '123 main, new york']; var queries = [
var delim = ','; { name: 'soho', admin_parts: 'new york' },
{ name: 'chelsea', admin_parts: 'london' },
{ name: '123 main', admin_parts: 'new york' }
];
var testParse = function(query) { queries.forEach(function (query) {
test('naive parsing ' + query, function(t) { test('naive parsing ' + query, function(t) {
var address = parser.get_parsed_address(query); var address = parser.get_parsed_address(query.name + ', ' + query.admin_parts);
var delimIndex = query.indexOf(delim);
var name = query.substring(0, delimIndex);
var admin_parts = query.substring(delimIndex + 1).trim();
t.equal(typeof address, 'object', 'valid object'); t.equal(typeof address, 'object', 'valid object');
t.equal(address.name, name, 'name set correctly to ' + address.name); t.equal(address.name, query.name, 'name set correctly to ' + address.name);
t.equal(address.admin_parts, admin_parts, 'admin_parts set correctly to ' + address.admin_parts); t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
t.end(); t.end();
}); });
}; });
for (var key in queries) {
testParse( queries[key] );
}
}; };
module.exports.tests.parse_three_chars_or_less = function(test, common) { module.exports.tests.parse_three_chars_or_less = function(test, common) {
@ -40,7 +36,8 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) {
var num_queries = ['1', '12', '123']; var num_queries = ['1', '12', '123'];
var alphanum_q = ['a1', '1a2', '12c']; var alphanum_q = ['a1', '1a2', '12c'];
var testParse = function(query) { var queries = chars_queries.concat(num_queries).concat(alphanum_q);
queries.forEach(function(query) {
test('query length < 3 (' + query + ')', function(t) { test('query length < 3 (' + query + ')', function(t) {
var address = parser.get_parsed_address(query); var address = parser.get_parsed_address(query);
var target_layer = layers_map.coarse; var target_layer = layers_map.coarse;
@ -50,111 +47,64 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) {
t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', '));
t.end(); t.end();
}); });
}; });
var queries = chars_queries.concat(num_queries).concat(alphanum_q);
for (var key in queries) {
testParse( queries[key] );
}
}; };
module.exports.tests.parse_one_or_more_tokens = function(test, common) { module.exports.tests.parse_one_token = function(test, common) {
var one_token_queries = ['hyderbad', 'yugoslavia', 'somethingreallybigbutjustonetokenstill']; test('query with one token', function (t) {
var two_tokens_nonum = ['small town', 'biggg city', 'another empire']; var address = parser.get_parsed_address('yugolsavia');
var two_tokens_withnum= ['123 main', 'sixty 1', '123-980 house']; t.equal(address, null, 'nothing address specific detected');
t.end();
// parse address is now always true to fix pelias/api#194 });
var testParse = function(query, parse_address) { test('query with two tokens, no numbers', function (t) {
test('query with one or more tokens (' + query + ')', function(t) { var address = parser.get_parsed_address('small town');
var address = parser.get_parsed_address(query); t.equal(address, null, 'nothing address specific detected');
var target_layer = layers_map.coarse.concat(layers_map.venue); t.end();
var layers = parser.get_layers(query); });
test('query with two tokens, number first', function (t) {
t.equal(typeof address, 'object', 'valid object'); var address = parser.get_parsed_address('123 main');
t.equal(address, null, 'nothing address specific detected');
if (parse_address) { t.end();
t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing'); });
} else { test('query with two tokens, number second', function (t) {
t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); var address = parser.get_parsed_address('main 123');
} t.equal(address, null, 'nothing address specific detected');
t.end();
});
test('query with many tokens', function(t) {
var address = parser.get_parsed_address('main particle new york');
t.equal(address, null, 'nothing address specific detected');
t.end(); t.end();
}); });
};
var queries = one_token_queries.concat(two_tokens_nonum);
for (var key in queries) {
testParse( queries[key], true );
}
for (key in two_tokens_withnum) {
testParse( two_tokens_withnum[key], true );
}
}; };
module.exports.tests.parse_address = function(test, common) { module.exports.tests.parse_address = function(test, common) {
var addresses_nonum = [{ non_street: 'main particle', city: 'new york'}, test('valid address, house number', function(t) {
{ non_street: 'biggg city block' }, var query_string = '123 main st new york ny';
{ non_street: 'the empire state building' }
];
var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'},
{ number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'},
{ number: 1980, street: 'house st', city: 'hoboken', state: 'NY'}
];
var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010},
{ number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970},
{ number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089}
];
var testParse = function(query, hasNumber, hasZip) {
var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number ');
testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip ');
test(testcase, function(t) {
var query_string = '';
for (var k in query) {
query_string += ' ' + query[k];
}
// remove leading whitespace
query_string = query_string.substring(1);
var address = parser.get_parsed_address(query_string); var address = parser.get_parsed_address(query_string);
t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')'); t.equal(typeof address, 'object', 'valid object for the address');
t.equal(address.number, 123, 'parsed house number');
if (!hasNumber && !hasZip && query.non_street) { t.equal(address.street, 'main st', 'parsed street');
t.equal(address.regions.join(''), query_string, 'expected parsing result'); t.deepEqual(address.regions, ['new york'], 'parsed city');
} else { t.equal(address.state , 'NY', 'parsed state');
t.equal(address.regions.join(''), query.city, 'city in regions (' + query.city +')');
}
if ((hasNumber || hasZip) && query.street) {
t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')');
t.equal(address.number, query.number, 'correct house number (' + query.number + ')');
t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')');
t.equal(address.street, query.street, 'correct street name (' + query.street + ')');
}
if (hasZip) {
t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')');
t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')');
}
t.end(); t.end();
}); });
}; test('valid address, zipcode', function(t) {
var query_string = '123 main st new york ny 10010';
var address = parser.get_parsed_address(query_string);
for (var key in addresses_nonum) { t.equal(typeof address, 'object', 'valid object for the address');
testParse( addresses_nonum[key] ); t.equal(address.number, 123, 'parsed house number');
} t.equal(address.street, 'main st', 'parsed street');
for (key in address_with_num) { t.deepEqual(address.regions, ['new york'], 'parsed city');
testParse( address_with_num[key], true ); t.equal(address.state , 'NY', 'parsed state');
} t.equal(address.postalcode, 10010, 'parsed zip');
for (key in address_with_zip) { t.end();
testParse( address_with_zip[key], true, true ); });
}
}; };
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {
function test(name, testFunction) { function test(name, testFunction) {

Loading…
Cancel
Save