Browse Source

Remove admin matching when address is not parsed

pull/291/head
Diana Shkolnikov 9 years ago
parent
commit
553f9780c5
  1. 9
      helper/query_parser.js
  2. 19
      middleware/distance.js
  3. 2
      query/defaults.js
  4. 4
      routes/v1.js
  5. 5
      sanitiser/_text.js
  6. 2
      test/unit/fixture/autocomplete_linguistic_focus.js
  7. 2
      test/unit/fixture/autocomplete_linguistic_focus_null_island.js
  8. 2
      test/unit/fixture/search_linguistic_focus.js
  9. 2
      test/unit/fixture/search_linguistic_focus_bbox.js
  10. 2
      test/unit/fixture/search_linguistic_focus_null_island.js
  11. 162
      test/unit/helper/query_parser.js

9
helper/query_parser.js

@ -3,6 +3,8 @@ var parser = require('addressit');
var extend = require('extend');
var layers_map = require('../query/layers');
var delim = ',';
var check = require('check-types');
var logger = require('pelias-logger').get('api');
module.exports = {};
@ -61,5 +63,12 @@ module.exports.get_parsed_address = function get_parsed_address(query) {
}
});
// if all we found was regions, ignore it as it is not enough information to make smarter decisions
if (Object.keys(parsed_text).length === 1 && !check.undefined(parsed_text.regions))
{
logger.info('Ignoring address parser output, regions only');
return null;
}
return parsed_text;
};

19
middleware/distance.js

@ -2,26 +2,31 @@ var geolib = require('geolib');
var check = require('check-types');
function setup() {
function setup(prefix) {
return computeDistances;
return function (req, res, next) {
var opts = {
prefix: prefix || 'point.'
};
return computeDistances(req, res, next, opts);
};
}
function computeDistances(req, res, next) {
function computeDistances(req, res, next, opts) {
// do nothing if no result data set
if (!res || !res.data) {
return next();
}
if (!(check.number(req.clean['point.lat']) &&
check.number(req.clean['point.lon']))) {
if (!(check.number(req.clean[opts.prefix + 'lat']) &&
check.number(req.clean[opts.prefix + 'lon']))) {
return next();
}
var point = {
latitude: req.clean['point.lat'],
longitude: req.clean['point.lon']
latitude: req.clean[opts.prefix + 'lat'],
longitude: req.clean[opts.prefix + 'lon']
};
res.data.forEach(function (place) {

2
query/defaults.js

@ -31,7 +31,7 @@ module.exports = extend( false, peliasQuery.defaults, {
'focus:function': 'linear',
'focus:offset': '1km',
'focus:scale': '50km',
'focus:scale': '100km',
'focus:decay': 0.5,
'function_score:score_mode': 'avg',

4
routes/v1.js

@ -58,6 +58,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.search.middleware,
middleware.types,
controllers.search(),
postProc.distances('focus.point.'),
postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig, base),
@ -67,6 +68,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.autocomplete.middleware,
middleware.types,
controllers.search(null, require('../query/autocomplete')),
postProc.distances('focus.point.'),
postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig, base),
@ -76,7 +78,7 @@ function addRoutes(app, peliasConfig) {
sanitisers.reverse.middleware,
middleware.types,
controllers.search(undefined, reverseQuery),
postProc.distances(),
postProc.distances('point.'),
// reverse confidence scoring depends on distance from origin
// so it must be calculated first
postProc.confidenceScoresReverse(),

5
sanitiser/_text.js

@ -19,7 +19,10 @@ function sanitize( raw, clean ){
clean.text = raw.text;
// parse text with query parser
clean.parsed_text = query_parser.get_parsed_address(clean.text);
var parsed_text = query_parser.get_parsed_address(clean.text);
if (check.assigned(parsed_text)) {
clean.parsed_text = parsed_text;
}
// try to set layers from query parser results
clean.types = clean.layers || {};

2
test/unit/fixture/autocomplete_linguistic_focus.js

@ -44,7 +44,7 @@ module.exports = {
'lon': -82.50622
},
'offset': '1km',
'scale': '50km',
'scale': '100km',
'decay': 0.5
}
}

2
test/unit/fixture/autocomplete_linguistic_focus_null_island.js

@ -44,7 +44,7 @@ module.exports = {
'lon': 0
},
'offset': '1km',
'scale': '50km',
'scale': '100km',
'decay': 0.5
}
}

2
test/unit/fixture/search_linguistic_focus.js

@ -44,7 +44,7 @@ module.exports = {
'lon': -82.50622
},
'offset': '1km',
'scale': '50km',
'scale': '100km',
'decay': 0.5
}
}

2
test/unit/fixture/search_linguistic_focus_bbox.js

@ -44,7 +44,7 @@ module.exports = {
'lon': -82.50622
},
'offset': '1km',
'scale': '50km',
'scale': '100km',
'decay': 0.5
}
}

2
test/unit/fixture/search_linguistic_focus_null_island.js

@ -44,7 +44,7 @@ module.exports = {
'lon': 0
},
'offset': '1km',
'scale': '50km',
'scale': '100km',
'decay': 0.5
}
}

162
test/unit/helper/query_parser.js

@ -13,26 +13,22 @@ module.exports.tests.interface = function(test, common) {
};
module.exports.tests.split_on_comma = function(test, common) {
var queries = ['soho, new york', 'chelsea, london', '123 main, new york'];
var delim = ',';
var queries = [
{ name: 'soho', admin_parts: 'new york' },
{ name: 'chelsea', admin_parts: 'london' },
{ name: '123 main', admin_parts: 'new york' }
];
var testParse = function(query) {
queries.forEach(function (query) {
test('naive parsing ' + query, function(t) {
var address = parser.get_parsed_address(query);
var delimIndex = query.indexOf(delim);
var name = query.substring(0, delimIndex);
var admin_parts = query.substring(delimIndex + 1).trim();
var address = parser.get_parsed_address(query.name + ', ' + query.admin_parts);
t.equal(typeof address, 'object', 'valid object');
t.equal(address.name, name, 'name set correctly to ' + address.name);
t.equal(address.admin_parts, admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
t.equal(address.name, query.name, 'name set correctly to ' + address.name);
t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
t.end();
});
};
for (var key in queries) {
testParse( queries[key] );
}
});
};
module.exports.tests.parse_three_chars_or_less = function(test, common) {
@ -40,7 +36,8 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) {
var num_queries = ['1', '12', '123'];
var alphanum_q = ['a1', '1a2', '12c'];
var testParse = function(query) {
var queries = chars_queries.concat(num_queries).concat(alphanum_q);
queries.forEach(function(query) {
test('query length < 3 (' + query + ')', function(t) {
var address = parser.get_parsed_address(query);
var target_layer = layers_map.coarse;
@ -50,111 +47,64 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) {
t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', '));
t.end();
});
};
var queries = chars_queries.concat(num_queries).concat(alphanum_q);
for (var key in queries) {
testParse( queries[key] );
}
});
};
module.exports.tests.parse_one_or_more_tokens = function(test, common) {
var one_token_queries = ['hyderbad', 'yugoslavia', 'somethingreallybigbutjustonetokenstill'];
var two_tokens_nonum = ['small town', 'biggg city', 'another empire'];
var two_tokens_withnum= ['123 main', 'sixty 1', '123-980 house'];
// parse address is now always true to fix pelias/api#194
var testParse = function(query, parse_address) {
test('query with one or more tokens (' + query + ')', function(t) {
var address = parser.get_parsed_address(query);
var target_layer = layers_map.coarse.concat(layers_map.venue);
var layers = parser.get_layers(query);
t.equal(typeof address, 'object', 'valid object');
if (parse_address) {
t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing');
} else {
t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', '));
}
// Test group: queries for which get_parsed_address is expected to return null,
// i.e. the parser output is not treated as address-specific information.
// @param {function} test - registration function called as test(name, callback)
// @param {object} common - shared test argument; not used by this group
// NOTE(review): despite the name parse_one_token, the cases below also cover
// two-token and multi-token queries — consider renaming the group.
module.exports.tests.parse_one_token = function(test, common) {
test('query with one token', function (t) {
// NOTE(review): 'yugolsavia' looks like a misspelling of 'yugoslavia'; presumably
// any single word works for this assertion — confirm before correcting.
var address = parser.get_parsed_address('yugolsavia');
t.equal(address, null, 'nothing address specific detected');
t.end();
});
test('query with two tokens, no numbers', function (t) {
var address = parser.get_parsed_address('small town');
t.equal(address, null, 'nothing address specific detected');
t.end();
});
// A leading number without a comma is still expected to yield null.
test('query with two tokens, number first', function (t) {
var address = parser.get_parsed_address('123 main');
t.equal(address, null, 'nothing address specific detected');
t.end();
});
// Same expectation when the number trails the word.
test('query with two tokens, number second', function (t) {
var address = parser.get_parsed_address('main 123');
t.equal(address, null, 'nothing address specific detected');
t.end();
});
// Several plain words with no house number, no comma and no zip: still null.
test('query with many tokens', function(t) {
var address = parser.get_parsed_address('main particle new york');
t.equal(address, null, 'nothing address specific detected');
t.end();
});
};
var queries = one_token_queries.concat(two_tokens_nonum);
for (var key in queries) {
testParse( queries[key], true );
}
for (key in two_tokens_withnum) {
testParse( two_tokens_withnum[key], true );
}
};
module.exports.tests.parse_address = function(test, common) {
var addresses_nonum = [{ non_street: 'main particle', city: 'new york'},
{ non_street: 'biggg city block' },
{ non_street: 'the empire state building' }
];
var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'},
{ number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'},
{ number: 1980, street: 'house st', city: 'hoboken', state: 'NY'}
];
var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010},
{ number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970},
{ number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089}
];
var testParse = function(query, hasNumber, hasZip) {
var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number ');
testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip ');
test(testcase, function(t) {
var query_string = '';
for (var k in query) {
query_string += ' ' + query[k];
}
// remove leading whitespace
query_string = query_string.substring(1);
test('valid address, house number', function(t) {
var query_string = '123 main st new york ny';
var address = parser.get_parsed_address(query_string);
t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')');
if (!hasNumber && !hasZip && query.non_street) {
t.equal(address.regions.join(''), query_string, 'expected parsing result');
} else {
t.equal(address.regions.join(''), query.city, 'city in regions (' + query.city +')');
}
if ((hasNumber || hasZip) && query.street) {
t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')');
t.equal(address.number, query.number, 'correct house number (' + query.number + ')');
t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')');
t.equal(address.street, query.street, 'correct street name (' + query.street + ')');
}
if (hasZip) {
t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')');
t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')');
}
t.equal(typeof address, 'object', 'valid object for the address');
t.equal(address.number, 123, 'parsed house number');
t.equal(address.street, 'main st', 'parsed street');
t.deepEqual(address.regions, ['new york'], 'parsed city');
t.equal(address.state , 'NY', 'parsed state');
t.end();
});
};
test('valid address, zipcode', function(t) {
var query_string = '123 main st new york ny 10010';
var address = parser.get_parsed_address(query_string);
for (var key in addresses_nonum) {
testParse( addresses_nonum[key] );
}
for (key in address_with_num) {
testParse( address_with_num[key], true );
}
for (key in address_with_zip) {
testParse( address_with_zip[key], true, true );
}
t.equal(typeof address, 'object', 'valid object for the address');
t.equal(address.number, 123, 'parsed house number');
t.equal(address.street, 'main st', 'parsed street');
t.deepEqual(address.regions, ['new york'], 'parsed city');
t.equal(address.state , 'NY', 'parsed state');
t.equal(address.postalcode, 10010, 'parsed zip');
t.end();
});
};
module.exports.all = function (tape, common) {
function test(name, testFunction) {

Loading…
Cancel
Save