mirror of https://github.com/pelias/api.git
Harish Krishna
10 years ago
18 changed files with 788 additions and 60 deletions
@ -0,0 +1,13 @@
|
||||
/** |
||||
* These values specify how much a document that matches a certain _type |
||||
* should be boosted in elasticsearch results. |
||||
*/ |
||||
|
||||
module.exports = { |
||||
'admin0': 4, |
||||
'admin1': 3, |
||||
'admin2': 2, |
||||
'local_admin': 1, |
||||
'locality':1, |
||||
'neighborhood':1 |
||||
}; |
@ -0,0 +1,93 @@
|
||||
|
||||
var parser = require('addressit'); |
||||
var extend = require('extend'); |
||||
var get_layers = require('../helper/layers'); |
||||
var delim = ','; |
||||
|
||||
module.exports = function(query) { |
||||
|
||||
var tokenized = query.split(/[ ,]+/); |
||||
var hasNumber = /\d/.test(query); |
||||
|
||||
var getAdminPartsBySplittingOnDelim = function(query) { |
||||
// naive approach - for admin matching during query time
|
||||
// split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny'
|
||||
var delimIndex = query.indexOf(delim); |
||||
var address = {}; |
||||
if ( delimIndex !== -1 ) { |
||||
address.name = query.substring(0, delimIndex); |
||||
address.admin_parts = query.substring(delimIndex + 1).trim(); |
||||
}
|
||||
|
||||
return address; |
||||
}; |
||||
|
||||
var getTargetLayersWhenAddressParsingIsNotNecessary = function(query) { |
||||
var address = {}; |
||||
// set target_layer if input length <= 3 characters
|
||||
if (query.length <= 3 ) { |
||||
// no address parsing required
|
||||
address.target_layer = get_layers(['admin']); |
||||
} else if (tokenized.length === 1 || (tokenized.length < 3 && !hasNumber)) { |
||||
// no need to hit address layers if there's only one (or two) token(s)
|
||||
address.target_layer = get_layers(['admin', 'poi']); |
||||
} |
||||
|
||||
return address.target_layer ? address : null; |
||||
}; |
||||
|
||||
var getAddressParts = function(query) { |
||||
// address parsing
|
||||
var address = parser( query ); |
||||
// set target_layer if input suggests no address
|
||||
if (address.text === address.regions.join(' ') && !hasNumber) { |
||||
address.target_layer = get_layers(['admin', 'poi']); |
||||
} |
||||
|
||||
return address; |
||||
}; |
||||
|
||||
var addressWithAdminParts = getAdminPartsBySplittingOnDelim(query); |
||||
var addressWithTargetLayers= getTargetLayersWhenAddressParsingIsNotNecessary(query); |
||||
var addressWithAddressParts= !addressWithTargetLayers ? getAddressParts(query) : {};
|
||||
|
||||
var parsedAddress = extend(addressWithAdminParts,
|
||||
addressWithTargetLayers,
|
||||
addressWithAddressParts); |
||||
|
||||
var address_parts = [ 'name', |
||||
'number', |
||||
'street', |
||||
'city', |
||||
'state', |
||||
'country', |
||||
'postalcode', |
||||
'regions', |
||||
'admin_parts', |
||||
'target_layer' |
||||
]; |
||||
|
||||
var parsed_input = {}; |
||||
|
||||
address_parts.forEach(function(part){
|
||||
if (parsedAddress[part]) { |
||||
parsed_input[part] = parsedAddress[part]; |
||||
} |
||||
}); |
||||
|
||||
return parsed_input; |
||||
}; |
||||
|
||||
|
||||
// parsed_input = {
|
||||
// name : parsedAddress.name,
|
||||
// number : parsedAddress.number,
|
||||
// street : parsedAddress.street,
|
||||
// city : parsedAddress.city,
|
||||
// state : parsedAddress.state,
|
||||
// country: parsedAddress.country,
|
||||
// postalcode : parsedAddress.postalcode,
|
||||
// regions: parsedAddress.regions,
|
||||
// admin_parts: parsedAddress.admin_parts,
|
||||
// target_layer: parsedAddress.target_layer
|
||||
// }
|
@ -0,0 +1,168 @@
|
||||
|
||||
var parser = require('../../../helper/query_parser'); |
||||
var get_layers = require('../../../helper/layers'); |
||||
|
||||
// registry of test groups; each entry is invoked by module.exports.all
module.exports.tests = {};

// sanity check: the query parser module must export a callable
module.exports.tests.interface = function(test, common) {
  test('interface', function(assertions) {
    var exportedType = typeof parser;
    assertions.equal(exportedType, 'function', 'valid function');
    assertions.end();
  });
};
||||
|
||||
/**
 * Checks the naive comma split: everything before the first comma is expected
 * in `name`, the trimmed remainder in `admin_parts`.
 *
 * @param {Function} test   - test registration function (name, callback)
 * @param {*}        common - shared test context (unused here)
 */
module.exports.tests.split_on_comma = function(test, common) {
  var queries = ['soho, new york', 'chelsea, london', '123 main, new york'];
  var delim   = ',';

  var testParse = function(query) {
    test('naive parsing ' + query, function(t) {
      var address = parser(query);
      var delimIndex = query.indexOf(delim);
      var name = query.substring(0, delimIndex);
      var admin_parts = query.substring(delimIndex + 1).trim();

      t.equal(typeof address, 'object', 'valid object');
      t.equal(address.name, name, 'name set correctly to ' + address.name);
      t.equal(address.admin_parts, admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
      t.end();
    });
  };

  // forEach visits the array elements only; for...in would also walk any
  // enumerable property added to the array or to Array.prototype
  queries.forEach(function(query) {
    testParse(query);
  });
};
||||
|
||||
/**
 * Queries of three characters or fewer are expected to target the admin
 * layers only, whether they hold letters, digits or a mix of both.
 *
 * @param {Function} test   - test registration function (name, callback)
 * @param {*}        common - shared test context (unused here)
 */
module.exports.tests.parse_three_chars_or_less = function(test, common) {
  var chars_queries = ['a', 'bb', 'ccc'];
  var num_queries   = ['1', '12', '123'];
  var alphanum_q    = ['a1', '1a2', '12c'];

  var testParse = function(query) {
    // the fixtures include three-character queries, hence "<=" and not "<"
    test('query length <= 3 (' + query + ')', function(t) {
      var address = parser(query);
      var target_layer = get_layers(['admin']);

      t.equal(typeof address, 'object', 'valid object');
      t.deepEqual(address.target_layer, target_layer, 'target_layer set correctly to ' + target_layer.join(', '));
      t.end();
    });
  };

  var queries = chars_queries.concat(num_queries).concat(alphanum_q);
  // iterate the elements, not the enumerable keys, of the array
  queries.forEach(function(query) {
    testParse(query);
  });
};
||||
|
||||
/**
 * One-token queries, and two-token queries without a digit, are expected to
 * target the admin + poi layers. Two-token queries containing a digit are
 * expected to go through address parsing instead.
 *
 * @param {Function} test   - test registration function (name, callback)
 * @param {*}        common - shared test context (unused here)
 */
module.exports.tests.parse_one_or_more_tokens = function(test, common) {
  var one_token_queries = ['hyderbad', 'yugoslavia', 'somethingreallybigbutjustonetokenstill'];
  var two_tokens_nonum  = ['small town', 'biggg city', 'another empire'];
  var two_tokens_withnum= ['123 main', 'sixty 1', '123-980 house'];

  // parse_address: truthy when the query is expected to reach the address parser
  var testParse = function(query, parse_address) {
    test('query with one or more tokens (' + query + ')', function(t) {
      var address = parser(query);
      var target_layer = get_layers(['admin', 'poi']);

      t.equal(typeof address, 'object', 'valid object');

      if (parse_address) {
        t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing');
      } else {
        t.deepEqual(address.target_layer, target_layer, 'target_layer set correctly to ' + target_layer.join(', '));
      }

      t.end();
    });
  };

  // Wrap the calls: passing testParse straight to forEach would hand the
  // element index to it as `parse_address`.
  one_token_queries.concat(two_tokens_nonum).forEach(function(query) {
    testParse(query);
  });
  two_tokens_withnum.forEach(function(query) {
    testParse(query, true);
  });
};
||||
|
||||
/**
 * Builds query strings from address fixtures (the property values joined by
 * single spaces) and checks the parsed regions, house number, street and
 * postal code.
 *
 * @param {Function} test   - test registration function (name, callback)
 * @param {*}        common - shared test context (unused here)
 */
module.exports.tests.parse_address = function(test, common) {
  var addresses_nonum  = [{ non_street: 'main particle', city: 'new york'},
                          { non_street: 'biggg city block' },
                          { non_street: 'the empire state building' }
                         ];
  var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'},
                          { number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'},
                          { number: 1980, street: 'house st', city: 'hoboken', state: 'NY'}
                         ];
  var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010},
                          { number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970},
                          { number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089}
                         ];

  var testParse = function(query, hasNumber, hasZip) {
    // join the fixture values in declaration order, separated by one space
    var query_string = Object.keys(query).map(function(k) {
      return query[k];
    }).join(' ');

    var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number ');
    testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip ');
    // include the query so that every fixture gets a distinct test name
    testcase += '(' + query_string + ')';

    test(testcase, function(t) {
      var address = parser(query_string);
      var non_address_layer = get_layers(['admin', 'poi']);

      t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')');

      if (!hasNumber && !hasZip && query.non_street) {
        t.equal(address.regions.join(''), query_string, 'expected parsing result');
      } else {
        t.equal(address.regions.join(''), query.city, 'city in regions (' + query.city +')');
      }

      if ((hasNumber || hasZip) && query.street) {
        t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')');
        t.equal(address.number, query.number, 'correct house number (' + query.number + ')');
        t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')');
        t.equal(address.street, query.street, 'correct street name (' + query.street + ')');
      }

      if (hasZip) {
        t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')');
        t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')');
      }

      // The parsed result does not expose `text` and the fixtures carry no
      // `target_layer`, so key the check on the parsed target_layer itself
      // and compare it with the admin + poi layers.
      // NOTE(review): assumes helper/query_parser only sets target_layer to
      // admin + poi for queries of this length - confirm against the helper.
      if (address.target_layer) {
        t.deepEqual(address.target_layer, non_address_layer, 'target_layer set correctly to ' + non_address_layer.join(', '));
      }

      t.end();
    });
  };

  // iterate the elements, not the enumerable keys, of each fixture array
  addresses_nonum.forEach(function(fixture) {
    testParse(fixture);
  });
  address_with_num.forEach(function(fixture) {
    testParse(fixture, true);
  });
  address_with_zip.forEach(function(fixture) {
    testParse(fixture, true, true);
  });
};
||||
|
||||
/**
 * Runs every registered test group.
 *
 * @param {Function} tape   - test registration function (name, callback)
 * @param {*}        common - shared context handed to each test group
 */
module.exports.all = function (tape, common) {

  // prefix each test name with the name of this suite
  var test = function(name, testFunction) {
    return tape('QUERY PARSING: ' + name, testFunction);
  };

  var groups = module.exports.tests;
  for( var groupName in groups ){
    groups[groupName](test, common);
  }
};
@ -0,0 +1,29 @@
|
||||
|
||||
var input = require('../../../sanitiser/_input'), |
||||
parser = require('../../../helper/query_parser'), |
||||
delim = ',', |
||||
defaultError = 'invalid param \'input\': text length, must be >0', |
||||
allLayers = [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
|
||||
'locality', 'local_admin', 'osmaddress', 'openaddresses' ], |
||||
nonAddressLayers = [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
|
||||
'locality', 'local_admin' ], |
||||
defaultParsed= { target_layer: nonAddressLayers }, |
||||
defaultClean = { input: 'test',
|
||||
layers: allLayers,
|
||||
size: 10, |
||||
details: true, |
||||
parsed_input: defaultParsed, |
||||
lat:0, |
||||
lon:0 |
||||
}, |
||||
getTargetLayers = function(query) { |
||||
var address = parser(query); |
||||
return address.target_layer; |
||||
}; |
||||
|
||||
|
||||
module.exports = { |
||||
defaultParsed: defaultParsed, |
||||
defaultClean : defaultClean, |
||||
getTargetLayers: getTargetLayers |
||||
}; |
Loading…
Reference in new issue