mirror of https://github.com/pelias/api.git
Stephen Hess
9 years ago
8 changed files with 9 additions and 247 deletions
@ -1,86 +0,0 @@ |
|||||||
|
|
||||||
var parser = require('addressit'); |
|
||||||
var extend = require('extend'); |
|
||||||
var type_mapping = require('../helper/type_mapping'); |
|
||||||
var check = require('check-types'); |
|
||||||
var logger = require('pelias-logger').get('api'); |
|
||||||
|
|
||||||
var DELIM = ','; |
|
||||||
|
|
||||||
/**
 * Layer preference for very short inputs.
 *
 * For performance, and to prefer POI and admin records, inputs of three
 * characters or fewer are restricted to the coarse layers.
 *
 * @param {string} query - raw query text
 * @returns {*} `type_mapping.layer_mapping.coarse` when `query.length <= 3`;
 *   otherwise `undefined` (no layer preference is expressed)
 */
module.exports.get_layers = function get_layers(query) {
  var isVeryShort = query.length <= 3;

  if (!isVeryShort) {
    // longer inputs: express no preference
    return undefined;
  }

  // no address parsing required
  return type_mapping.layer_mapping.coarse;
};
|
||||||
|
|
||||||
/**
 * Break a free-text query into address components.
 *
 * Two strategies are combined:
 *  1. a naive split on DELIM, yielding `name` (first part) and `admin_parts`
 *     (everything after it), and
 *  2. the `addressit` parser, run on the re-joined text when it is longer
 *     than three characters.
 * Parser output is merged over the naive result with `extend`, so parser
 * fields win when both supply the same key.
 *
 * @param {string} query - raw query text
 * @returns {Object|null} an object holding whichever of the known fields were
 *   truthy in the merged result (possibly empty), or `null` when `regions`
 *   was the only field found
 */
module.exports.get_parsed_address = function get_parsed_address(query) {

  // fields copied into the result, in output order
  var KNOWN_FIELDS = [
    'name',
    'number',
    'street',
    'city',
    'state',
    'country',
    'postalcode',
    'regions',
    'admin_parts'
  ];

  // naive approach - for admin matching during query time
  // split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny'
  function splitNameFromAdmin(parts) {
    if (parts.length < 2) {
      return {};
    }

    // everything after the first part, trimmed, then re-joined with
    // the delimiter followed by a single space
    var trailing = parts.slice(1).map(function (piece) {
      return piece.trim();
    });

    return {
      name: parts[0].trim(),
      admin_parts: trailing.join(DELIM + ' ')
    };
  }

  // perform full address parsing
  // except on queries so short they obviously can't contain an address
  function runAddressParser(text) {
    return text.length > 3 ? parser( text ) : undefined;
  }

  var pieces = query.split(DELIM);

  var merged = extend(
    splitNameFromAdmin(pieces),
    runAddressParser(pieces.join(DELIM + ' '))
  );

  var parsed_text = KNOWN_FIELDS.reduce(function (collected, field) {
    if (merged[field]) {
      collected[field] = merged[field];
    }
    return collected;
  }, {});

  // if all we found was regions, ignore it as it is not enough information to make smarter decisions
  var foundFields = Object.keys(parsed_text);
  if (foundFields.length === 1 && !check.undefined(parsed_text.regions)) {
    logger.info('Ignoring address parser output, regions only');
    return null;
  }

  return parsed_text;
};
|
@ -1,150 +0,0 @@ |
|||||||
var parser = require('../../../helper/text_parser'); |
|
||||||
|
|
||||||
var type_mapping = require('../../../helper/type_mapping'); |
|
||||||
var layers_map = type_mapping.layer_mapping; |
|
||||||
|
|
||||||
module.exports.tests = {}; |
|
||||||
|
|
||||||
/**
 * Verifies that the text parser module exposes its two public functions.
 */
module.exports.tests.interface = function(test, common) {
  var expectedFunctions = ['get_parsed_address', 'get_layers'];

  test('interface', function(t) {
    expectedFunctions.forEach(function (fnName) {
      t.equal(typeof parser[fnName], 'function', 'valid function');
    });
    t.end();
  });
};
|
||||||
|
|
||||||
/**
 * Checks the naive comma split: the text before the first comma becomes
 * `name` and the remainder becomes `admin_parts`, whether or not a space
 * follows the comma.
 */
module.exports.tests.split_on_comma = function(test, common) {
  var queries = [
    { name: 'soho',     admin_parts: 'new york' },
    { name: 'chelsea',  admin_parts: 'london'   },
    { name: '123 main', admin_parts: 'new york' }
  ];

  // each query is exercised once per separator style
  var variants = [
    { separator: ', ', suffix: '' },
    { separator: ',',  suffix: ' without spaces' }
  ];

  queries.forEach(function (query) {
    variants.forEach(function (variant) {
      var text = query.name + variant.separator + query.admin_parts;

      // build the title from the actual input text; concatenating the query
      // object itself would render as "[object Object]"
      test('naive parsing "' + text + '"' + variant.suffix, function(t) {
        var address = parser.get_parsed_address(text);

        t.equal(typeof address, 'object', 'valid object');
        t.equal(address.name, query.name, 'name set correctly to ' + address.name);
        t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
        t.end();
      });
    });
  });
};
|
||||||
|
|
||||||
/**
 * Inputs of one to three characters (letters, digits, or a mix) must still
 * yield an object from get_parsed_address and must map to the coarse layers
 * via get_layers.
 */
module.exports.tests.parse_three_chars_or_less = function(test, common) {
  var chars_queries = ['a', 'bb', 'ccc'];
  var num_queries   = ['1', '12', '123'];
  var alphanum_q    = ['a1', '1a2', '12c'];

  var queries = chars_queries.concat(num_queries).concat(alphanum_q);

  queries.forEach(function(query) {
    // title says "<= 3" because three-character inputs are included
    test('query length <= 3 (' + query + ')', function(t) {
      var address = parser.get_parsed_address(query);
      var target_layer = layers_map.coarse;
      var layers = parser.get_layers(query);

      t.equal(typeof address, 'object', 'valid object');
      t.deepEqual(layers, target_layer, 'layers set correctly to ' + target_layer.join(', '));
      t.end();
    });
  });
};
|
||||||
|
|
||||||
/**
 * Queries without enough address-specific signal are expected to produce
 * `null` from get_parsed_address.
 */
module.exports.tests.parse_one_token = function(test, common) {
  // [test title, query text]
  var cases = [
    ['query with one token',                 'yugolsavia'],
    ['query with two tokens, no numbers',    'small town'],
    ['query with two tokens, number first',  '123 main'],
    ['query with two tokens, number second', 'main 123'],
    ['query with many tokens',               'main particle new york']
  ];

  cases.forEach(function (entry) {
    var title = entry[0];
    var text = entry[1];

    test(title, function (t) {
      var address = parser.get_parsed_address(text);
      t.equal(address, null, 'nothing address specific detected');
      t.end();
    });
  });
};
|
||||||
|
|
||||||
/**
 * Full address parsing: house number, street, regions, state and postal code
 * are pulled out of complete address strings, with and without commas.
 */
module.exports.tests.parse_address = function(test, common) {
  test('valid address, house number', function(t) {
    var query_string = '123 main st new york ny';
    var address = parser.get_parsed_address(query_string);

    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.number, '123', 'parsed house number');
    t.equal(address.street, 'main st', 'parsed street');
    t.deepEqual(address.regions, ['new york'], 'parsed city');
    t.equal(address.state , 'NY', 'parsed state');
    t.end();
  });

  test('valid address, zipcode', function(t) {
    var query_string = '123 main st new york ny 10010';
    var address = parser.get_parsed_address(query_string);

    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.number, '123', 'parsed house number');
    t.equal(address.street, 'main st', 'parsed street');
    t.deepEqual(address.regions, ['new york'], 'parsed city');
    t.equal(address.state , 'NY', 'parsed state');
    t.equal(address.postalcode, '10010', 'parsed zip is a string');
    t.end();
  });

  test('valid address with leading 0s in zipcode', function(t) {
    var query_string = '339 W Main St, Cheshire, 06410';
    var address = parser.get_parsed_address(query_string);

    // the postal code is compared as a string so the leading zero is kept
    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.street, 'W Main St', 'parsed street');
    t.deepEqual(address.regions, ['Cheshire'], 'parsed city');
    t.equal(address.postalcode, '06410', 'parsed zip');
    t.end();
  });

  test('valid address without spaces after commas', function(t) {
    var query_string = '339 W Main St,Lancaster,PA';
    var address = parser.get_parsed_address(query_string);

    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.number, '339', 'parsed house number');
    t.equal(address.street, 'W Main St', 'parsed street');
    t.deepEqual(address.regions, ['Lancaster'], 'parsed city');
    t.equal(address.state, 'PA', 'parsed state');
    t.end();
  });
};
|
||||||
|
|
||||||
|
|
||||||
/**
 * Registers every test group in `module.exports.tests` with the supplied
 * tape instance, prefixing each test name with 'QUERY PARSING: '.
 *
 * @param {Function} tape - test registration function
 * @param {*} common - passed through untouched to each test group
 */
module.exports.all = function (tape, common) {

  // wraps tape so every test name carries the suite prefix
  var prefixedTest = function (name, testFunction) {
    return tape('QUERY PARSING: ' + name, testFunction);
  };

  Object.keys(module.exports.tests).forEach(function (groupName) {
    module.exports.tests[groupName](prefixedTest, common);
  });
};
|
Loading…
Reference in new issue