Browse Source

Merge pull request #502 from pelias/pelias-api-218

Fix address parsing without spaces after delimiters
pull/508/head
Diana Shkolnikov 9 years ago
parent
commit
c1b75ee72d
  1. 26
      helper/text_parser.js
  2. 20
      test/unit/helper/text_parser.js

26
helper/text_parser.js

@ -2,11 +2,10 @@
var parser = require('addressit');
var extend = require('extend');
var type_mapping = require('../helper/type_mapping');
var delim = ',';
var check = require('check-types');
var logger = require('pelias-logger').get('api');
module.exports = {};
var DELIM = ',';
/*
* For performance, and to prefer POI and admin records, express a preference
@ -21,14 +20,21 @@ module.exports.get_layers = function get_layers(query) {
module.exports.get_parsed_address = function get_parsed_address(query) {
var getAdminPartsBySplittingOnDelim = function(query) {
var getAdminPartsBySplittingOnDelim = function(queryParts) {
// naive approach - for admin matching during query time
// split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny'
var delimIndex = query.indexOf(delim);
var address = {};
if ( delimIndex !== -1 ) {
address.name = query.substring(0, delimIndex);
address.admin_parts = query.substring(delimIndex + 1).trim();
if (queryParts.length > 1) {
address.name = queryParts[0].trim();
// 1. slice away all parts after the first one
// 2. trim spaces from each part just in case
// 3. join the parts back together with appropriate delimiter and spacing
address.admin_parts = queryParts.slice(1)
.map(function (part) { return part.trim(); })
.join(DELIM + ' ');
}
return address;
@ -42,8 +48,10 @@ module.exports.get_parsed_address = function get_parsed_address(query) {
}
};
var addressWithAdminParts = getAdminPartsBySplittingOnDelim(query);
var addressWithAddressParts= getAddressParts(query);
var queryParts = query.split(DELIM);
var addressWithAdminParts = getAdminPartsBySplittingOnDelim(queryParts);
var addressWithAddressParts= getAddressParts(queryParts.join(DELIM + ' '));
var parsedAddress = extend(addressWithAdminParts,
addressWithAddressParts);

20
test/unit/helper/text_parser.js

@ -29,6 +29,15 @@ module.exports.tests.split_on_comma = function(test, common) {
t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
t.end();
});
// Same naive comma split as the case above, but the input is built with no
// space after the delimiter. The parsed name and admin_parts must still equal
// the expected values carried on `query`.
// The title is built from the actual input string: concatenating `query`
// itself (an object with name/admin_parts) would not print the query text.
test('naive parsing ' + query.name + ',' + query.admin_parts + ' without spaces', function(t) {
  var input = query.name + ',' + query.admin_parts;
  var address = parser.get_parsed_address(input);

  t.equal(typeof address, 'object', 'valid object');
  t.equal(address.name, query.name, 'name set correctly to ' + address.name);
  t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
  t.end();
});
});
};
@ -115,6 +124,17 @@ module.exports.tests.parse_address = function(test, common) {
t.equal(address.postalcode, '06410', 'parsed zip');
t.end();
});
// A full street address whose comma-separated parts have no space after the
// delimiter should still yield number, street, regions and state.
test('valid address without spaces after commas', function(t) {
  var query_string = '339 W Main St,Lancaster,PA';
  var address = parser.get_parsed_address(query_string);

  t.equal(typeof address, 'object', 'valid object for the address');
  t.equal(address.number, '339', 'parsed house number');
  t.equal(address.street, 'W Main St', 'parsed street');
  // regions is an array, so it needs a structural comparison
  t.deepEqual(address.regions, ['Lancaster'], 'parsed city');
  // state is a plain string; use t.equal like the other scalar assertions
  t.equal(address.state, 'PA', 'parsed state');
  t.end();
});
};

Loading…
Cancel
Save