diff --git a/helper/text_parser.js b/helper/text_parser.js index 7b9ffcfe..0db8bede 100644 --- a/helper/text_parser.js +++ b/helper/text_parser.js @@ -2,11 +2,10 @@ var parser = require('addressit'); var extend = require('extend'); var type_mapping = require('../helper/type_mapping'); -var delim = ','; var check = require('check-types'); var logger = require('pelias-logger').get('api'); -module.exports = {}; +var DELIM = ','; /* * For performance, and to prefer POI and admin records, express a preference @@ -21,14 +20,21 @@ module.exports.get_layers = function get_layers(query) { module.exports.get_parsed_address = function get_parsed_address(query) { - var getAdminPartsBySplittingOnDelim = function(query) { + var getAdminPartsBySplittingOnDelim = function(queryParts) { // naive approach - for admin matching during query time // split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny' - var delimIndex = query.indexOf(delim); + var address = {}; - if ( delimIndex !== -1 ) { - address.name = query.substring(0, delimIndex); - address.admin_parts = query.substring(delimIndex + 1).trim(); + + if (queryParts.length > 1) { + address.name = queryParts[0].trim(); + + // 1. slice away all parts after the first one + // 2. trim spaces from each part just in case + // 3. join the parts back together with appropriate delimiter and spacing + address.admin_parts = queryParts.slice(1) + .map(function (part) { return part.trim(); }) + .join(DELIM + ' '); } return address; @@ -42,8 +48,10 @@ module.exports.get_parsed_address = function get_parsed_address(query) { } }; - var addressWithAdminParts = getAdminPartsBySplittingOnDelim(query); - var addressWithAddressParts= getAddressParts(query); + var queryParts = query.split(DELIM); + + var addressWithAdminParts = getAdminPartsBySplittingOnDelim(queryParts); + var addressWithAddressParts= getAddressParts(queryParts.join(DELIM + ' ')); var parsedAddress = extend(addressWithAdminParts, addressWithAddressParts); diff --git a/test/unit/helper/text_parser.js b/test/unit/helper/text_parser.js index 90b8fa52..ca5b05f0 100644 --- a/test/unit/helper/text_parser.js +++ b/test/unit/helper/text_parser.js @@ -29,6 +29,15 @@ module.exports.tests.split_on_comma = function(test, common) { t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts); t.end(); }); + + test('naive parsing ' + query + 'without spaces', function(t) { + var address = parser.get_parsed_address(query.name + ',' + query.admin_parts); + + t.equal(typeof address, 'object', 'valid object'); + t.equal(address.name, query.name, 'name set correctly to ' + address.name); + t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts); + t.end(); + }); }); }; @@ -115,6 +124,17 @@ module.exports.tests.parse_address = function(test, common) { t.equal(address.postalcode, '06410', 'parsed zip'); t.end(); }); + test('valid address without spaces after commas', function(t) { + var query_string = '339 W Main St,Lancaster,PA'; + var address = parser.get_parsed_address(query_string); + + t.equal(typeof address, 'object', 'valid object for the address'); + t.equal(address.number, '339', 'parsed house number'); + t.equal(address.street, 'W Main St', 'parsed street'); + t.deepEqual(address.regions, ['Lancaster'], 'parsed city'); + t.deepEqual(address.state, 'PA', 'parsed state'); + t.end(); + }); };