|
|
|
const _ = require('lodash');
|
|
|
|
const text_analyzer = require('pelias-text-analyzer');
|
|
|
|
|
|
|
|
// Lookup table of accepted request parameters. Each key is a parameter name
// read from the raw request; each value is the property name under which the
// cleaned value is stored in `clean.parsed_text`.
const fields = {
  venue: 'query',
  address: 'address',
  neighbourhood: 'neighbourhood',
  borough: 'borough',
  locality: 'city',
  county: 'county',
  region: 'state',
  postalcode: 'postalcode',
  country: 'country'
};
|
|
|
|
|
|
|
|
// Strip leading/trailing whitespace from `val`, then collapse every remaining
// run of whitespace characters into a single space.
function normalizeWhitespaceToSingleSpace(val) {
  const trimmed = _.trim(val);
  const whitespaceRun = /\s+/g;

  return _.replace(trimmed, whitespaceRun, ' ');
}
|
|
|
|
|
|
|
|
// Report whether `parsed_text` has exactly one own enumerable key and that
// key is `postalcode`.
//
// The ownership check goes through Object.prototype so it also works for
// objects created without a prototype, and for objects whose own
// `hasOwnProperty` key shadows the inherited method.
function isPostalCodeOnly(parsed_text) {
  return Object.keys(parsed_text).length === 1 &&
    Object.prototype.hasOwnProperty.call(parsed_text, 'postalcode');
}
|
|
|
|
|
|
|
|
// Decide which key of the parsed address most likely holds the house number.
// `number` is preferred; `postalcode` is the fallback because libpostal parses
// some inputs, like `3370 cobbe ave`, as a postcode+street. Since the whole
// input is being treated as a street address, an identified postcode can be
// assumed to actually be a house number.
function getHouseNumberField(analyzed_address) {
  // candidates in priority order
  const candidates = ['number', 'postalcode'];

  // first candidate present in the libpostal response, undefined if none
  return _.find(candidates, (field) => _.has(analyzed_address, field));
}
|
|
|
|
|
|
|
|
// Build `clean.parsed_text` from the recognised fields of a structured request.
//
// raw   - request parameters, read by the keys of `fields`
// clean - object that receives the `parsed_text` property (mutated in place)
//
// Every non-blank string field is whitespace-normalized and stored under its
// mapped name. If an `address` was supplied it is split into `number` and
// `street` (or just `street` when no house number is identified) and the
// `address` key is removed.
//
// Returns `{ errors: [], warnings: [] }`; an error is pushed when none of the
// recognised fields carried a usable value. No warnings are produced here.
function _sanitize( raw, clean ){
  // error & warning messages
  const messages = { errors: [], warnings: [] };

  // collect all the valid values into a single object
  clean.parsed_text = Object.keys(fields).reduce( (o, f) => {
    if (_.isString(raw[f]) && !_.isEmpty(_.trim(raw[f]))) {
      o[fields[f]] = normalizeWhitespaceToSingleSpace(raw[f]);
    }

    return o;
  }, {});

  if (_.isEmpty(Object.keys(clean.parsed_text))) {
    messages.errors.push(
      `at least one of the following fields is required: ${Object.keys(fields).join(', ')}`);
  }

  if (clean.parsed_text.hasOwnProperty('address')) {
    const analyzed_address = text_analyzer.parse(clean.parsed_text.address);

    const house_number_field = getHouseNumberField(analyzed_address);

    // if we're fairly certain that libpostal identified a house number
    // (from either the house_number or postcode field), place it into the
    // number field and remove the first instance of that value from address
    // and assign to street
    // eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St
    if (house_number_field) {
      clean.parsed_text.number = analyzed_address[house_number_field];

      // remove the first instance of the number, then normalize whitespace:
      // besides trimming the ends this collapses the double space left behind
      // when the number sat in the middle of the address
      // eg - 'Calle 12 Norte' becomes street=Calle Norte, not 'Calle  Norte'
      clean.parsed_text.street = normalizeWhitespaceToSingleSpace(
        _.replace(clean.parsed_text.address, clean.parsed_text.number, ''));

    } else {
      // otherwise no house number was identifiable, so treat the entire input
      // as a street
      clean.parsed_text.street = clean.parsed_text.address;

    }

    // the address field no longer means anything since it's been parsed, so remove it
    delete clean.parsed_text.address;

  }

  return messages;
}
|
|
|
|
|
|
|
|
// List the request parameters this sanitizer accepts, as a fresh array with
// one `{ name }` descriptor per parameter.
function _expected() {
  const names = [
    'venue',
    'address',
    'neighbourhood',
    'borough',
    'locality',
    'county',
    'region',
    'postalcode',
    'country'
  ];

  return names.map((name) => ({ name }));
}
|
|
|
|
// export function
|
|
|
|
module.exports = () => ({
|
|
|
|
sanitize: _sanitize,
|
|
|
|
expected: _expected
|
|
|
|
});
|