mirror of https://github.com/pelias/api.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
111 lines
3.0 KiB
111 lines
3.0 KiB
8 years ago
|
var check = require('check-types');
|
||
|
var parser = require('addressit');
|
||
|
var extend = require('extend');
|
||
|
var _ = require('lodash');
|
||
|
var logger = require('pelias-logger').get('api');
|
||
|
|
||
|
/**
 * Sanitizer for the 'text' request parameter.
 *
 * Validates raw.text; when it is a non-empty string (per check.nonEmptyString)
 * it is copied to clean.text and run through parse(), whose result is stored
 * on clean.parsed_text when check.assigned() accepts it.
 *
 * @param {Object} raw   - unsanitized input; only raw.text is read here
 * @param {Object} clean - sanitized output; mutated in place
 *                         (text is set, parsed_text is removed and possibly re-set)
 * @returns {{errors: string[], warnings: string[]}} messages produced while
 *          sanitizing; this function never adds warnings
 */
function sanitize( raw, clean ){

  var result = { errors: [], warnings: [] };

  // guard: reject a missing / empty 'text' and leave `clean` untouched
  if( !check.nonEmptyString( raw.text ) ){
    result.errors.push('invalid param \'text\': text length, must be >0');
    return result;
  }

  clean.text = raw.text;

  // drop any stale parse result before (possibly) storing a new one
  delete clean.parsed_text;

  // parse() may return null, in which case parsed_text stays unset
  var parsed = parse( clean.text );
  if( check.assigned( parsed ) ){
    clean.parsed_text = parsed;
  }

  return result;
}
|
||
|
|
||
|
// export function
|
||
|
// the sanitizer function itself is this module's only export
module.exports = sanitize;
|
||
|
|
||
|
|
||
|
|
||
|
// this is the addressit functionality from https://github.com/pelias/text-analyzer/blob/master/src/addressItParser.js
|
||
|
// separator parse() splits the query on; parts are re-joined with DELIM + ' '
var DELIM = ',';
|
||
|
|
||
|
/**
 * Break a free-text query into address components.
 *
 * Two sources are merged (address-parser output wins on key collisions,
 * since it is applied last via extend):
 *   1. a naive split on DELIM: the first part becomes `name`, the remaining
 *      parts become `admin_parts` (e.g. 'flatiron, new york, ny' ->
 *      name 'flatiron', admin_parts 'new york, ny');
 *   2. the output of the address parser, run only when the re-joined query
 *      is longer than 3 characters.
 *
 * Only a fixed whitelist of truthy fields is kept in the result.
 *
 * @param {string} query - text to parse
 * @returns {Object|null} object holding the whitelisted fields that were
 *          found, or null when `regions` is the only field found
 */
function parse(query) {
  var joiner = DELIM + ' ';
  var pieces = query.split(DELIM);

  // --- naive admin matching: only meaningful when a delimiter was present
  var adminGuess = {};
  if (pieces.length > 1) {
    var trimmedTail = [];
    for (var i = 1; i < pieces.length; i++) {
      trimmedTail.push(pieces[i].trim());
    }
    adminGuess.name = pieces[0].trim();
    adminGuess.admin_parts = trimmedTail.join(joiner);
  }

  // --- full address parsing, skipped for queries too short to hold an address
  // NOTE: pieces are re-joined untrimmed, so 'a, b' reaches the parser as 'a,  b'
  var rejoined = pieces.join(joiner);
  var parserOutput;
  if (rejoined.length > 3) {
    parserOutput = parser( rejoined );
  }

  var combined = extend(adminGuess, parserOutput);

  // --- keep only the known fields that carry a truthy value
  var wanted = [
    'name',
    'number',
    'street',
    'city',
    'state',
    'country',
    'postalcode',
    'regions',
    'admin_parts'
  ];

  var kept = wanted.reduce(function (acc, field) {
    var value = combined[field];
    if (value) {
      acc[field] = value;
    }
    return acc;
  }, {});

  // regions on their own are not enough information to make smarter decisions
  var foundKeys = Object.keys(kept);
  var regionsOnly = foundKeys.length === 1 && !_.isUndefined(kept.regions);
  if (regionsOnly) {
    logger.info('Ignoring address parser output, regions only');
    return null;
  }

  return kept;
}
|