Browse Source

Address parser, initial pass — breaks 68 tests! (Ignoring tests for now.)

pull/169/head
Harish Krishna 10 years ago
parent
commit
0b5b1dce85
  1. 5
      package.json
  2. 88
      query/search.js
  3. 51
      sanitiser/_input.js

5
package.json

@ -46,7 +46,10 @@
"morgan": "1.5.2",
"pelias-config": "^0.1.4",
"microtime": "1.4.0",
"pelias-suggester-pipeline": "2.0.2"
"pelias-suggester-pipeline": "2.0.2",
"extend": "2.0.1",
"parse-address": "0.0.4",
"addressit": "1.2.1"
},
"devDependencies": {
"ciao": "^0.3.4",

88
query/search.js

@ -13,36 +13,94 @@ function generate( params ){
}
var query = queries.distance( centroid, { size: params.size } );
var input = params.input;
if (params.bbox) {
query = queries.bbox ( centroid, { size: params.size, bbox: params.bbox } );
}
// add search condition to distance query
query.query.filtered.query = {
'bool': {
'must': [{
'match': {
'name.default': params.input
}
}
]
'must': [],
'should': []
}
};
if (params.input_admin) {
var admin_fields = ['admin0', 'admin1', 'admin1_abbr', 'admin2', 'alpha3'];
if (params.parsed_input) {
query.query.filtered.query.bool.should = [];
admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = params.input_admin;
query.query.filtered.query.bool.should.push({
'match': match
var admin_fields = [];
var qb = function(admin_fields, value) {
admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = value;
query.query.filtered.query.bool.should.push({
'match': match
});
});
});
};
// update input
if (params.parsed_input.number && params.parsed_input.street) {
input = params.parsed_input.number + ' ' + params.parsed_input.street;
} else if (params.parsed_input.admin_parts) {
input = params.parsed_input.name;
}
// address
// number, street, zip
if (params.parsed_input.number) {
qb(['address.number'], params.parsed_input.number);
}
if (params.parsed_input.street) {
qb(['address.street'], params.parsed_input.street);
}
if (params.parsed_input.zip) {
qb(['address.zip'], params.parsed_input.zip);
}
// city
// admin2, locality, local_admin, neighborhood
if (params.parsed_input.admin2) {
qb(['admin2'], params.parsed_input.admin2);
} else {
admin_fields.push('admin2');
}
// state
// admin1, admin1_abbr
if (params.parsed_input.admin1) {
qb(['admin1', 'admin1_abbr'], params.parsed_input.admin1);
} else {
admin_fields.push('admin1', 'admin1_abbr');
}
// country
// admin0, alpha3
if (params.parsed_input.admin0) {
qb(['admin0', 'alpha3'], params.parsed_input.admin0);
} else {
admin_fields.push('admin0', 'alpha3');
}
var input_regions = params.parsed_input.regions.join(' ');
if (admin_fields.length === 5 && input_regions !== params.input) {
if (params.parsed_input.admin_parts) {
qb(admin_fields, params.parsed_input.admin_parts);
} else {
qb(admin_fields, input_regions);
}
}
}
// add search condition to distance query
query.query.filtered.query.bool.must.push({
'match': {
'name.default': input
}
});
query.sort = query.sort.concat( sort( params ) );
return query;

51
sanitiser/_input.js

@ -1,4 +1,7 @@
var isObject = require('is-object');
var parser1 = require('parse-address'); // works well with US addresses
var parser2 = require('addressit'); // freeform address parser (backup)
var extend = require('extend');
// validate inputs, convert types and apply defaults
function sanitize( req ){
@ -22,14 +25,54 @@ function sanitize( req ){
req.clean.input = params.input;
// naive approach
// for admin matching during query time
// split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny'
var delim_index = params.input.indexOf(delim);
if ( delim_index !== -1 ) {
req.clean.input = params.input.substring(0, delim_index);
req.clean.input_admin = params.input.substring(delim_index + 1).trim();
var delimIndex = params.input.indexOf(delim);
var parsedAddress0 = {};
if ( delimIndex !== -1 ) {
parsedAddress0.name = params.input.substring(0, delimIndex);
parsedAddress0.admin_parts = params.input.substring(delimIndex + 1).trim();
}
// address parsing
var parsedAddress1 = parser1.parseAddress(params.input);
var parsedAddress2 = parser2(params.input);
var parsedAddress = extend(parsedAddress0, parsedAddress1, parsedAddress2);
var address_parts = [ 'name',
'number',
'street',
'city',
'state',
'country',
'zip',
'regions',
'admin_parts'
];
req.clean.parsed_input = {};
address_parts.forEach(function(part){
if (parsedAddress[part]) {
req.clean.parsed_input[part] = parsedAddress[part];
}
});
// req.clean.parsed_input = {
// name : parsedAddress.name,
// number : parsedAddress.number,
// street : parsedAddress.street,
// admin2 : parsedAddress.city,
// admin1 : parsedAddress.state,
// admin0 : parsedAddress.country,
// zip : parsedAddress.zip,
// regions: parsedAddress.regions,
// admin_parts: parsedAddress.admin_parts
// }
return { 'error': false };
}

Loading…
Cancel
Save