Browse Source

Merge pull request #169 from pelias/ngram-address-parser

Address Parser
pull/171/head 2.2.1
Harish Krishna 10 years ago
parent
commit
1b6d3feb27
  1. 5
      controller/search.js
  2. 13
      helper/admin_weights.js
  3. 93
      helper/query_parser.js
  4. 2
      package.json
  5. 98
      query/search.js
  6. 13
      query/sort.js
  7. 13
      sanitiser/_input.js
  8. 1
      sanitiser/_layers.js
  9. 168
      test/unit/helper/query_parser.js
  10. 13
      test/unit/query/reverse.js
  11. 312
      test/unit/query/search.js
  12. 15
      test/unit/query/sort.js
  13. 1
      test/unit/run.js
  14. 29
      test/unit/sanitiser/_input.js
  15. 14
      test/unit/sanitiser/coarse.js
  16. 1
      test/unit/sanitiser/reverse.js
  17. 27
      test/unit/sanitiser/search.js
  18. 24
      test/unit/sanitiser/suggest.js

5
controller/search.js

@ -21,6 +21,11 @@ function setup( backend, query ){
cmd.type = req.clean.layers; cmd.type = req.clean.layers;
} }
// set type if input suggests targeting a layer(s)
if (req.clean.default_layers_set && req.clean.parsed_input) {
cmd.type = req.clean.parsed_input.target_layer || cmd.type;
}
// query backend // query backend
service.search( backend, cmd, function( err, docs ){ service.search( backend, cmd, function( err, docs ){

13
helper/admin_weights.js

@ -0,0 +1,13 @@
/**
* These values specify how much a document that matches a certain _type
* should be boosted in elasticsearch results.
*/
module.exports = {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality':1,
'neighborhood':1
};

93
helper/query_parser.js

@ -0,0 +1,93 @@
var parser = require('addressit');
var extend = require('extend');
var get_layers = require('../helper/layers');
var delim = ',';
module.exports = function(query) {
var tokenized = query.split(/[ ,]+/);
var hasNumber = /\d/.test(query);
var getAdminPartsBySplittingOnDelim = function(query) {
// naive approach - for admin matching during query time
// split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny'
var delimIndex = query.indexOf(delim);
var address = {};
if ( delimIndex !== -1 ) {
address.name = query.substring(0, delimIndex);
address.admin_parts = query.substring(delimIndex + 1).trim();
}
return address;
};
var getTargetLayersWhenAddressParsingIsNotNecessary = function(query) {
var address = {};
// set target_layer if input length <= 3 characters
if (query.length <= 3 ) {
// no address parsing required
address.target_layer = get_layers(['admin']);
} else if (tokenized.length === 1 || (tokenized.length < 3 && !hasNumber)) {
// no need to hit address layers if there's only one (or two) token(s)
address.target_layer = get_layers(['admin', 'poi']);
}
return address.target_layer ? address : null;
};
var getAddressParts = function(query) {
// address parsing
var address = parser( query );
// set target_layer if input suggests no address
if (address.text === address.regions.join(' ') && !hasNumber) {
address.target_layer = get_layers(['admin', 'poi']);
}
return address;
};
var addressWithAdminParts = getAdminPartsBySplittingOnDelim(query);
var addressWithTargetLayers= getTargetLayersWhenAddressParsingIsNotNecessary(query);
var addressWithAddressParts= !addressWithTargetLayers ? getAddressParts(query) : {};
var parsedAddress = extend(addressWithAdminParts,
addressWithTargetLayers,
addressWithAddressParts);
var address_parts = [ 'name',
'number',
'street',
'city',
'state',
'country',
'postalcode',
'regions',
'admin_parts',
'target_layer'
];
var parsed_input = {};
address_parts.forEach(function(part){
if (parsedAddress[part]) {
parsed_input[part] = parsedAddress[part];
}
});
return parsed_input;
};
// parsed_input = {
// name : parsedAddress.name,
// number : parsedAddress.number,
// street : parsedAddress.street,
// city : parsedAddress.city,
// state : parsedAddress.state,
// country: parsedAddress.country,
// postalcode : parsedAddress.postalcode,
// regions: parsedAddress.regions,
// admin_parts: parsedAddress.admin_parts,
// target_layer: parsedAddress.target_layer
// }

2
package.json

@ -43,6 +43,8 @@
"microtime": "1.4.0", "microtime": "1.4.0",
"morgan": "1.5.2", "morgan": "1.5.2",
"pelias-config": "^0.1.4", "pelias-config": "^0.1.4",
"extend": "2.0.1",
"addressit": "1.3.0",
"pelias-esclient": "0.0.25", "pelias-esclient": "0.0.25",
"pelias-logger": "^0.0.8", "pelias-logger": "^0.0.8",
"pelias-suggester-pipeline": "2.0.2", "pelias-suggester-pipeline": "2.0.2",

98
query/search.js

@ -13,42 +13,104 @@ function generate( params ){
} }
var query = queries.distance( centroid, { size: params.size } ); var query = queries.distance( centroid, { size: params.size } );
var input = params.input;
if (params.bbox) { if (params.bbox) {
query = queries.bbox ( centroid, { size: params.size, bbox: params.bbox } ); query = queries.bbox ( centroid, { size: params.size, bbox: params.bbox } );
} }
// add search condition to filtered query
query.query.filtered.query = { query.query.filtered.query = {
'bool': { 'bool': {
'must': [{ 'must': [],
'match': { 'should': []
'name.default': params.input
}
}]
} }
}; };
// should query contitions if (params.parsed_input) {
query.query.filtered.query.bool.should = [];
if (params.input_admin) { query.query.filtered.query.bool.should = [];
var admin_fields = ['admin0', 'admin1', 'admin1_abbr', 'admin2', 'alpha3'];
var unmatched_admin_fields = [];
// qb stands for query builder
var qb = function(unmatched_admin_fields, value) {
if (value) {
unmatched_admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = value;
query.query.filtered.query.bool.should.push({
'match': match
});
});
}
};
// update input
if (params.parsed_input.number && params.parsed_input.street) {
input = params.parsed_input.number + ' ' + params.parsed_input.street;
} else if (params.parsed_input.admin_parts) {
input = params.parsed_input.name;
}
// address
// number, street, postalcode
if (params.parsed_input.number) {
qb(['address.number'], params.parsed_input.number);
}
if (params.parsed_input.street) {
qb(['address.street'], params.parsed_input.street);
}
if (params.parsed_input.postalcode) {
qb(['address.zip'], params.parsed_input.postalcode);
}
// city
// admin2, locality, local_admin, neighborhood
if (params.parsed_input.city) {
qb(['admin2'], params.parsed_input.admin2);
} else {
unmatched_admin_fields.push('admin2');
}
// state
// admin1, admin1_abbr
if (params.parsed_input.state) {
qb(['admin1_abbr'], params.parsed_input.state);
} else {
unmatched_admin_fields.push('admin1', 'admin1_abbr');
}
// country
// admin0, alpha3
if (params.parsed_input.country) {
qb(['alpha3'], params.parsed_input.country);
} else {
unmatched_admin_fields.push('admin0', 'alpha3');
}
var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined;
// if no address was identified and input suggests some admin info in it
if (unmatched_admin_fields.length === 5 && input_regions !== params.input) {
if (params.parsed_input.admin_parts) {
qb(unmatched_admin_fields, params.parsed_input.admin_parts);
} else {
qb(unmatched_admin_fields, input_regions);
}
}
admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = params.input_admin;
query.query.filtered.query.bool.should.push({
'match': match
});
});
} }
// add search condition to distance query
query.query.filtered.query.bool.must.push({
'match': {
'name.default': input
}
});
// add phrase matching query // add phrase matching query
// note: this is required for shingle/phrase matching // note: this is required for shingle/phrase matching
query.query.filtered.query.bool.should.push({ query.query.filtered.query.bool.should.push({
'match': { 'match': {
'phrase.default': params.input 'phrase.default': input
} }
}); });

13
query/sort.js

@ -3,6 +3,7 @@ var population = 'population';
var popularity = 'popularity'; var popularity = 'popularity';
var category = 'category'; var category = 'category';
var category_weights = require('../helper/category_weights'); var category_weights = require('../helper/category_weights');
var admin_weights = require('../helper/admin_weights');
var weights = require('pelias-suggester-pipeline').weights; var weights = require('pelias-suggester-pipeline').weights;
var isObject = require( 'is-object' ); var isObject = require( 'is-object' );
@ -15,6 +16,13 @@ module.exports = function( params ){
'order': 'desc' 'order': 'desc'
} }
}, },
{
'_script': {
'file': popularity,
'type': 'number',
'order': 'desc'
}
},
{ {
'_script': { '_script': {
'file': population, 'file': population,
@ -24,7 +32,10 @@ module.exports = function( params ){
}, },
{ {
'_script': { '_script': {
'file': popularity, 'params': {
'weights': admin_weights
},
'file': 'weights',
'type': 'number', 'type': 'number',
'order': 'desc' 'order': 'desc'
} }

13
sanitiser/_input.js

@ -1,11 +1,11 @@
var isObject = require('is-object'); var isObject = require('is-object');
var query_parse= require('../helper/query_parser');
// validate inputs, convert types and apply defaults // validate inputs, convert types and apply defaults
function sanitize( req ){ function sanitize( req ){
req.clean = req.clean || {}; req.clean = req.clean || {};
var params= req.query; var params= req.query;
var delim = ',';
// ensure the input params are a valid object // ensure the input params are a valid object
if( !isObject( params ) ){ if( !isObject( params ) ){
@ -22,13 +22,8 @@ function sanitize( req ){
req.clean.input = params.input; req.clean.input = params.input;
// for admin matching during query time req.clean.parsed_input = query_parse(params.input);
// split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny'
var delim_index = params.input.indexOf(delim);
if ( delim_index !== -1 ) {
req.clean.input = params.input.substring(0, delim_index);
req.clean.input_admin = params.input.substring(delim_index + 1).trim();
}
return { 'error': false }; return { 'error': false };

1
sanitiser/_layers.js

@ -17,6 +17,7 @@ function sanitize( req ){
// default case (no layers specified in GET params) // default case (no layers specified in GET params)
if('string' !== typeof params.layers || !params.layers.length){ if('string' !== typeof params.layers || !params.layers.length){
params.layers = 'poi,admin,address'; // default layers params.layers = 'poi,admin,address'; // default layers
clean.default_layers_set = true;
} }
// decide which layers can be queried // decide which layers can be queried

168
test/unit/helper/query_parser.js

@ -0,0 +1,168 @@
var parser = require('../../../helper/query_parser');
var get_layers = require('../../../helper/layers');
// registry of test groups; each entry is invoked by module.exports.all
module.exports.tests = {};

/**
 * The query parser module must export a callable.
 */
module.exports.tests.interface = function(test, common) {
  function parserIsFunction(t) {
    var exportedType = typeof parser;
    t.equal(exportedType, 'function', 'valid function');
    t.end();
  }

  test('interface', parserIsFunction);
};
/**
 * A query containing a comma must be split on the FIRST comma into
 * `name` (the text before it) and `admin_parts` (the trimmed text after it).
 */
module.exports.tests.split_on_comma = function(test, common) {
  var queries = ['soho, new york', 'chelsea, london', '123 main, new york'];
  var delim = ',';

  var testParse = function(query) {
    test('naive parsing ' + query, function(t) {
      var address = parser(query);

      // expected values are derived the same way the parser documents it
      var delimIndex = query.indexOf(delim);
      var name = query.substring(0, delimIndex);
      var admin_parts = query.substring(delimIndex + 1).trim();

      t.equal(typeof address, 'object', 'valid object');
      t.equal(address.name, name, 'name set correctly to ' + address.name);
      t.equal(address.admin_parts, admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
      t.end();
    });
  };

  // iterate the elements only; for...in would also visit any enumerable
  // property that happens to be added to Array.prototype
  queries.forEach(function(query) {
    testParse(query);
  });
};
/**
 * Queries of three characters or fewer must skip address parsing and
 * target the layers returned by get_layers(['admin']).
 */
module.exports.tests.parse_three_chars_or_less = function(test, common) {
  var chars_queries = ['a', 'bb', 'ccc'];
  var num_queries = ['1', '12', '123'];
  var alphanum_q = ['a1', '1a2', '12c'];

  var testParse = function(query) {
    // title says "<= 3": the three-character fixtures are covered too
    test('query length <= 3 (' + query + ')', function(t) {
      var address = parser(query);
      var target_layer = get_layers(['admin']);

      t.equal(typeof address, 'object', 'valid object');
      t.deepEqual(address.target_layer, target_layer, 'target_layer set correctly to ' + target_layer.join(', '));
      t.end();
    });
  };

  var queries = chars_queries.concat(num_queries).concat(alphanum_q);
  queries.forEach(function(query) {
    testParse(query);
  });
};
/**
 * One-token queries, and two-token queries without a digit, must target
 * get_layers(['admin', 'poi']) without address parsing. Two-token queries
 * that contain a digit must go through the address parser instead.
 */
module.exports.tests.parse_one_or_more_tokens = function(test, common) {
  var one_token_queries = ['hyderbad', 'yugoslavia', 'somethingreallybigbutjustonetokenstill'];
  var two_tokens_nonum = ['small town', 'biggg city', 'another empire'];
  var two_tokens_withnum= ['123 main', 'sixty 1', '123-980 house'];

  // parse_address: truthy when the query is expected to reach the address parser
  var testParse = function(query, parse_address) {
    test('query with one or more tokens (' + query + ')', function(t) {
      var address = parser(query);
      var target_layer = get_layers(['admin', 'poi']);

      t.equal(typeof address, 'object', 'valid object');

      if (parse_address) {
        t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing');
      } else {
        t.deepEqual(address.target_layer, target_layer, 'target_layer set correctly to ' + target_layer.join(', '));
      }

      t.end();
    });
  };

  // wrap the calls: forEach passes the index as 2nd argument, which must
  // not leak into testParse's parse_address flag
  one_token_queries.concat(two_tokens_nonum).forEach(function(query) {
    testParse(query);
  });
  two_tokens_withnum.forEach(function(query) {
    testParse(query, true);
  });
};
/**
 * Multi-token queries that reach the address parser: with no house number,
 * with a house number, and with a house number plus zip code.
 *
 * Each fixture's values are joined with single spaces, in declaration
 * order, to build the query string handed to the parser.
 */
module.exports.tests.parse_address = function(test, common) {
  var addresses_nonum = [{ non_street: 'main particle', city: 'new york'},
                         { non_street: 'biggg city block' },
                         { non_street: 'the empire state building' }
                        ];
  var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'},
                          { number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'},
                          { number: 1980, street: 'house st', city: 'hoboken', state: 'NY'}
                         ];
  var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010},
                          { number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970},
                          { number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089}
                         ];

  var testParse = function(query, hasNumber, hasZip) {
    var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number ');
    testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip ');

    test(testcase, function(t) {
      var query_string = Object.keys(query).map(function(k) {
        return query[k];
      }).join(' ');

      var address = parser(query_string);
      var non_address_layer = get_layers(['admin', 'poi']);

      t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')');

      if (!hasNumber && !hasZip && query.non_street) {
        t.equal(address.regions.join(''), query_string, 'expected parsing result');
      } else {
        t.equal(address.regions.join(''), query.city, 'city in regions (' + query.city +')');
      }

      if ((hasNumber || hasZip) && query.street) {
        t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')');
        t.equal(address.number, query.number, 'correct house number (' + query.number + ')');
        t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')');
        t.equal(address.street, query.street, 'correct street name (' + query.street + ')');
      }

      if (hasZip) {
        t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')');
        t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')');
      }

      // The parser's result never carries `text`, so the target layer is
      // checked through the field that is actually returned. Every fixture
      // here is longer than 3 characters and has 3+ tokens, so whenever
      // target_layer is present it must be the non-address layers.
      if (address.target_layer) {
        t.deepEqual(address.target_layer, non_address_layer, 'target_layer set correctly to ' + non_address_layer.join(', '));
      }

      t.end();
    });
  };

  // wrap the calls: forEach passes the index as 2nd argument, which must
  // not leak into testParse's hasNumber flag
  addresses_nonum.forEach(function(fixture) {
    testParse(fixture);
  });
  address_with_num.forEach(function(fixture) {
    testParse(fixture, true);
  });
  address_with_zip.forEach(function(fixture) {
    testParse(fixture, true, true);
  });
};
/**
 * Run every registered test group, prefixing each test title with
 * 'QUERY PARSING: '.
 */
module.exports.all = function (tape, common) {
  var titlePrefix = 'QUERY PARSING: ';

  var test = function(name, testFunction) {
    return tape(titlePrefix + name, testFunction);
  };

  Object.keys(module.exports.tests).forEach(function(testCase) {
    module.exports.tests[testCase](test, common);
  });
};

13
test/unit/query/reverse.js

@ -5,6 +5,7 @@ var population = 'population';
var popularity = 'popularity'; var popularity = 'popularity';
var category = 'category'; var category = 'category';
var category_weights = require('../../../helper/category_weights'); var category_weights = require('../../../helper/category_weights');
var admin_weights = require('../../../helper/admin_weights');
var weights = require('pelias-suggester-pipeline').weights; var weights = require('pelias-suggester-pipeline').weights;
module.exports.tests = {}; module.exports.tests = {};
@ -25,6 +26,13 @@ var sort = [
'order': 'desc' 'order': 'desc'
} }
}, },
{
'_script': {
'file': popularity,
'type': 'number',
'order': 'desc'
}
},
{ {
'_script': { '_script': {
'file': population, 'file': population,
@ -34,7 +42,10 @@ var sort = [
}, },
{ {
'_script': { '_script': {
'file': popularity, 'params': {
'weights': admin_weights
},
'file': 'weights',
'type': 'number', 'type': 'number',
'order': 'desc' 'order': 'desc'
} }

312
test/unit/query/search.js

@ -4,7 +4,9 @@ var admin_boost = 'admin_boost';
var population = 'population'; var population = 'population';
var popularity = 'popularity'; var popularity = 'popularity';
var category = 'category'; var category = 'category';
var parser = require('../../../helper/query_parser');
var category_weights = require('../../../helper/category_weights'); var category_weights = require('../../../helper/category_weights');
var admin_weights = require('../../../helper/admin_weights');
var weights = require('pelias-suggester-pipeline').weights; var weights = require('pelias-suggester-pipeline').weights;
module.exports.tests = {}; module.exports.tests = {};
@ -25,6 +27,13 @@ var sort = [
'order': 'desc' 'order': 'desc'
} }
}, },
{
'_script': {
'file': popularity,
'type': 'number',
'order': 'desc'
}
},
{ {
'_script': { '_script': {
'file': population, 'file': population,
@ -34,7 +43,10 @@ var sort = [
}, },
{ {
'_script': { '_script': {
'file': popularity, 'params': {
'weights': admin_weights
},
'file': 'weights',
'type': 'number', 'type': 'number',
'order': 'desc' 'order': 'desc'
} }
@ -228,6 +240,304 @@ module.exports.tests.query = function(test, common) {
t.deepEqual(query, expected, 'valid search query'); t.deepEqual(query, expected, 'valid search query');
t.end(); t.end();
}); });
test('valid query with a full valid address', function(t) {
var address = '123 main st new york ny 10010 US';
var query = generate({ input: address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_input: parser(address),
default_layers_set: true
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': '123 main st'
}
}
],
'should': [
{
'match': {
'address.number': 123
}
},
{
'match': {
'address.street': 'main st'
}
},
{
'match': {
'address.zip': 10010
}
},
{
'match': {
'admin1_abbr': 'NY'
}
},
{
'match': {
'alpha3': 'USA'
}
},
{
'match': {
'phrase.default': '123 main st'
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query with partial address', function(t) {
var partial_address = 'soho grand, new york';
var query = generate({ input: partial_address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_input: parser(partial_address),
default_layers_set: true
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': 'soho grand'
}
}
],
'should': [
{
'match': {
'admin2': 'new york'
}
},
{
'match': {
'admin1': 'new york'
}
},
{
'match': {
'admin1_abbr': 'new york'
}
},
{
'match': {
'admin0': 'new york'
}
},
{
'match': {
'alpha3': 'new york'
}
},
{
'match': {
'phrase.default': 'soho grand'
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
t.deepEqual(query, expected, 'valid search query');
t.end();
});
}; };
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {

15
test/unit/query/sort.js

@ -5,6 +5,7 @@ var population = 'population';
var popularity = 'popularity'; var popularity = 'popularity';
var category = 'category'; var category = 'category';
var category_weights = require('../../../helper/category_weights'); var category_weights = require('../../../helper/category_weights');
var admin_weights = require('../../../helper/admin_weights');
var weights = require('pelias-suggester-pipeline').weights; var weights = require('pelias-suggester-pipeline').weights;
module.exports.tests = {}; module.exports.tests = {};
@ -25,6 +26,13 @@ var expected = [
'order': 'desc' 'order': 'desc'
} }
}, },
{
'_script': {
'file': popularity,
'type': 'number',
'order': 'desc'
}
},
{ {
'_script': { '_script': {
'file': population, 'file': population,
@ -34,12 +42,15 @@ var expected = [
}, },
{ {
'_script': { '_script': {
'file': popularity, 'params': {
'weights': admin_weights
},
'file': 'weights',
'type': 'number', 'type': 'number',
'order': 'desc' 'order': 'desc'
} }
}, },
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': category_weights

1
test/unit/run.js

@ -17,6 +17,7 @@ var tests = [
require('./query/sort'), require('./query/sort'),
require('./query/search'), require('./query/search'),
require('./query/reverse'), require('./query/reverse'),
require('./helper/query_parser'),
require('./helper/geojsonify'), require('./helper/geojsonify'),
require('./helper/outputSchema') require('./helper/outputSchema')
]; ];

29
test/unit/sanitiser/_input.js

@ -0,0 +1,29 @@
var input = require('../../../sanitiser/_input'),
parser = require('../../../helper/query_parser'),
delim = ',',
defaultError = 'invalid param \'input\': text length, must be >0',
allLayers = [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
nonAddressLayers = [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin' ],
defaultParsed= { target_layer: nonAddressLayers },
defaultClean = { input: 'test',
layers: allLayers,
size: 10,
details: true,
parsed_input: defaultParsed,
lat:0,
lon:0
},
getTargetLayers = function(query) {
var address = parser(query);
return address.target_layer;
};
module.exports = {
defaultParsed: defaultParsed,
defaultClean : defaultClean,
getTargetLayers: getTargetLayers
};

14
test/unit/sanitiser/coarse.js

@ -3,6 +3,7 @@ var coarse = require('../../../sanitiser/coarse'),
_sanitize = coarse.sanitize, _sanitize = coarse.sanitize,
middleware = coarse.middleware, middleware = coarse.middleware,
valid_layers = [ 'admin0', 'admin1', 'admin2', 'neighborhood', 'locality', 'local_admin' ], valid_layers = [ 'admin0', 'admin1', 'admin2', 'neighborhood', 'locality', 'local_admin' ],
defaultClean = require('../sanitiser/_input').defaultClean,
sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; sanitize = function(query, cb) { _sanitize({'query':query}, cb); };
module.exports.tests = {}; module.exports.tests = {};
@ -47,17 +48,12 @@ module.exports.tests.middleware_failure = function(test, common) {
module.exports.tests.middleware_success = function(test, common) { module.exports.tests.middleware_success = function(test, common) {
test('middleware success', function(t) { test('middleware success', function(t) {
var req = { query: { input: 'test', lat: 0, lon: 0 }}; var req = { query: { input: 'test', lat: 0, lon: 0 }};
var clean = defaultClean;
clean.layers = valid_layers;
var next = function( message ){ var next = function( message ){
var defaultClean = {
input: 'test',
size: 10,
layers: [ 'admin0', 'admin1', 'admin2', 'neighborhood', 'locality', 'local_admin' ],
lat: 0,
lon: 0,
details: true
};
t.equal(message, undefined, 'no error message set'); t.equal(message, undefined, 'no error message set');
t.deepEqual(req.clean, defaultClean); t.deepEqual(req.clean, clean);
t.end(); t.end();
}; };
middleware( req, undefined, next ); middleware( req, undefined, next );

1
test/unit/sanitiser/reverse.js

@ -10,6 +10,7 @@ var suggest = require('../../../sanitiser/reverse'),
lon: 0, lon: 0,
size: 10, size: 10,
details: true, details: true,
default_layers_set: true,
categories: [] categories: []
}, },
sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; sanitize = function(query, cb) { _sanitize({'query':query}, cb); };

27
test/unit/sanitiser/search.js

@ -1,5 +1,8 @@
var search = require('../../../sanitiser/search'), var search = require('../../../sanitiser/search'),
_input = require('../sanitiser/_input'),
parser = require('../../../helper/query_parser'),
defaultParsed = _input.defaultParsed,
_sanitize = search.sanitize, _sanitize = search.sanitize,
middleware = search.middleware, middleware = search.middleware,
delim = ',', delim = ',',
@ -8,7 +11,9 @@ var search = require('../../../sanitiser/search'),
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ], 'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10, size: 10,
details: true details: true,
parsed_input: defaultParsed,
default_layers_set: true
}, },
sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; sanitize = function(query, cb) { _sanitize({'query':query}, cb); };
@ -46,6 +51,8 @@ module.exports.tests.sanitize_input = function(test, common) {
sanitize({ input: input }, function( err, clean ){ sanitize({ input: input }, function( err, clean ){
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.input = input; expected.input = input;
expected.parsed_input.target_layer = _input.getTargetLayers(input);
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
t.deepEqual(clean, expected, 'clean set correctly (' + input + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + input + ')');
}); });
@ -63,14 +70,12 @@ module.exports.tests.sanitize_input_with_delim = function(test, common) {
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.input = input; expected.input = input;
var delim_index = input.indexOf(delim); expected.parsed_input = parser(input);
if (delim_index!==-1) {
expected.input = input.substring(0, input.indexOf(delim));
expected.input_admin = input.substring(delim_index + 1).trim();
}
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
t.equal(clean.parsed_input.name, expected.parsed_input.name, 'clean name set correctly');
t.equal(clean.parsed_input.admin_parts, expected.parsed_input.admin_parts, 'clean admin_parts set correctly');
t.deepEqual(clean, expected, 'clean set correctly (' + input + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + input + ')');
}); });
}); });
t.end(); t.end();
@ -98,6 +103,7 @@ module.exports.tests.sanitize_lat = function(test, common) {
expected.lat = parseFloat( lat ); expected.lat = parseFloat( lat );
expected.lon = 0; expected.lon = 0;
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (' + lat + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + lat + ')');
}); });
}); });
@ -127,6 +133,7 @@ module.exports.tests.sanitize_lon = function(test, common) {
expected.lon = parseFloat( lon ); expected.lon = parseFloat( lon );
expected.lat = 0; expected.lat = 0;
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (' + lon + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + lon + ')');
}); });
}); });
@ -141,6 +148,7 @@ module.exports.tests.sanitize_optional_geo = function(test, common) {
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
t.equal(clean.lat, undefined, 'clean set without lat'); t.equal(clean.lat, undefined, 'clean set without lat');
t.equal(clean.lon, undefined, 'clean set without lon'); t.equal(clean.lon, undefined, 'clean set without lon');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set without lat/lon'); t.deepEqual(clean, expected, 'clean set without lat/lon');
}); });
t.end(); t.end();
@ -150,6 +158,7 @@ module.exports.tests.sanitize_optional_geo = function(test, common) {
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.lon = 0; expected.lon = 0;
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (without any lat)'); t.deepEqual(clean, expected, 'clean set correctly (without any lat)');
}); });
t.end(); t.end();
@ -159,6 +168,7 @@ module.exports.tests.sanitize_optional_geo = function(test, common) {
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.lat = 0; expected.lat = 0;
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (without any lon)'); t.deepEqual(clean, expected, 'clean set correctly (without any lon)');
}); });
t.end(); t.end();
@ -199,6 +209,7 @@ module.exports.tests.sanitize_bbox = function(test, common) {
sanitize({ input: 'test', bbox: bbox }, function( err, clean ){ sanitize({ input: 'test', bbox: bbox }, function( err, clean ){
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'falling back on 50km distance from centroid'); t.deepEqual(clean, expected, 'falling back on 50km distance from centroid');
}); });
}); });
@ -218,6 +229,7 @@ module.exports.tests.sanitize_bbox = function(test, common) {
bottom: Math.min(bboxArray[1], bboxArray[3]) bottom: Math.min(bboxArray[1], bboxArray[3])
}; };
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (' + bbox + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + bbox + ')');
}); });
}); });
@ -409,6 +421,7 @@ module.exports.tests.middleware_success = function(test, common) {
var req = { query: { input: 'test' }}; var req = { query: { input: 'test' }};
var next = function( message ){ var next = function( message ){
t.equal(message, undefined, 'no error message set'); t.equal(message, undefined, 'no error message set');
req.clean.parsed_input = parser('test');
t.deepEqual(req.clean, defaultClean); t.deepEqual(req.clean, defaultClean);
t.end(); t.end();
}; };

24
test/unit/sanitiser/suggest.js

@ -2,15 +2,20 @@
var suggest = require('../../../sanitiser/suggest'), var suggest = require('../../../sanitiser/suggest'),
_sanitize = suggest.sanitize, _sanitize = suggest.sanitize,
middleware = suggest.middleware, middleware = suggest.middleware,
_input = require('../sanitiser/_input'),
parser = require('../../../helper/query_parser'),
defaultParsed = _input.defaultParsed,
delim = ',', delim = ',',
defaultError = 'invalid param \'input\': text length, must be >0', defaultError = 'invalid param \'input\': text length, must be >0',
defaultClean = { input: 'test', defaultClean = { input: 'test',
lat:0,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ], 'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
lon: 0,
size: 10, size: 10,
details: true details: true,
lat:0,
lon:0,
parsed_input: defaultParsed,
default_layers_set: true
}, },
sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; sanitize = function(query, cb) { _sanitize({'query':query}, cb); };
@ -49,6 +54,7 @@ module.exports.tests.sanitize_input = function(test, common) {
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.input = input; expected.input = input;
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser(input);
t.deepEqual(clean, expected, 'clean set correctly (' + input + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + input + ')');
}); });
}); });
@ -64,12 +70,7 @@ module.exports.tests.sanitize_input_with_delim = function(test, common) {
sanitize({ input: input, lat: 0, lon: 0 }, function( err, clean ){ sanitize({ input: input, lat: 0, lon: 0 }, function( err, clean ){
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.input = input; expected.input = input;
expected.parsed_input = parser(input);
var delim_index = input.indexOf(delim);
if (delim_index!==-1) {
expected.input = input.substring(0, input.indexOf(delim));
expected.input_admin = input.substring(delim_index + 1).trim();
}
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
t.deepEqual(clean, expected, 'clean set correctly (' + input + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + input + ')');
@ -99,6 +100,7 @@ module.exports.tests.sanitize_lat = function(test, common) {
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.lat = parseFloat( lat ); expected.lat = parseFloat( lat );
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (' + lat + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + lat + ')');
}); });
}); });
@ -127,6 +129,7 @@ module.exports.tests.sanitize_lon = function(test, common) {
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
expected.lon = parseFloat( lon ); expected.lon = parseFloat( lon );
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (' + lon + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + lon + ')');
}); });
}); });
@ -168,6 +171,7 @@ module.exports.tests.sanitize_bbox = function(test, common) {
sanitize({ input: 'test', lat: 0, lon: 0, bbox: bbox }, function( err, clean ){ sanitize({ input: 'test', lat: 0, lon: 0, bbox: bbox }, function( err, clean ){
var expected = JSON.parse(JSON.stringify( defaultClean )); var expected = JSON.parse(JSON.stringify( defaultClean ));
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'falling back on 50km distance from centroid'); t.deepEqual(clean, expected, 'falling back on 50km distance from centroid');
}); });
}); });
@ -187,6 +191,7 @@ module.exports.tests.sanitize_bbox = function(test, common) {
bottom: Math.min(bboxArray[1], bboxArray[3]) bottom: Math.min(bboxArray[1], bboxArray[3])
}; };
t.equal(err, undefined, 'no error'); t.equal(err, undefined, 'no error');
expected.parsed_input = parser('test');
t.deepEqual(clean, expected, 'clean set correctly (' + bbox + ')'); t.deepEqual(clean, expected, 'clean set correctly (' + bbox + ')');
}); });
}); });
@ -378,6 +383,7 @@ module.exports.tests.middleware_success = function(test, common) {
var req = { query: { input: 'test', lat: 0, lon: 0 }}; var req = { query: { input: 'test', lat: 0, lon: 0 }};
var next = function( message ){ var next = function( message ){
t.equal(message, undefined, 'no error message set'); t.equal(message, undefined, 'no error message set');
req.clean.parsed_input = parser('test');
t.deepEqual(req.clean, defaultClean); t.deepEqual(req.clean, defaultClean);
t.end(); t.end();
}; };

Loading…
Cancel
Save