Browse Source

Merge pull request #229 from pelias/query

merge query changes
pull/242/head
Peter Johnson a.k.a. insertcoffee 10 years ago
parent
commit
5004532c13
  1. 3
      controller/search.js
  2. 4
      helper/types.js
  3. 1
      package.json
  4. 59
      query/reverse.js
  5. 287
      query/search.js
  6. 5
      sanitiser/_text.js
  7. 43
      test/unit/fixture/reverse_standard.js
  8. 206
      test/unit/fixture/search_full_address.js
  9. 50
      test/unit/fixture/search_linguistic_bbox.js
  10. 68
      test/unit/fixture/search_linguistic_focus.js
  11. 79
      test/unit/fixture/search_linguistic_focus_bbox.js
  12. 39
      test/unit/fixture/search_linguistic_only.js
  13. 98
      test/unit/fixture/search_partial_address.js
  14. 190
      test/unit/fixture/search_regions_address.js
  15. 59
      test/unit/fixture/sort_default.js
  16. 189
      test/unit/query/reverse.js
  17. 734
      test/unit/query/search.js
  18. 4
      test/unit/sanitiser/search.js

3
controller/search.js

@ -15,7 +15,8 @@ function setup( backend, query ){
body: query( req.clean )
};
if ( req.clean.hasOwnProperty('type') ) {
// ?
if( req.clean.hasOwnProperty('type') ){
cmd.type = req.clean.type;
}

4
helper/types.js

@ -5,7 +5,7 @@ var valid_types = require( '../query/types' );
*/
var intersection = function intersection(set1, set2) {
return set2.filter(function(value) {
return set1.indexOf(value) !== -1;
return set1.indexOf(value) !== -1;
});
};
@ -38,4 +38,4 @@ module.exports = function calculate_types(clean_types) {
if (clean_types.from_address_parser) {
return clean_types.from_address_parser;
}
};
};

1
package.json

@ -49,6 +49,7 @@
"pelias-config": "^1.0.1",
"pelias-esclient": "0.0.25",
"pelias-logger": "^0.0.8",
"pelias-query": "^1.1.0",
"pelias-schema": "1.0.0",
"pelias-suggester-pipeline": "2.0.2",
"stats-lite": "^1.0.3",

59
query/reverse.js

@ -1,33 +1,52 @@
var queries = require('geopipes-elasticsearch-backend').queries,
var peliasQuery = require('pelias-query'),
sort = require('./sort');
function generate( params ){
//------------------------------
// reverse geocode query
//------------------------------
var query = new peliasQuery.layout.FilteredBooleanQuery();
var centroid = {
lat: params.lat,
lon: params.lon
};
// scoring boost
query.sort( peliasQuery.view.sort_distance );
var query = queries.distance( centroid, {
size: params.size || 1,
sort: true,
distance: '500km'
// non-scoring hard filters
query.filter( peliasQuery.view.boundary_circle );
// --------------------------------
function generateQuery( clean ){
var vs = new peliasQuery.Vars( peliasQuery.defaults );
// set defaults
vs.set({
'size': 1,
'boundary:circle:radius': '500km'
});
query.sort = query.sort.concat( sort( params ) );
// set size
if( clean.size ){
vs.var( 'size', clean.size );
}
if ( params.categories && params.categories.length > 0 ) {
addCategoriesFilter( query, params.categories );
// focus point centroid
if( clean.lat && clean.lon ){
vs.set({
// focus point to score by distance
'focus:point:lat': clean.lat,
'focus:point:lon': clean.lon,
// bounding circle
'boundary:circle:lat': clean.lat,
'boundary:circle:lon': clean.lon,
});
}
return query;
}
var result = query.render( vs );
function addCategoriesFilter( query, categories ) {
query.query.filtered.filter.bool.must.push({
terms: { category: categories }
});
// @todo: remove this hack
return JSON.parse( JSON.stringify( result ) );
}
module.exports = generate;
module.exports = generateQuery;

287
query/search.js

@ -1,188 +1,161 @@
var queries = require('geopipes-elasticsearch-backend').queries,
var peliasQuery = require('pelias-query'),
sort = require('../query/sort'),
adminFields = require('../helper/adminFields')(),
addressWeights = require('../helper/address_weights');
adminFields = require('../helper/adminFields')();
//------------------------------
// general-purpose search query
//------------------------------
var query = new peliasQuery.layout.FilteredBooleanQuery();
function generate( params ){
var centroid = null;
// mandatory matches
query.score( peliasQuery.view.boundary_country, 'must' );
query.score( peliasQuery.view.ngrams, 'must' );
if ( params.lat && params.lon ){
centroid = {
lat: params.lat,
lon: params.lon
};
}
var query = queries.distance( centroid, { size: params.size } );
var text = params.text;
// scoring boost
query.score( peliasQuery.view.phrase );
query.score( peliasQuery.view.focus );
if (params.bbox) {
query = queries.bbox ( centroid, { size: params.size, bbox: params.bbox } );
// address components
query.score( peliasQuery.view.address('housenumber') );
query.score( peliasQuery.view.address('street') );
query.score( peliasQuery.view.address('postcode') );
// admin components
query.score( peliasQuery.view.admin('alpha3') );
query.score( peliasQuery.view.admin('admin0') );
query.score( peliasQuery.view.admin('admin1') );
query.score( peliasQuery.view.admin('admin1_abbr') );
query.score( peliasQuery.view.admin('admin2') );
query.score( peliasQuery.view.admin('local_admin') );
query.score( peliasQuery.view.admin('locality') );
query.score( peliasQuery.view.admin('neighborhood') );
// non-scoring hard filters
query.filter( peliasQuery.view.boundary_circle );
query.filter( peliasQuery.view.boundary_rect );
// --------------------------------
function generateQuery( clean ){
var vs = new peliasQuery.Vars( peliasQuery.defaults );
// set input text
vs.var( 'input:name', clean.text );
// set size
if( clean.size ){
vs.var( 'size', clean.size );
}
query.query.filtered.query = {
'bool': {
'must': [],
'should': []
}
};
if (params.parsed_text) {
// update text
if (params.parsed_text.number && params.parsed_text.street) {
text = params.parsed_text.number + ' ' + params.parsed_text.street;
} else if (params.parsed_text.admin_parts) {
text = params.parsed_text.name;
}
// focus point
if( clean.lat && clean.lon ){
vs.set({
'focus:point:lat': clean.lat,
'focus:point:lon': clean.lon
});
}
addParsedMatch(query, text, params.parsed_text);
// bbox
if( clean.bbox ){
vs.set({
'boundary:rect:top': clean.bbox.top,
'boundary:rect:right': clean.bbox.right,
'boundary:rect:bottom': clean.bbox.bottom,
'boundary:rect:left': clean.bbox.left
});
}
// add search condition to distance query
query.query.filtered.query.bool.must.push({
'match': {
'name.default': {
'query': text,
'analyzer': 'peliasOneEdgeGram'
}
// address parsing
if( clean.parsed_text ){
// is it a street address?
var isStreetAddress = clean.parsed_text.hasOwnProperty('number') && clean.parsed_text.hasOwnProperty('street');
if( isStreetAddress ){
vs.var( 'input:name', clean.parsed_text.number + ' ' + clean.parsed_text.street );
}
});
// add phrase matching query
// note: this is required for shingle/phrase matching
query.query.filtered.query.bool.should.push({
'match': {
'phrase.default': {
'query': text,
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
// I don't understand this
else if( clean.parsed_text.admin_parts ) {
vs.var( 'input:name', clean.parsed_text.name );
}
});
query.sort = query.sort.concat( sort( params ) );
// or this..
else {
console.warn( 'chaos monkey asks: what happens now?' );
console.log( clean );
try{ throw new Error(); } catch(e){ console.error( e.stack ); } // print a stack trace
}
return query;
}
// ==== add parsed matches [address components] ====
/**
* Traverse the parsed text object, containing all the address parts detected in query string.
* Add matches to query for each identifiable component.
*
* @param {Object} query
* @param {string} defaultText
* @param {Object} parsedText
*/
function addParsedMatch(query, defaultText, parsedText) {
query.query.filtered.query.bool.should = query.query.filtered.query.bool.should || [];
// copy expected admin fields so we can remove them as we parse the address
var unmatchedAdminFields = adminFields.slice();
// address
// number, street, postalcode
addMatch(query, unmatchedAdminFields, 'address.number', parsedText.number, addressWeights.number);
addMatch(query, unmatchedAdminFields, 'address.street', parsedText.street, addressWeights.street);
addMatch(query, unmatchedAdminFields, 'address.zip', parsedText.postalcode, addressWeights.zip);
// city
// admin2, locality, local_admin, neighborhood
addMatch(query, unmatchedAdminFields, 'admin2', parsedText.city, addressWeights.admin2);
// state
// admin1, admin1_abbr
addMatch(query, unmatchedAdminFields, 'admin1_abbr', parsedText.state, addressWeights.admin1_abbr);
// country
// admin0, alpha3
addMatch(query, unmatchedAdminFields, 'alpha3', parsedText.country, addressWeights.alpha3);
addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedText, defaultText);
}
// house number
if( clean.parsed_text.hasOwnProperty('number') ){
vs.var( 'input:housenumber', clean.parsed_text.number );
}
/**
* Check for additional admin fields in the parsed input, and if any was found
* combine into single string and match against all unmatched admin fields.
*
* @param {Object} query
* @param {Array} unmatchedAdminFields
* @param {Object} parsedInput
* @param {string} defaultInput
*/
function addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput) {
if (unmatchedAdminFields.length === 0 ) {
return;
}
// street name
if( clean.parsed_text.hasOwnProperty('street') ){
vs.var( 'input:street', clean.parsed_text.street );
}
var leftovers = [];
// postal code
if( clean.parsed_text.hasOwnProperty('postalcode') ){
vs.var( 'input:postcode', clean.parsed_text.postalcode );
}
if (parsedInput.admin_parts) {
leftovers.push(parsedInput.admin_parts);
}
else if (parsedInput.regions) {
leftovers.push(parsedInput.regions);
}
// ==== add parsed matches [admin components] ====
if (leftovers.length === 0) {
return;
}
// city
if( clean.parsed_text.hasOwnProperty('city') ){
vs.var( 'input:admin2', clean.parsed_text.city );
}
leftovers = leftovers.join(' ');
// state
if( clean.parsed_text.hasOwnProperty('state') ){
vs.var( 'input:admin1_abbr', clean.parsed_text.state );
}
// if there are additional regions/admin_parts found
if (leftovers !== defaultInput) {
unmatchedAdminFields.forEach(function (key) {
// combine all the leftover parts into one string
addMatch(query, [], key, leftovers);
});
}
}
// country
if( clean.parsed_text.hasOwnProperty('country') ){
vs.var( 'input:alpha3', clean.parsed_text.country );
}
/**
* Add key:value match to query. Apply boost if specified.
*
* @param {Object} query
* @param {Array} unmatched
* @param {string} key
* @param {string|number|undefined} value
* @param {number|undefined} [boost] optional
*/
function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line
if (typeof value === 'undefined') {
return;
}
// ==== deal with the 'leftover' components ====
// @todo: clean up this code
var match = {};
// a concept called 'leftovers' which is just 'admin_parts' or 'regions'.
var leftoversString = '';
if( clean.parsed_text.hasOwnProperty('admin_parts') ){
leftoversString = clean.parsed_text.admin_parts;
}
else if( clean.parsed_text.hasOwnProperty('regions') ){
leftoversString = clean.parsed_text.regions.join(' ');
}
if (boost) {
match[key] = {
query: value,
boost: boost
};
}
else {
match[key] = value;
// if we have 'leftovers' then assign them to any fields which
// currently don't have a value assigned.
if( leftoversString.length ){
var unmatchedAdminFields = adminFields.slice();
// cycle through fields and set fields which
// are still currently unset
unmatchedAdminFields.forEach( function( key ){
if( !vs.isset( 'input:' + key ) ){
vs.var( 'input:' + key, leftoversString );
}
});
}
}
query.query.filtered.query.bool.should.push({ 'match': match });
var result = query.render( vs );
removeFromUnmatched(unmatched, key);
}
// @todo: remove unnecessary sort conditions
result.sort = result.sort.concat( sort( clean ) );
/**
* If key is found in unmatched list, remove it from the array
*
* @param {Array} unmatched
* @param {string} key
*/
function removeFromUnmatched(unmatched, key) {
var index = unmatched.indexOf(key);
if (index !== -1) {
unmatched.splice(index, 1);
}
// @todo: remove this hack
return JSON.parse( JSON.stringify( result ) );
}
module.exports = generate;
module.exports = generateQuery;

5
sanitiser/_text.js

@ -1,16 +1,17 @@
var isObject = require('is-object');
var query_parser = require('../helper/query_parser');
// validate inputs, convert types and apply defaults
// validate texts, convert types and apply defaults
function sanitize( req ){
req.clean = req.clean || {};
var params= req.query;
// ensure the input params are a valid object
// ensure the query params are a valid object
if( !isObject( params ) ){
params = {};
}
// input text
if('string' !== typeof params.text || !params.text.length){
return {
'error': true,

43
test/unit/fixture/reverse_standard.js

@ -0,0 +1,43 @@
/**
 * Expected query body for the reverse-geocode unit test.
 *
 * test/unit/query/reverse.js deep-compares this object against the output of
 * query/reverse.js called with { lat: 29.49136, lon: -82.50622 } and no size.
 */
module.exports = {
'query': {
'filtered': {
// no scoring clauses are registered for reverse queries, so the bool is empty
'query': {
'bool': {}
},
'filter': {
'bool': {
'must': [
{
// hard filter built from the 'boundary:circle:*' variables;
// '500km' is the default radius set in query/reverse.js
'geo_distance': {
'distance': '500km',
'distance_type': 'plane',
'optimize_bbox': 'indexed',
'_cache': true,
'center_point': {
'lat': 29.49136,
'lon': -82.50622
}
}
}
]
}
}
}
},
'sort': [
'_score',
{
// ascending distance from the same point used by the filter above
'_geo_distance': {
'center_point': {
'lat': 29.49136,
'lon': -82.50622
},
'order': 'asc',
'distance_type': 'plane'
}
}
],
// default size applied when the caller does not supply one
'size': 1,
'track_scores': true
};

206
test/unit/fixture/search_full_address.js

@ -0,0 +1,206 @@
/**
 * Expected search query body for a fully parsed street address
 * (name '123 main st', house number 123, postcode 10010, state 'NY',
 * country 'USA', remaining admin fields 'new york').
 *
 * NOTE(review): presumably consumed by the 'valid query with a full valid
 * address' test in test/unit/query/search.js -- the require() call is not
 * visible in this view, confirm before relying on it.
 */
var peliasQuery = require('pelias-query'),
// boost/analyzer values are read from the pelias-query defaults rather than
// hard-coded, so the fixture follows whatever the library ships with
vs = new peliasQuery.Vars( peliasQuery.defaults );
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match on the name field
'must': [{
'match': {
'name.default': {
'query': '123 main st',
'analyzer': 'peliasOneEdgeGram',
'boost': 1
}
}
}],
// optional scoring clauses: phrase match, address parts, then admin fields
'should': [{
'match': {
'phrase.default': {
'query': '123 main st',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2,
'boost': 1
}
}
},{
'match': {
'address.number': {
'query': 123,
'boost': vs.var('address:housenumber:boost').get(),
'analyzer': vs.var('address:housenumber:analyzer').get()
}
}
}, {
'match': {
'address.street': {
'query': 'main st',
'boost': vs.var('address:street:boost').get(),
'analyzer': vs.var('address:street:analyzer').get()
}
}
}, {
'match': {
'address.zip': {
'query': 10010,
'boost': vs.var('address:postcode:boost').get(),
'analyzer': vs.var('address:postcode:analyzer').get()
}
}
}, {
'match': {
'alpha3': {
'query': 'USA',
'boost': vs.var('admin:alpha3:boost').get(),
'analyzer': vs.var('admin:alpha3:analyzer').get()
}
}
}, {
'match': {
'admin0': {
'query': 'new york',
'boost': vs.var('admin:admin0:boost').get(),
'analyzer': vs.var('admin:admin0:analyzer').get()
}
}
}, {
'match': {
'admin1': {
'query': 'new york',
'boost': vs.var('admin:admin1:boost').get(),
'analyzer': vs.var('admin:admin1:analyzer').get()
}
}
}, {
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': vs.var('admin:admin1_abbr:boost').get(),
'analyzer': vs.var('admin:admin1_abbr:analyzer').get()
}
}
}, {
'match': {
'admin2': {
'query': 'new york',
'boost': vs.var('admin:admin2:boost').get(),
'analyzer': vs.var('admin:admin2:analyzer').get()
}
}
}, {
'match': {
'local_admin': {
'query': 'new york',
'boost': vs.var('admin:local_admin:boost').get(),
'analyzer': vs.var('admin:local_admin:analyzer').get()
}
}
}, {
'match': {
'locality': {
'query': 'new york',
'boost': vs.var('admin:locality:boost').get(),
'analyzer': vs.var('admin:locality:analyzer').get()
}
}
}, {
'match': {
'neighborhood': {
'query': 'new york',
'boost': vs.var('admin:neighborhood:boost').get(),
'analyzer': vs.var('admin:neighborhood:analyzer').get()
}
}
}]
}
},
// no focus point or bounding box supplied, so there are no hard filters
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
// NOTE(review): the weights below are inlined literals, whereas
// test/unit/fixture/sort_default.js loads them from helper modules --
// confirm the two are meant to stay in sync.
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};

50
test/unit/fixture/search_linguistic_bbox.js

@ -0,0 +1,50 @@
/**
 * Expected search query body for the text 'test' restricted to a bounding box
 * (no focus point).
 *
 * Loaded by the 'valid search + bbox' test in test/unit/query/search.js, which
 * overwrites `sort` with the shared sort_default fixture before comparing.
 */
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match on the name field
'must': [{
'match': {
'name.default': {
'query': 'test',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
}
}
}],
// optional phrase match used for scoring
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2
}
}
}]
}
},
'filter': {
'bool': {
// hard filter built from the 'boundary:rect:*' variables
'must': [{
'geo_bounding_box': {
'center_point': {
'top': 47.47,
'right': -61.84,
'bottom': 11.51,
'left': -103.16
},
'_cache': true,
'type': 'indexed'
}
}]
}
}
}
},
// NOTE(review): '_sort' looks like a placeholder (search_linguistic_only.js
// uses '_score'); the test replaces this value before the comparison.
'sort': [ '_sort' ],
'size': 10,
'track_scores': true
};

68
test/unit/fixture/search_linguistic_focus.js

@ -0,0 +1,68 @@
/**
 * Expected search query body for the text 'test' with a focus point at
 * lat 29.49136 / lon -82.50622 and no bounding box.
 *
 * Loaded by the 'search search + focus' test in test/unit/query/search.js,
 * which overwrites `sort` with the shared sort_default fixture before comparing.
 */
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match on the name field
'must': [{
'match': {
'name.default': {
'query': 'test',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
}
}
}],
'should': [{
// plain phrase match
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2
}
}
}, {
// the same phrase match wrapped in a function_score with a linear
// decay around the focus point
'function_score': {
'query': {
'match': {
'phrase.default': {
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2,
'query': 'test'
}
}
},
'functions': [{
'linear': {
'center_point': {
'origin': {
'lat': 29.49136,
'lon': -82.50622
},
'offset': '1km',
'scale': '50km',
'decay': 0.5
}
}
}],
'score_mode': 'avg',
'boost_mode': 'replace'
}
}]
}
},
// a focus point only influences scoring; it adds no hard filter
'filter': {
'bool': {
'must': []
}
}
}
},
// NOTE(review): '_sort' looks like a placeholder (search_linguistic_only.js
// uses '_score'); the test replaces this value before the comparison.
'sort': [ '_sort' ],
'size': 10,
'track_scores': true
};

79
test/unit/fixture/search_linguistic_focus_bbox.js

@ -0,0 +1,79 @@
/**
 * Expected search query body for the text 'test' with both a focus point
 * (lat 29.49136 / lon -82.50622) and a bounding box.
 *
 * Loaded by the 'valid search + focus + bbox' test in
 * test/unit/query/search.js, which overwrites `sort` with the shared
 * sort_default fixture before comparing.
 */
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match on the name field
'must': [{
'match': {
'name.default': {
'query': 'test',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
}
}
}],
'should': [{
// plain phrase match
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2
}
}
}, {
// the same phrase match wrapped in a function_score with a linear
// decay around the focus point
'function_score': {
'query': {
'match': {
'phrase.default': {
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2,
'query': 'test'
}
}
},
'functions': [{
'linear': {
'center_point': {
'origin': {
'lat': 29.49136,
'lon': -82.50622
},
'offset': '1km',
'scale': '50km',
'decay': 0.5
}
}
}],
'score_mode': 'avg',
'boost_mode': 'replace'
}
}]
}
},
'filter': {
'bool': {
// hard filter built from the 'boundary:rect:*' variables
'must': [{
'geo_bounding_box': {
'center_point': {
'top': 47.47,
'right': -61.84,
'bottom': 11.51,
'left': -103.16
},
'_cache': true,
'type': 'indexed'
}
}]
}
}
}
},
// NOTE(review): '_sort' looks like a placeholder (search_linguistic_only.js
// uses '_score'); the test replaces this value before the comparison.
'sort': [ '_sort' ],
'size': 10,
'track_scores': true
};

39
test/unit/fixture/search_linguistic_only.js

@ -0,0 +1,39 @@
/**
 * Expected search query body for the text 'test' with neither a focus point
 * nor a bounding box.
 *
 * Loaded by the 'valid lingustic-only search' test in
 * test/unit/query/search.js, which overwrites `sort` with the shared
 * sort_default fixture before comparing.
 */
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match on the name field
'must': [{
'match': {
'name.default': {
'query': 'test',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
}
}
}],
// optional phrase match used for scoring
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2
}
}
}]
}
},
// no geographic constraints, so the filter list is empty
'filter': {
'bool': {
'must': []
}
}
}
},
'sort': [ '_score' ],
'size': 10,
'track_scores': true
};

98
test/unit/fixture/search_partial_address.js

@ -0,0 +1,98 @@
/**
 * Expected search query body for a partially parsed address: name
 * 'soho grand' with 'new york' applied to every admin field.
 *
 * Every admin field, admin1_abbr included, carries the same 'new york'
 * string, which is consistent with the 'leftovers' handling in
 * query/search.js (unset admin inputs receive the leftover string).
 *
 * NOTE(review): the test that loads this fixture is not visible in this
 * view -- confirm which case in test/unit/query/search.js uses it.
 */
var peliasQuery = require('pelias-query'),
// boost/analyzer values are read from the pelias-query defaults
vs = new peliasQuery.Vars( peliasQuery.defaults );
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match on the name field
'must': [{
'match': {
'name.default': {
'query': 'soho grand',
'analyzer': 'peliasOneEdgeGram',
'boost': 1
}
}
}],
// optional scoring clauses: phrase match followed by the admin fields
'should': [{
'match': {
'phrase.default': {
'query': 'soho grand',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2,
'boost': 1
}
}
},{
'match': {
'admin0': {
'query': 'new york',
'boost': vs.var('admin:admin0:boost').get(),
'analyzer': vs.var('admin:admin0:analyzer').get()
}
}
}, {
'match': {
'admin1': {
'query': 'new york',
'boost': vs.var('admin:admin1:boost').get(),
'analyzer': vs.var('admin:admin1:analyzer').get()
}
}
}, {
'match': {
'admin1_abbr': {
'query': 'new york',
'boost': vs.var('admin:admin1_abbr:boost').get(),
'analyzer': vs.var('admin:admin1_abbr:analyzer').get()
}
}
}, {
'match': {
'admin2': {
'query': 'new york',
'boost': vs.var('admin:admin2:boost').get(),
'analyzer': vs.var('admin:admin2:analyzer').get()
}
}
}, {
'match': {
'local_admin': {
'query': 'new york',
'boost': vs.var('admin:local_admin:boost').get(),
'analyzer': vs.var('admin:local_admin:analyzer').get()
}
}
}, {
'match': {
'locality': {
'query': 'new york',
'boost': vs.var('admin:locality:boost').get(),
'analyzer': vs.var('admin:locality:analyzer').get()
}
}
}, {
'match': {
'neighborhood': {
'query': 'new york',
'boost': vs.var('admin:neighborhood:boost').get(),
'analyzer': vs.var('admin:neighborhood:analyzer').get()
}
}
}]
}
},
// no focus point or bounding box supplied, so there are no hard filters
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [ '_score' ],
'track_scores': true
};

190
test/unit/fixture/search_regions_address.js

@ -0,0 +1,190 @@
/**
 * Expected search query body for a street address with a region: name
 * '1 water st', house number 1, street 'water st', state 'NY', and
 * 'manhattan' applied to the remaining admin fields.
 *
 * Unlike search_full_address.js there is no postcode or alpha3 clause here.
 *
 * NOTE(review): the test that loads this fixture is not visible in this
 * view -- confirm which case in test/unit/query/search.js uses it.
 */
var peliasQuery = require('pelias-query'),
// boost/analyzer values are read from the pelias-query defaults
vs = new peliasQuery.Vars( peliasQuery.defaults );
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match on the name field
'must': [{
'match': {
'name.default': {
'query': '1 water st',
'analyzer': 'peliasOneEdgeGram',
'boost': 1
}
}
}],
// optional scoring clauses: phrase match, address parts, then admin fields
'should': [{
'match': {
'phrase.default': {
'query': '1 water st',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2,
'boost': 1
}
}
},{
'match': {
'address.number': {
'query': 1,
'boost': vs.var('address:housenumber:boost').get(),
'analyzer': vs.var('address:housenumber:analyzer').get()
}
}
}, {
'match': {
'address.street': {
'query': 'water st',
'boost': vs.var('address:street:boost').get(),
'analyzer': vs.var('address:street:analyzer').get()
}
}
}, {
'match': {
'admin0': {
'query': 'manhattan',
'boost': vs.var('admin:admin0:boost').get(),
'analyzer': vs.var('admin:admin0:analyzer').get()
}
}
}, {
'match': {
'admin1': {
'query': 'manhattan',
'boost': vs.var('admin:admin1:boost').get(),
'analyzer': vs.var('admin:admin1:analyzer').get()
}
}
}, {
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': vs.var('admin:admin1_abbr:boost').get(),
'analyzer': vs.var('admin:admin1_abbr:analyzer').get()
}
}
}, {
'match': {
'admin2': {
'query': 'manhattan',
'boost': vs.var('admin:admin2:boost').get(),
'analyzer': vs.var('admin:admin2:analyzer').get()
}
}
}, {
'match': {
'local_admin': {
'query': 'manhattan',
'boost': vs.var('admin:local_admin:boost').get(),
'analyzer': vs.var('admin:local_admin:analyzer').get()
}
}
}, {
'match': {
'locality': {
'query': 'manhattan',
'boost': vs.var('admin:locality:boost').get(),
'analyzer': vs.var('admin:locality:analyzer').get()
}
}
}, {
'match': {
'neighborhood': {
'query': 'manhattan',
'boost': vs.var('admin:neighborhood:boost').get(),
'analyzer': vs.var('admin:neighborhood:analyzer').get()
}
}
}]
}
},
// no focus point or bounding box supplied, so there are no hard filters
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
// NOTE(review): the weights below are inlined literals, whereas
// test/unit/fixture/sort_default.js loads them from helper modules --
// confirm the two are meant to stay in sync.
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};

59
test/unit/fixture/sort_default.js

@ -0,0 +1,59 @@
/**
 * Default sort clause shared by the search query tests.
 *
 * test/unit/query/search.js requires this array and assigns it to
 * `expected.sort` on the per-case fixtures before comparing. Weight tables
 * are loaded from the project's helper modules instead of being inlined.
 */
var category_weights = require('../../../helper/category_weights');
var admin_weights = require('../../../helper/admin_weights');
var weights = require('pelias-suggester-pipeline').weights;
module.exports = [
'_score',
// script sorts without parameters
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
// 'weights' script fed with the admin weight table
{
'_script': {
'params': {
'weights': admin_weights
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
// 'category' script fed with the default category weights
{
'_script': {
'params': {
'category_weights': category_weights.default
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
// 'weights' script again, this time with the suggester-pipeline weights
{
'_script': {
'params': {
'weights': weights
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
];

189
test/unit/query/reverse.js

@ -1,15 +1,16 @@
var generate = require('../../../query/reverse');
var admin_boost = 'admin_boost';
var population = 'population';
var popularity = 'popularity';
var category = 'category';
var category_weights = require('../../../helper/category_weights');
var admin_weights = require('../../../helper/admin_weights');
var weights = require('pelias-suggester-pipeline').weights;
module.exports.tests = {};
/**
 * Print two values as pretty-printed JSON (2-space indent), each preceded by
 * a separator line, with a closing separator at the end. Intended for
 * eyeballing an actual/expected pair while debugging a failing test.
 *
 * @param {*} a first value to print
 * @param {*} b second value to print
 */
function debug( a,b ){
  var separator = '----------------------';
  [ a, b ].forEach( function( value ){
    console.log( separator );
    console.log( JSON.stringify( value, null, 2 ) );
  });
  console.log( separator );
}
module.exports.tests.interface = function(test, common) {
test('valid interface', function(t) {
t.equal(typeof generate, 'function', 'valid function');
@ -17,185 +18,25 @@ module.exports.tests.interface = function(test, common) {
});
};
var sort = [
'_score',
{
'_script': {
'file': admin_boost,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': popularity,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': population,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': admin_weights
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': category_weights.default
},
'file': category,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': weights
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
];
module.exports.tests.query = function(test, common) {
test('valid query', function(t) {
var query = generate({
lat: 29.49136, lon: -82.50622
});
var expected = {
'query': {
'filtered': {
'query': {
'match_all': {}
},
'filter': {
'bool': {
'must': [
{
'geo_distance': {
'distance': '500km',
'distance_type': 'plane',
'optimize_bbox': 'indexed',
'_cache': true,
'center_point': {
'lat': '29.49',
'lon': '-82.51'
}
}
}
]
}
}
}
},
'sort': [
'_score',
{
'_geo_distance': {
'center_point': {
'lat': 29.49136,
'lon': -82.50622
},
'order': 'asc',
'unit': 'km'
}
}
].concat(sort.slice(1)),
'size': 1,
'track_scores': true
};
var expected = require('../fixture/reverse_standard');
t.deepEqual(query, expected, 'valid reverse query');
t.end();
});
test('size fuzz test', function(t) {
// test different sizes
var sizes = [1,2,10,undefined,null];
sizes.forEach( function(size) {
query = generate({
sizes.forEach( function( size ){
var query = generate({
lat: 29.49136, lon: -82.50622, size: size
});
expected.size = size ? size : 1;
t.deepEqual(query, expected, 'valid reverse query for size: '+ size);
});
t.end();
});
test('valid query with categories', function(t) {
var params = { lat: 29.49136, lon: -82.50622, categories: ['food', 'education', 'entertainment'] };
var query = generate(params);
var expected = {
'query': {
'filtered': {
'query': {
'match_all': {}
},
'filter': {
'bool': {
'must': [
{
'geo_distance': {
'distance': '500km',
'distance_type': 'plane',
'optimize_bbox': 'indexed',
'_cache': true,
'center_point': {
'lat': '29.49',
'lon': '-82.51'
}
}
},
{
'terms': {
'category': params.categories
}
}
]
}
}
}
},
'sort': [
'_score',
{
'_geo_distance': {
'center_point': {
'lat': 29.49136,
'lon': -82.50622
},
'order': 'asc',
'unit': 'km'
}
}
].concat(sort.slice(1)),
'size': 1,
'track_scores': true
};
t.deepEqual(query, expected, 'valid reverse query with categories');
// test different sizes
var sizes = [1,2,10,undefined,null];
sizes.forEach( function(size) {
params.size = size;
query = generate(params);
expected.size = size ? size : 1;
t.deepEqual(query, expected, 'valid reverse query for size: '+ size);
t.equal( query.size, size ? size : 1, 'valid reverse query for size: '+ size);
});
t.end();
});

734
test/unit/query/search.js

@ -20,113 +20,10 @@ module.exports.tests.interface = function(test, common) {
});
};
var sort = [
'_score',
{
'_script': {
'file': admin_boost,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': popularity,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': population,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': admin_weights
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': category_weights.default
},
'file': category,
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': weights
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
];
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [{
'match': {
'name.default': {
'query': 'test',
'analyzer': 'peliasOneEdgeGram'
}
}
}],
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
}
}]
}
},
'filter': {
'bool': {
'must': [
{
'geo_bounding_box': {
'center_point': {
'top': '47.47',
'right': '-61.84',
'bottom':'11.51',
'left': '-103.16'
},
'_cache': true,
'type': 'indexed'
}
}
]
}
}
}
},
'sort': sort,
'size': 10,
'track_scores': true
};
var sort = require('../fixture/sort_default');
module.exports.tests.query = function(test, common) {
test('valid query', function(t) {
test('valid search + focus + bbox', function(t) {
var query = generate({
text: 'test', size: 10,
lat: 29.49136, lon: -82.50622,
@ -139,11 +36,14 @@ module.exports.tests.query = function(test, common) {
layers: ['test']
});
var expected = require('../fixture/search_linguistic_focus_bbox');
expected.sort = sort;
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query without lat/lon', function(t) {
test('valid search + bbox', function(t) {
var query = generate({
text: 'test', size: 10,
bbox: {
@ -154,114 +54,36 @@ module.exports.tests.query = function(test, common) {
},
layers: ['test']
});
var expected = require('../fixture/search_linguistic_bbox');
expected.sort = sort;
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query with no lat/lon and no bbox', function(t) {
test('valid lingustic-only search', function(t) {
var query = generate({
text: 'test', size: 10,
layers: ['test']
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [{
'match': {
'name.default': {
'query': 'test',
'analyzer': 'peliasOneEdgeGram'
}
}
}],
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
}
}]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': sort,
'track_scores': true
};
var expected = require('../fixture/search_linguistic_only');
expected.sort = sort;
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query without bbox', function(t) {
test('search search + focus', function(t) {
var query = generate({
text: 'test', size: 10,
lat: 29.49136, lon: -82.50622,
layers: ['test']
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [{
'match': {
'name.default': {
'query': 'test',
'analyzer': 'peliasOneEdgeGram'
}
}
}],
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
}
}]
}
},
'filter': {
'bool': {
'must': [
{
'geo_distance': {
'distance': '50km',
'distance_type': 'plane',
'optimize_bbox': 'indexed',
'_cache': true,
'center_point': {
'lat': '29.49',
'lon': '-82.51'
}
}
}
]
}
}
}
},
'sort': ['_score'].concat(sort.slice(1)),
'size': 10,
'track_scores': true
};
var expected = require('../fixture/search_linguistic_focus');
expected.sort = sort;
t.deepEqual(query, expected, 'valid search query');
t.end();
@ -269,367 +91,32 @@ module.exports.tests.query = function(test, common) {
test('valid query with a full valid address', function(t) {
var address = '123 main st new york ny 10010 US';
var query = generate({ text: address,
var query = generate({ text: address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_text: parser.get_parsed_address(address)
parsed_text: parser.get_parsed_address(address),
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': {
'query': '123 main st',
'analyzer': 'peliasOneEdgeGram'
}
}
}
],
'should': [
{
'match': {
'address.number': {
'query': 123,
'boost': address_weights.number
}
}
},
{
'match': {
'address.street': {
'query': 'main st',
'boost': address_weights.street
}
}
},
{
'match': {
'address.zip': {
'query': 10010,
'boost': address_weights.zip
}
}
},
{
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': address_weights.admin1_abbr
}
}
},
{
'match': {
'alpha3': {
'query': 'USA',
'boost': address_weights.alpha3
}
}
},
{
match: {
admin0: 'new york'
}
},
{
match: {
admin1: 'new york'
}
},
{
match: {
admin2: 'new york'
}
},
{
match: {
local_admin: 'new york'
}
},
{
match: {
locality: 'new york'
}
},
{
match: {
neighborhood: 'new york'
}
},
{
'match': {
'phrase.default': {
'query': '123 main st',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
var expected = require('../fixture/search_full_address');
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query with partial address', function(t) {
var partial_address = 'soho grand, new york';
var query = generate({ text: partial_address,
var query = generate({ text: partial_address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_text: parser.get_parsed_address(partial_address)
parsed_text: parser.get_parsed_address(partial_address),
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': {
'query': 'soho grand',
'analyzer': 'peliasOneEdgeGram'
}
}
}
],
'should': [
{
'match': {
'admin0': 'new york'
}
},
{
'match': {
'admin1': 'new york'
}
},
{
'match': {
'admin1_abbr': 'new york'
}
},
{
'match': {
'admin2': 'new york'
}
},
{
'match': {
'local_admin': 'new york'
}
},
{
'match': {
'locality': 'new york'
}
},
{
'match': {
'neighborhood': 'new york'
}
},
{
'match': {
'phrase.default': {
'query': 'soho grand',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2,
'admin': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
var expected = require('../fixture/search_partial_address');
expected.sort = sort;
t.deepEqual(query, expected, 'valid search query');
t.end();
@ -642,179 +129,10 @@ module.exports.tests.query = function(test, common) {
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_text: parser.get_parsed_address(partial_address)
parsed_text: parser.get_parsed_address(partial_address),
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': {
'query': '1 water st',
'analyzer': 'peliasOneEdgeGram'
}
}
}
],
'should': [
{
'match': {
'address.number': {
'query': 1,
'boost': address_weights.number
}
}
},
{
'match': {
'address.street': {
'query': 'water st',
'boost': address_weights.street
}
}
},
{
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': address_weights.admin1_abbr
}
}
},
{
'match': {
'admin0': 'manhattan'
}
},
{
'match': {
'admin1': 'manhattan'
}
},
{
'match': {
'admin2': 'manhattan'
}
},
{
'match': {
'local_admin': 'manhattan'
}
},
{
'match': {
'locality': 'manhattan'
}
},
{
'match': {
'neighborhood': 'manhattan'
}
},
{
'match': {
'phrase.default': {
'query': '1 water st',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
var expected = require('../fixture/search_regions_address');
t.deepEqual(query, expected, 'valid search query');
t.end();

4
test/unit/sanitiser/search.js

@ -12,7 +12,7 @@ var search = require('../../../sanitiser/search'),
},
size: 10,
details: true,
parsed_text: defaultParsed
parsed_text: defaultParsed,
},
sanitize = function(query, cb) { _sanitize({'query':query}, cb); };
@ -70,7 +70,7 @@ module.exports.tests.sanitise_valid_text = function(test, common) {
module.exports.tests.sanitize_text_with_delim = function(test, common) {
var texts = [ 'a,bcd', '123 main st, admin1', ',,,', ' ' ];
test('valid texts with a comma', function(t) {
test('valid texts with a comma', function(t) {
texts.forEach( function( text ){
sanitize({ text: text }, function( err, clean ){
var expected = JSON.parse(JSON.stringify( defaultClean ));

Loading…
Cancel
Save