Browse Source

Merge pull request #192 from pelias/admin-fields

Search query builder not checking all available admin values
pull/206/head
Diana Shkolnikov 10 years ago
parent
commit
7f265c0f67
  1. 13
      helper/address_weights.js
  2. 42
      helper/adminFields.js
  3. 9
      helper/category_weights.js
  4. 5
      package.json
  5. 186
      query/search.js
  6. 11
      query/sort.js
  7. 84
      test/unit/helper/adminFields.js
  8. 2
      test/unit/query/reverse.js
  9. 259
      test/unit/query/search.js
  10. 2
      test/unit/query/sort.js
  11. 3
      test/unit/run.js

13
helper/address_weights.js

@ -0,0 +1,13 @@
/**
* These values specify how much a document that matches certain parts of an address
* should be boosted in elasticsearch results.
*
* query/search.js passes each value as the `boost` of the `match` clause it builds
* for the corresponding parsed address component.
*/
module.exports = {
number: 1, // boost for the `address.number` match (parsed `number`)
street: 3, // boost for the `address.street` match (parsed `street`)
zip: 3, // boost for the `address.zip` match (parsed `postalcode`)
admin2: 2, // boost for the `admin2` match (parsed `city`)
admin1_abbr: 3, // boost for the `admin1_abbr` match (parsed `state`)
alpha3: 5 // boost for the `alpha3` match (parsed `country`)
};

42
helper/adminFields.js

@ -0,0 +1,42 @@
var peliasSchema = require('pelias-schema');
var peliasLogger = require( 'pelias-logger' ).get( 'api' );
// Admin field names expected to exist in the schema. Used as the default
// `expectedFields` list by getAvailableAdminFields() when the caller passes none.
var ADMIN_FIELDS = [
'admin0',
'admin1',
'admin1_abbr',
'admin2',
'local_admin',
'locality',
'neighborhood'
];
/**
 * Return the subset of expected admin fields that the schema actually defines,
 * in the same order as the expected list.
 *
 * Logs an error through the logger when none of the expected fields are present.
 * Every argument is optional and exists for testing; when omitted, the module-level
 * pelias schema, ADMIN_FIELDS list and pelias logger are used instead.
 *
 * @param {Object} [schema] optional: for testing only
 * @param {Array} [expectedFields] optional: for testing only
 * @param {Object} [logger] optional: for testing only
 * @returns {Array.<string>}
 */
function getAvailableAdminFields(schema, expectedFields, logger) {
  var activeSchema = schema || peliasSchema;
  var candidates = expectedFields || ADMIN_FIELDS;
  var log = logger || peliasLogger;

  // property names declared by the schema's default mapping
  var schemaFields = Object.keys(activeSchema.mappings._default_.properties);

  // keep only the candidates the schema knows about
  var present = [];
  candidates.forEach(function (name) {
    if (schemaFields.indexOf(name) >= 0) {
      present.push(name);
    }
  });

  if (!present.length) {
    log.error('helper/adminFields: no expected admin fields found in schema');
  }

  return present;
}
module.exports = getAvailableAdminFields;

9
helper/category_weights.js

@ -3,7 +3,14 @@
* should be boosted in elasticsearch results.
*/
module.exports = {
module.exports.default = {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2,
'admin': 2
};
module.exports.address = {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2

5
package.json

@ -33,9 +33,11 @@
"elasticsearch": ">=1.2.1"
},
"dependencies": {
"addressit": "1.3.0",
"async": "^0.9.0",
"cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve",
"express": "^4.8.8",
"extend": "2.0.1",
"geojson": "^0.2.1",
"geojson-extent": "^0.3.1",
"geopipes-elasticsearch-backend": "^0.2.0",
@ -44,10 +46,9 @@
"microtime": "1.4.0",
"morgan": "1.5.2",
"pelias-config": "^0.1.4",
"extend": "2.0.1",
"addressit": "1.3.0",
"pelias-esclient": "0.0.25",
"pelias-logger": "^0.0.8",
"pelias-schema": "1.0.0",
"pelias-suggester-pipeline": "2.0.2",
"through2": "0.6.5"
},

186
query/search.js

@ -1,6 +1,9 @@
var queries = require('geopipes-elasticsearch-backend').queries,
sort = require('../query/sort');
sort = require('../query/sort'),
adminFields = require('../helper/adminFields')(),
addressWeights = require('../helper/address_weights');
function generate( params ){
var centroid = null;
@ -27,23 +30,6 @@ function generate( params ){
};
if (params.parsed_input) {
query.query.filtered.query.bool.should = [];
var unmatched_admin_fields = [];
// qb stands for query builder
var qb = function(unmatched_admin_fields, value) {
if (value) {
unmatched_admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = value;
query.query.filtered.query.bool.should.push({
'match': match
});
});
}
};
// update input
if (params.parsed_input.number && params.parsed_input.street) {
input = params.parsed_input.number + ' ' + params.parsed_input.street;
@ -51,52 +37,7 @@ function generate( params ){
input = params.parsed_input.name;
}
// address
// number, street, postalcode
if (params.parsed_input.number) {
qb(['address.number'], params.parsed_input.number);
}
if (params.parsed_input.street) {
qb(['address.street'], params.parsed_input.street);
}
if (params.parsed_input.postalcode) {
qb(['address.zip'], params.parsed_input.postalcode);
}
// city
// admin2, locality, local_admin, neighborhood
if (params.parsed_input.city) {
qb(['admin2'], params.parsed_input.admin2);
} else {
unmatched_admin_fields.push('admin2');
}
// state
// admin1, admin1_abbr
if (params.parsed_input.state) {
qb(['admin1_abbr'], params.parsed_input.state);
} else {
unmatched_admin_fields.push('admin1', 'admin1_abbr');
}
// country
// admin0, alpha3
if (params.parsed_input.country) {
qb(['alpha3'], params.parsed_input.country);
} else {
unmatched_admin_fields.push('admin0', 'alpha3');
}
var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined;
// if no address was identified and input suggests some admin info in it
if (unmatched_admin_fields.length === 5 && input_regions !== params.input) {
if (params.parsed_input.admin_parts) {
qb(unmatched_admin_fields, params.parsed_input.admin_parts);
} else {
qb(unmatched_admin_fields, input_regions);
}
}
addParsedMatch(query, input, params.parsed_input);
}
// add search condition to distance query
@ -119,4 +60,121 @@ function generate( params ){
return query;
}
/**
 * Walk the parsed input (the address parts detected in the query string) and add
 * a boosted match clause to the query for every component that was identified.
 * Whatever admin fields remain unmatched afterwards are handed to
 * addUnmatchedAdminFieldsToQuery().
 *
 * @param {Object} query
 * @param {string} defaultInput
 * @param {Object} parsedInput
 */
function addParsedMatch(query, defaultInput, parsedInput) {
  var boolQuery = query.query.filtered.query.bool;
  boolQuery.should = boolQuery.should || [];

  // work on a copy so the module-level list of admin fields stays intact
  var remainingAdminFields = adminFields.slice();

  // [ document field, parsed value, boost ] -- order determines clause order
  var components = [
    // address: number, street, postalcode
    ['address.number', parsedInput.number, addressWeights.number],
    ['address.street', parsedInput.street, addressWeights.street],
    ['address.zip', parsedInput.postalcode, addressWeights.zip],
    // city
    ['admin2', parsedInput.city, addressWeights.admin2],
    // state
    ['admin1_abbr', parsedInput.state, addressWeights.admin1_abbr],
    // country
    ['alpha3', parsedInput.country, addressWeights.alpha3]
  ];

  components.forEach(function (component) {
    addMatch(query, remainingAdminFields, component[0], component[1], component[2]);
  });

  addUnmatchedAdminFieldsToQuery(query, remainingAdminFields, parsedInput, defaultInput);
}
/**
 * Check for additional admin info in the parsed input and, if any was found,
 * combine it into a single space-separated string and match that string against
 * every admin field that has not been matched explicitly.
 *
 * `admin_parts` takes precedence; `regions` is only consulted when `admin_parts`
 * is not set. Nothing is added when the combined string is empty or identical to
 * `defaultInput`.
 *
 * @param {Object} query
 * @param {Array} unmatchedAdminFields
 * @param {Object} parsedInput
 * @param {string} defaultInput
 */
function addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput) {
  if (unmatchedAdminFields.length === 0) {
    return;
  }

  var source = parsedInput.admin_parts || parsedInput.regions;
  if (!source) {
    return;
  }

  // [].concat() accepts a plain string as well as an array of strings and
  // flattens an array one level, so join(' ') really separates the individual
  // parts with spaces. Joining a wrapper around the array would stringify the
  // inner array with commas ("new york,ny").
  var leftovers = [].concat(source).join(' ');

  // an empty string cannot match anything useful
  if (leftovers === '') {
    return;
  }

  // only add the leftovers when they differ from what is already being searched
  if (leftovers !== defaultInput) {
    unmatchedAdminFields.forEach(function (key) {
      // a fresh empty list is passed so the array being iterated is never mutated
      addMatch(query, [], key, leftovers);
    });
  }
}
/**
 * Append a `match` clause for key:value to the query's `should` list and drop
 * the key from the list of unmatched fields. Does nothing when value is undefined.
 *
 * With a truthy boost the clause takes the `{ query, boost }` form; otherwise the
 * raw value is used directly.
 *
 * @param {Object} query
 * @param {Array} unmatched
 * @param {string} key
 * @param {string|number|undefined} value
 * @param {number|undefined} [boost] optional
 */
function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line
  if (typeof value !== 'undefined') {
    var clause = { 'match': {} };
    clause.match[key] = boost ? { query: value, boost: boost } : value;

    query.query.filtered.query.bool.should.push(clause);
    removeFromUnmatched(unmatched, key);
  }
}
/**
 * Remove the first occurrence of key from the unmatched list, in place.
 * The list is left untouched when key is not present.
 *
 * @param {Array} unmatched
 * @param {string} key
 */
function removeFromUnmatched(unmatched, key) {
  var position = unmatched.indexOf(key);
  if (position < 0) {
    return;
  }
  unmatched.splice(position, 1);
}
module.exports = generate;

11
query/sort.js

@ -43,7 +43,7 @@ module.exports = function( params ){
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': getCategoryWeights(params)
},
'file': category,
'type': 'number',
@ -64,3 +64,12 @@ module.exports = function( params ){
return scriptsConfig;
};
/**
 * Pick the category weights to use for sorting.
 *
 * Returns the `address` weights when the parsed input carries a `number` or
 * `street` property, and the `default` weights otherwise.
 *
 * @param {Object} [params] request params; may carry a `parsed_input` object
 * @returns {Object} category name -> weight
 */
function getCategoryWeights(params) {
  // borrowed from Object.prototype so prototype-less objects don't throw
  var hasOwn = Object.prototype.hasOwnProperty;

  var parsedInput = (params && hasOwn.call(params, 'parsed_input')) ? params.parsed_input : null;

  // parsed_input may be present but undefined/null; treat that like "not parsed"
  if (parsedInput !== null && typeof parsedInput !== 'undefined' &&
      (hasOwn.call(parsedInput, 'number') || hasOwn.call(parsedInput, 'street'))) {
    return category_weights.address;
  }

  return category_weights.default;
}

84
test/unit/helper/adminFields.js

@ -0,0 +1,84 @@
var adminFields = require('../../../helper/adminFields');
module.exports.tests = {};
// Sanity-check the helper's public shape: it is a function and, called without
// arguments, it yields a non-empty array.
module.exports.tests.interface = function(test, common) {
  test('validate fields', function(t) {
    var isFunction = adminFields instanceof Function;
    t.assert(isFunction, 'adminFields is a function');

    var fields = adminFields();
    t.assert(fields instanceof Array, 'adminFields() returns an array');

    var fieldCount = adminFields().length;
    t.assert(fieldCount > 0, 'adminFields array is not empty');

    t.end();
  });
};
// Verify that only the expected fields actually present in the (mocked) schema
// are returned, and that an error is logged when none of them are present.
module.exports.tests.lookupExistance = function(test, common) {

  // fresh list of expected field names for each test
  function makeExpectedFields() {
    return [ 'one', 'two', 'three', 'four' ];
  }

  // minimal schema mock whose default mapping declares the given properties
  function mockSchema(fieldNames) {
    var mock = { mappings: { _default_: { properties: {} } } };
    fieldNames.forEach(function (name) {
      mock.mappings._default_.properties[name] = {};
    });
    return mock;
  }

  test('all expected fields in schema', function(t) {
    var expectedFields = makeExpectedFields();

    // inject all expected fields into schema mock
    var schema = mockSchema(expectedFields);

    var res = adminFields(schema, expectedFields);

    t.deepEquals(res, expectedFields, 'all expected fields are returned');
    t.end();
  });

  test('some expected fields in schema', function(t) {
    var expectedFields = makeExpectedFields();

    // inject only some of the expected fields into schema mock
    var schema = mockSchema(expectedFields.slice(0, 3));

    var res = adminFields(schema, expectedFields);

    t.deepEquals(res, expectedFields.slice(0, 3), 'only matching expected fields are returned');
    t.end();
  });

  test('no expected fields in schema', function(t) {
    var schema = mockSchema([ 'foo' ]);

    // logger mock that only records whether error() was invoked
    var logErrorCalled = false;
    var logger = {
      error: function () {
        logErrorCalled = true;
      }
    };

    var res = adminFields(schema, undefined, logger);

    t.deepEquals(res, [], 'no admin fields found');
    t.assert(logErrorCalled, 'log error called');
    t.end();
  });
};
// Run every registered test group, prefixing each test name with 'adminFields: '.
module.exports.all = function (tape, common) {
  var groups = module.exports.tests;

  var test = function (name, testFunction) {
    return tape('adminFields: ' + name, testFunction);
  };

  for( var groupName in groups ){
    groups[groupName](test, common);
  }
};

2
test/unit/query/reverse.js

@ -53,7 +53,7 @@ var sort = [
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': category_weights.default
},
'file': category,
'type': 'number',

259
test/unit/query/search.js

@ -7,8 +7,10 @@ var category = 'category';
var parser = require('../../../helper/query_parser');
var category_weights = require('../../../helper/category_weights');
var admin_weights = require('../../../helper/admin_weights');
var address_weights = require('../../../helper/address_weights');
var weights = require('pelias-suggester-pipeline').weights;
module.exports.tests = {};
module.exports.tests.interface = function(test, common) {
@ -54,7 +56,7 @@ var sort = [
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': category_weights.default
},
'file': category,
'type': 'number',
@ -267,27 +269,72 @@ module.exports.tests.query = function(test, common) {
'should': [
{
'match': {
'address.number': 123
'address.number': {
'query': 123,
'boost': address_weights.number
}
}
},
{
'match': {
'address.street': 'main st'
'address.street': {
'query': 'main st',
'boost': address_weights.street
}
}
},
{
'match': {
'address.zip': 10010
'address.zip': {
'query': 10010,
'boost': address_weights.zip
}
}
},
{
'match': {
'admin1_abbr': 'NY'
'admin1_abbr': {
'query': 'NY',
'boost': address_weights.admin1_abbr
}
}
},
{
'match': {
'alpha3': 'USA'
'alpha3': {
'query': 'USA',
'boost': address_weights.alpha3
}
}
},
{
match: {
admin0: 'new york'
}
},
{
match: {
admin1: 'new york'
}
},
{
match: {
admin2: 'new york'
}
},
{
match: {
local_admin: 'new york'
}
},
{
match: {
locality: 'new york'
}
},
{
match: {
neighborhood: 'new york'
}
},
{
@ -416,7 +463,7 @@ module.exports.tests.query = function(test, common) {
'should': [
{
'match': {
'admin2': 'new york'
'admin0': 'new york'
}
},
{
@ -431,12 +478,22 @@ module.exports.tests.query = function(test, common) {
},
{
'match': {
'admin0': 'new york'
'admin2': 'new york'
}
},
{
'match': {
'local_admin': 'new york'
}
},
{
'match': {
'locality': 'new york'
}
},
{
'match': {
'alpha3': 'new york'
'neighborhood': 'new york'
}
},
{
@ -501,7 +558,8 @@ module.exports.tests.query = function(test, common) {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
'transport:air:airport': 2,
'admin': 2
}
},
'file': 'category',
@ -534,10 +592,189 @@ module.exports.tests.query = function(test, common) {
],
'track_scores': true
};
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query with regions in address', function(t) {
var partial_address = '1 water st manhattan ny';
var query = generate({ input: partial_address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_input: parser(partial_address),
default_layers_set: true
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': '1 water st'
}
}
],
'should': [
{
match: {
'address.number': {
'query': 1,
'boost': address_weights.number
}
}
},
{
match: {
'address.street': {
'query': 'water st',
'boost': address_weights.street
}
}
},
{
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': address_weights.admin1_abbr
}
}
},
{
'match': {
'admin0': 'manhattan'
}
},
{
'match': {
'admin1': 'manhattan'
}
},
{
'match': {
'admin2': 'manhattan'
}
},
{
match: {
local_admin: 'manhattan'
}
},
{
match: {
locality: 'manhattan'
}
},
{
match: {
neighborhood: 'manhattan'
}
},
{
'match': {
'phrase.default': '1 water st'
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
t.deepEqual(query, expected, 'valid search query');
t.end();
});
};
module.exports.all = function (tape, common) {

2
test/unit/query/sort.js

@ -53,7 +53,7 @@ var expected = [
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': category_weights.default
},
'file': category,
'type': 'number',

3
test/unit/run.js

@ -19,7 +19,8 @@ var tests = [
require('./query/reverse'),
require('./helper/query_parser'),
require('./helper/geojsonify'),
require('./helper/outputSchema')
require('./helper/outputSchema'),
require('./helper/adminFields'),
];
tests.map(function(t) {

Loading…
Cancel
Save