Browse Source

Search query builder not checking all available admin values

Fixes #187
pull/192/head
Diana Shkolnikov 9 years ago
parent
commit
ab436d5dfb
  1. 13
      helper/address_weights.js
  2. 30
      helper/adminFields.js
  3. 9
      helper/category_weights.js
  4. 5
      package.json
  5. 126
      query/search.js
  6. 11
      query/sort.js
  7. 84
      test/unit/helper/adminFields.js
  8. 2
      test/unit/query/reverse.js
  9. 253
      test/unit/query/search.js
  10. 2
      test/unit/query/sort.js
  11. 3
      test/unit/run.js

13
helper/address_weights.js

@ -0,0 +1,13 @@
/**
* These values specify how much a document that matches certain parts of an address
* should be boosted in elasticsearch results.
*/
module.exports = {
number: 1,
street: 3,
zip: 3,
admin2: 2,
admin1_abbr: 3,
alpha3: 5
};

30
helper/adminFields.js

@ -0,0 +1,30 @@
var schema = require('pelias-schema');
var logger = require( 'pelias-logger' ).get( 'api' );
var ADMIN_FIELDS = [
'admin0',
'admin1',
'admin1_abbr',
'admin2',
'local_admin',
'locality',
'neighborhood'
];
function getAvailableAdminFields() {
var actualFields = Object.keys(schema.mappings._default_.properties);
// check if expected fields are actually in current schema
var available = ADMIN_FIELDS.filter(function (field) {
return (actualFields.indexOf(field) !== -1);
});
if (available.length === 0) {
logger.error('helper/adminFields: no expected admin fields found in schema');
}
return available;
}
module.exports.availableFields = getAvailableAdminFields();
module.exports.expectedFields = ADMIN_FIELDS;

9
helper/category_weights.js

@ -3,7 +3,14 @@
* should be boosted in elasticsearch results.
*/
module.exports = {
module.exports.default = {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2,
'admin': 2
};
module.exports.address = {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2

5
package.json

@ -33,9 +33,11 @@
"elasticsearch": ">=1.2.1"
},
"dependencies": {
"addressit": "1.3.0",
"async": "^0.9.0",
"cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve",
"express": "^4.8.8",
"extend": "2.0.1",
"geojson": "^0.2.1",
"geojson-extent": "^0.3.1",
"geopipes-elasticsearch-backend": "^0.2.0",
@ -44,10 +46,9 @@
"microtime": "1.4.0",
"morgan": "1.5.2",
"pelias-config": "^0.1.4",
"extend": "2.0.1",
"addressit": "1.3.0",
"pelias-esclient": "0.0.25",
"pelias-logger": "^0.0.8",
"pelias-schema": "1.0.0",
"pelias-suggester-pipeline": "2.0.2",
"through2": "0.6.5"
},

126
query/search.js

@ -1,6 +1,9 @@
var queries = require('geopipes-elasticsearch-backend').queries,
sort = require('../query/sort');
sort = require('../query/sort'),
adminFields = require('../helper/adminFields').availableFields,
addressWeights = require('../helper/address_weights');
function generate( params ){
var centroid = null;
@ -27,22 +30,7 @@ function generate( params ){
};
if (params.parsed_input) {
query.query.filtered.query.bool.should = [];
var unmatched_admin_fields = [];
// qb stands for query builder
var qb = function(unmatched_admin_fields, value) {
if (value) {
unmatched_admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = value;
query.query.filtered.query.bool.should.push({
'match': match
});
});
}
};
addParsedMatch(query, input, params.parsed_input);
// update input
if (params.parsed_input.number && params.parsed_input.street) {
@ -50,73 +38,89 @@ function generate( params ){
} else if (params.parsed_input.admin_parts) {
input = params.parsed_input.name;
}
// address
// number, street, postalcode
if (params.parsed_input.number) {
qb(['address.number'], params.parsed_input.number);
}
if (params.parsed_input.street) {
qb(['address.street'], params.parsed_input.street);
// add search condition to distance query
query.query.filtered.query.bool.must.push({
'match': {
'name.default': input
}
if (params.parsed_input.postalcode) {
qb(['address.zip'], params.parsed_input.postalcode);
});
// add phrase matching query
// note: this is required for shingle/phrase matching
query.query.filtered.query.bool.should.push({
'match': {
'phrase.default': input
}
});
query.sort = query.sort.concat( sort( params ) );
return query;
}
function addParsedMatch(query, defaultInput, parsedInput) {
query.query.filtered.query.bool.should = [];
// copy expected admin fields so we can remove them as we parse the address
var unmatchedAdminFields = adminFields.slice();
// address
// number, street, postalcode
addMatch(query, unmatchedAdminFields, 'address.number', parsedInput.number, addressWeights.number);
addMatch(query, unmatchedAdminFields, 'address.street', parsedInput.street, addressWeights.street);
addMatch(query, unmatchedAdminFields, 'address.zip', parsedInput.postalcode, addressWeights.zip);
// city
// admin2, locality, local_admin, neighborhood
if (params.parsed_input.city) {
qb(['admin2'], params.parsed_input.admin2);
} else {
unmatched_admin_fields.push('admin2');
}
addMatch(query, unmatchedAdminFields, 'admin2', parsedInput.admin2, addressWeights.admin2);
// state
// admin1, admin1_abbr
if (params.parsed_input.state) {
qb(['admin1_abbr'], params.parsed_input.state);
} else {
unmatched_admin_fields.push('admin1', 'admin1_abbr');
}
addMatch(query, unmatchedAdminFields, 'admin1_abbr', parsedInput.state, addressWeights.admin1_abbr);
// country
// admin0, alpha3
if (params.parsed_input.country) {
qb(['alpha3'], params.parsed_input.country);
} else {
unmatched_admin_fields.push('admin0', 'alpha3');
}
addMatch(query, unmatchedAdminFields, 'alpha3', parsedInput.country, addressWeights.alpha3);
var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined;
var inputRegions = parsedInput.regions ? parsedInput.regions.join(' ') : undefined;
// if no address was identified and input suggests some admin info in it
if (unmatched_admin_fields.length === 5 && input_regions !== params.input) {
if (params.parsed_input.admin_parts) {
qb(unmatched_admin_fields, params.parsed_input.admin_parts);
} else {
qb(unmatched_admin_fields, input_regions);
if (unmatchedAdminFields.length > 0 && inputRegions !== defaultInput) {
unmatchedAdminFields.forEach(function (key) {
if (parsedInput.admin_parts) {
addMatch(query, [], key, parsedInput.admin_parts);
}
else {
addMatch(query, [], key, inputRegions);
}
});
}
}
// add search condition to distance query
query.query.filtered.query.bool.must.push({
'match': {
'name.default': input
function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line
if (typeof value === 'undefined') {
return;
}
});
// add phrase matching query
// note: this is required for shingle/phrase matching
query.query.filtered.query.bool.should.push({
'match': {
'phrase.default': input
var match = {};
if (boost) {
match[key] = {
query: value,
boost: boost
};
}
else {
match[key] = value;
}
});
query.sort = query.sort.concat( sort( params ) );
query.query.filtered.query.bool.should.push({ 'match': match });
return query;
var index = unmatched.indexOf(key);
if (index !== -1) {
unmatched.splice(index, 1);
}
}
module.exports = generate;

11
query/sort.js

@ -43,7 +43,7 @@ module.exports = function( params ){
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': getCategoryWeights(params)
},
'file': category,
'type': 'number',
@ -64,3 +64,12 @@ module.exports = function( params ){
return scriptsConfig;
};
function getCategoryWeights(params) {
if (params && params.hasOwnProperty('parsed_input') &&
(params.parsed_input.hasOwnProperty('number') ||
params.parsed_input.hasOwnProperty('street'))) {
return category_weights.address;
}
return category_weights.default;
}

84
test/unit/helper/adminFields.js

@ -0,0 +1,84 @@
var proxyquire = require('proxyquire');
module.exports.tests = {};
module.exports.tests.interface = function(test, common) {
test('validate fields', function(t) {
var adminFields = require('../../../helper/adminFields').availableFields;
t.assert(adminFields instanceof Array, 'adminFields is an array');
t.assert(adminFields.length > 0, 'adminFields array is not empty');
t.end();
});
test('validate fields', function(t) {
var adminFields = require('../../../helper/adminFields').expectedFields;
t.assert(adminFields instanceof Array, 'adminFields is an array');
t.assert(adminFields.length > 0, 'adminFields array is not empty');
t.end();
});
};
module.exports.tests.lookupExistance = function(test, common) {
test('all expected fields in schema', function(t) {
var expectedFields = require('../../../helper/adminFields').expectedFields;
var schemaMock = { mappings: { _default_: { properties: {} } } };
// inject all expected fields into schema mock
expectedFields.forEach(function (field) {
schemaMock.mappings._default_.properties[field] = {};
});
var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock});
t.deepEquals(adminFields.availableFields, adminFields.expectedFields, 'all expected fields are returned');
t.end();
});
test('some expected fields in schema', function(t) {
var expectedFields = require('../../../helper/adminFields').expectedFields.slice(0, 3);
var schemaMock = { mappings: { _default_: { properties: {} } } };
// inject all expected fields into schema mock
expectedFields.forEach(function (field) {
schemaMock.mappings._default_.properties[field] = {};
});
var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock});
t.deepEquals(adminFields.availableFields, expectedFields, 'only matching expected fields are returned');
t.end();
});
test('no expected fields in schema', function(t) {
var schemaMock = { mappings: { _default_: { properties: { foo: {} } } } };
var loggerMock = { get: function (name) {
t.equal(name, 'api');
return {
error: function () {}
};
}};
var adminFields = proxyquire('../../../helper/adminFields',
{
'pelias-schema': schemaMock,
'pelias-logger': loggerMock
});
t.deepEquals([], adminFields.availableFields, 'no admin fields found');
t.end();
});
};
module.exports.all = function (tape, common) {
function test(name, testFunction) {
return tape('adminFields: ' + name, testFunction);
}
for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};

2
test/unit/query/reverse.js

@ -53,7 +53,7 @@ var sort = [
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': category_weights.default
},
'file': category,
'type': 'number',

253
test/unit/query/search.js

@ -54,7 +54,7 @@ var sort = [
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': category_weights.default
},
'file': category,
'type': 'number',
@ -267,27 +267,72 @@ module.exports.tests.query = function(test, common) {
'should': [
{
'match': {
'address.number': 123
'address.number': {
'query': 123,
'boost': 1
}
}
},
{
'match': {
'address.street': 'main st'
'address.street': {
'query': 'main st',
'boost': 3
}
}
},
{
'match': {
'address.zip': 10010
'address.zip': {
'query': 10010,
'boost': 3
}
}
},
{
'match': {
'admin1_abbr': 'NY'
'admin1_abbr': {
'query': 'NY',
'boost': 3
}
}
},
{
'match': {
'alpha3': 'USA'
'alpha3': {
'query': 'USA',
'boost': 5
}
}
},
{
match: {
admin0: 'new york'
}
},
{
match: {
admin1: 'new york'
}
},
{
match: {
admin2: 'new york'
}
},
{
match: {
local_admin: 'new york'
}
},
{
match: {
locality: 'new york'
}
},
{
match: {
neighborhood: 'new york'
}
},
{
@ -416,7 +461,7 @@ module.exports.tests.query = function(test, common) {
'should': [
{
'match': {
'admin2': 'new york'
'admin0': 'new york'
}
},
{
@ -431,12 +476,22 @@ module.exports.tests.query = function(test, common) {
},
{
'match': {
'admin0': 'new york'
'admin2': 'new york'
}
},
{
'match': {
'local_admin': 'new york'
}
},
{
'match': {
'locality': 'new york'
}
},
{
'match': {
'alpha3': 'new york'
'neighborhood': 'new york'
}
},
{
@ -455,6 +510,185 @@ module.exports.tests.query = function(test, common) {
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2,
'admin': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query with regions in address', function(t) {
var partial_address = '1 water st manhattan ny';
var query = generate({ input: partial_address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_input: parser(partial_address),
default_layers_set: true
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': '1 water st'
}
}
],
'should': [
{
match: {
'address.number': {
'query': 1,
'boost': 1
}
}
},
{
match: {
'address.street': {
'query': 'water st',
'boost': 3
}
}
},
{
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': 3
}
}
},
{
'match': {
'admin0': 'manhattan'
}
},
{
'match': {
'admin1': 'manhattan'
}
},
{
'match': {
'admin2': 'manhattan'
}
},
{
match: {
local_admin: 'manhattan'
}
},
{
match: {
locality: 'manhattan'
}
},
{
match: {
neighborhood: 'manhattan'
}
},
{
'match': {
'phrase.default': '1 water st'
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
@ -538,6 +772,7 @@ module.exports.tests.query = function(test, common) {
t.deepEqual(query, expected, 'valid search query');
t.end();
});
};
module.exports.all = function (tape, common) {

2
test/unit/query/sort.js

@ -53,7 +53,7 @@ var expected = [
{
'_script': {
'params': {
'category_weights': category_weights
'category_weights': category_weights.default
},
'file': category,
'type': 'number',

3
test/unit/run.js

@ -19,7 +19,8 @@ var tests = [
require('./query/reverse'),
require('./helper/query_parser'),
require('./helper/geojsonify'),
require('./helper/outputSchema')
require('./helper/outputSchema'),
require('./helper/adminFields'),
];
tests.map(function(t) {

Loading…
Cancel
Save