Browse Source

Search query builder not checking all available admin values

Fixes #187
pull/192/head
Diana Shkolnikov 10 years ago
parent
commit
ab436d5dfb
  1. 13
      helper/address_weights.js
  2. 30
      helper/adminFields.js
  3. 9
      helper/category_weights.js
  4. 5
      package.json
  5. 132
      query/search.js
  6. 11
      query/sort.js
  7. 84
      test/unit/helper/adminFields.js
  8. 2
      test/unit/query/reverse.js
  9. 257
      test/unit/query/search.js
  10. 2
      test/unit/query/sort.js
  11. 3
      test/unit/run.js

13
helper/address_weights.js

@ -0,0 +1,13 @@
/**
* These values specify how much a document that matches certain parts of an address
* should be boosted in elasticsearch results.
*/
module.exports = {
number: 1,
street: 3,
zip: 3,
admin2: 2,
admin1_abbr: 3,
alpha3: 5
};

30
helper/adminFields.js

@ -0,0 +1,30 @@
var schema = require('pelias-schema');
var logger = require( 'pelias-logger' ).get( 'api' );
var ADMIN_FIELDS = [
'admin0',
'admin1',
'admin1_abbr',
'admin2',
'local_admin',
'locality',
'neighborhood'
];
function getAvailableAdminFields() {
var actualFields = Object.keys(schema.mappings._default_.properties);
// check if expected fields are actually in current schema
var available = ADMIN_FIELDS.filter(function (field) {
return (actualFields.indexOf(field) !== -1);
});
if (available.length === 0) {
logger.error('helper/adminFields: no expected admin fields found in schema');
}
return available;
}
module.exports.availableFields = getAvailableAdminFields();
module.exports.expectedFields = ADMIN_FIELDS;

9
helper/category_weights.js

@ -3,7 +3,14 @@
* should be boosted in elasticsearch results. * should be boosted in elasticsearch results.
*/ */
module.exports = { module.exports.default = {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2,
'admin': 2
};
module.exports.address = {
'transport:air': 2, 'transport:air': 2,
'transport:air:aerodrome': 2, 'transport:air:aerodrome': 2,
'transport:air:airport': 2 'transport:air:airport': 2

5
package.json

@ -33,9 +33,11 @@
"elasticsearch": ">=1.2.1" "elasticsearch": ">=1.2.1"
}, },
"dependencies": { "dependencies": {
"addressit": "1.3.0",
"async": "^0.9.0", "async": "^0.9.0",
"cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve",
"express": "^4.8.8", "express": "^4.8.8",
"extend": "2.0.1",
"geojson": "^0.2.1", "geojson": "^0.2.1",
"geojson-extent": "^0.3.1", "geojson-extent": "^0.3.1",
"geopipes-elasticsearch-backend": "^0.2.0", "geopipes-elasticsearch-backend": "^0.2.0",
@ -44,10 +46,9 @@
"microtime": "1.4.0", "microtime": "1.4.0",
"morgan": "1.5.2", "morgan": "1.5.2",
"pelias-config": "^0.1.4", "pelias-config": "^0.1.4",
"extend": "2.0.1",
"addressit": "1.3.0",
"pelias-esclient": "0.0.25", "pelias-esclient": "0.0.25",
"pelias-logger": "^0.0.8", "pelias-logger": "^0.0.8",
"pelias-schema": "1.0.0",
"pelias-suggester-pipeline": "2.0.2", "pelias-suggester-pipeline": "2.0.2",
"through2": "0.6.5" "through2": "0.6.5"
}, },

132
query/search.js

@ -1,6 +1,9 @@
var queries = require('geopipes-elasticsearch-backend').queries, var queries = require('geopipes-elasticsearch-backend').queries,
sort = require('../query/sort'); sort = require('../query/sort'),
adminFields = require('../helper/adminFields').availableFields,
addressWeights = require('../helper/address_weights');
function generate( params ){ function generate( params ){
var centroid = null; var centroid = null;
@ -27,22 +30,7 @@ function generate( params ){
}; };
if (params.parsed_input) { if (params.parsed_input) {
addParsedMatch(query, input, params.parsed_input);
query.query.filtered.query.bool.should = [];
var unmatched_admin_fields = [];
// qb stands for query builder
var qb = function(unmatched_admin_fields, value) {
if (value) {
unmatched_admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = value;
query.query.filtered.query.bool.should.push({
'match': match
});
});
}
};
// update input // update input
if (params.parsed_input.number && params.parsed_input.street) { if (params.parsed_input.number && params.parsed_input.street) {
@ -50,53 +38,6 @@ function generate( params ){
} else if (params.parsed_input.admin_parts) { } else if (params.parsed_input.admin_parts) {
input = params.parsed_input.name; input = params.parsed_input.name;
} }
// address
// number, street, postalcode
if (params.parsed_input.number) {
qb(['address.number'], params.parsed_input.number);
}
if (params.parsed_input.street) {
qb(['address.street'], params.parsed_input.street);
}
if (params.parsed_input.postalcode) {
qb(['address.zip'], params.parsed_input.postalcode);
}
// city
// admin2, locality, local_admin, neighborhood
if (params.parsed_input.city) {
qb(['admin2'], params.parsed_input.admin2);
} else {
unmatched_admin_fields.push('admin2');
}
// state
// admin1, admin1_abbr
if (params.parsed_input.state) {
qb(['admin1_abbr'], params.parsed_input.state);
} else {
unmatched_admin_fields.push('admin1', 'admin1_abbr');
}
// country
// admin0, alpha3
if (params.parsed_input.country) {
qb(['alpha3'], params.parsed_input.country);
} else {
unmatched_admin_fields.push('admin0', 'alpha3');
}
var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined;
// if no address was identified and input suggests some admin info in it
if (unmatched_admin_fields.length === 5 && input_regions !== params.input) {
if (params.parsed_input.admin_parts) {
qb(unmatched_admin_fields, params.parsed_input.admin_parts);
} else {
qb(unmatched_admin_fields, input_regions);
}
}
} }
// add search condition to distance query // add search condition to distance query
@ -119,4 +60,67 @@ function generate( params ){
return query; return query;
} }
function addParsedMatch(query, defaultInput, parsedInput) {
query.query.filtered.query.bool.should = [];
// copy expected admin fields so we can remove them as we parse the address
var unmatchedAdminFields = adminFields.slice();
// address
// number, street, postalcode
addMatch(query, unmatchedAdminFields, 'address.number', parsedInput.number, addressWeights.number);
addMatch(query, unmatchedAdminFields, 'address.street', parsedInput.street, addressWeights.street);
addMatch(query, unmatchedAdminFields, 'address.zip', parsedInput.postalcode, addressWeights.zip);
// city
// admin2, locality, local_admin, neighborhood
addMatch(query, unmatchedAdminFields, 'admin2', parsedInput.admin2, addressWeights.admin2);
// state
// admin1, admin1_abbr
addMatch(query, unmatchedAdminFields, 'admin1_abbr', parsedInput.state, addressWeights.admin1_abbr);
// country
// admin0, alpha3
addMatch(query, unmatchedAdminFields, 'alpha3', parsedInput.country, addressWeights.alpha3);
var inputRegions = parsedInput.regions ? parsedInput.regions.join(' ') : undefined;
// if no address was identified and input suggests some admin info in it
if (unmatchedAdminFields.length > 0 && inputRegions !== defaultInput) {
unmatchedAdminFields.forEach(function (key) {
if (parsedInput.admin_parts) {
addMatch(query, [], key, parsedInput.admin_parts);
}
else {
addMatch(query, [], key, inputRegions);
}
});
}
}
function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line
if (typeof value === 'undefined') {
return;
}
var match = {};
if (boost) {
match[key] = {
query: value,
boost: boost
};
}
else {
match[key] = value;
}
query.query.filtered.query.bool.should.push({ 'match': match });
var index = unmatched.indexOf(key);
if (index !== -1) {
unmatched.splice(index, 1);
}
}
module.exports = generate; module.exports = generate;

11
query/sort.js

@ -43,7 +43,7 @@ module.exports = function( params ){
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': getCategoryWeights(params)
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',
@ -64,3 +64,12 @@ module.exports = function( params ){
return scriptsConfig; return scriptsConfig;
}; };
function getCategoryWeights(params) {
if (params && params.hasOwnProperty('parsed_input') &&
(params.parsed_input.hasOwnProperty('number') ||
params.parsed_input.hasOwnProperty('street'))) {
return category_weights.address;
}
return category_weights.default;
}

84
test/unit/helper/adminFields.js

@ -0,0 +1,84 @@
var proxyquire = require('proxyquire');
module.exports.tests = {};
module.exports.tests.interface = function(test, common) {
test('validate fields', function(t) {
var adminFields = require('../../../helper/adminFields').availableFields;
t.assert(adminFields instanceof Array, 'adminFields is an array');
t.assert(adminFields.length > 0, 'adminFields array is not empty');
t.end();
});
test('validate fields', function(t) {
var adminFields = require('../../../helper/adminFields').expectedFields;
t.assert(adminFields instanceof Array, 'adminFields is an array');
t.assert(adminFields.length > 0, 'adminFields array is not empty');
t.end();
});
};
module.exports.tests.lookupExistance = function(test, common) {
test('all expected fields in schema', function(t) {
var expectedFields = require('../../../helper/adminFields').expectedFields;
var schemaMock = { mappings: { _default_: { properties: {} } } };
// inject all expected fields into schema mock
expectedFields.forEach(function (field) {
schemaMock.mappings._default_.properties[field] = {};
});
var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock});
t.deepEquals(adminFields.availableFields, adminFields.expectedFields, 'all expected fields are returned');
t.end();
});
test('some expected fields in schema', function(t) {
var expectedFields = require('../../../helper/adminFields').expectedFields.slice(0, 3);
var schemaMock = { mappings: { _default_: { properties: {} } } };
// inject all expected fields into schema mock
expectedFields.forEach(function (field) {
schemaMock.mappings._default_.properties[field] = {};
});
var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock});
t.deepEquals(adminFields.availableFields, expectedFields, 'only matching expected fields are returned');
t.end();
});
test('no expected fields in schema', function(t) {
var schemaMock = { mappings: { _default_: { properties: { foo: {} } } } };
var loggerMock = { get: function (name) {
t.equal(name, 'api');
return {
error: function () {}
};
}};
var adminFields = proxyquire('../../../helper/adminFields',
{
'pelias-schema': schemaMock,
'pelias-logger': loggerMock
});
t.deepEquals([], adminFields.availableFields, 'no admin fields found');
t.end();
});
};
module.exports.all = function (tape, common) {
function test(name, testFunction) {
return tape('adminFields: ' + name, testFunction);
}
for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};

2
test/unit/query/reverse.js

@ -53,7 +53,7 @@ var sort = [
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': category_weights.default
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',

257
test/unit/query/search.js

@ -54,7 +54,7 @@ var sort = [
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': category_weights.default
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',
@ -267,27 +267,72 @@ module.exports.tests.query = function(test, common) {
'should': [ 'should': [
{ {
'match': { 'match': {
'address.number': 123 'address.number': {
'query': 123,
'boost': 1
}
} }
}, },
{ {
'match': { 'match': {
'address.street': 'main st' 'address.street': {
'query': 'main st',
'boost': 3
}
} }
}, },
{ {
'match': { 'match': {
'address.zip': 10010 'address.zip': {
'query': 10010,
'boost': 3
}
} }
}, },
{ {
'match': { 'match': {
'admin1_abbr': 'NY' 'admin1_abbr': {
'query': 'NY',
'boost': 3
}
} }
}, },
{ {
'match': { 'match': {
'alpha3': 'USA' 'alpha3': {
'query': 'USA',
'boost': 5
}
}
},
{
match: {
admin0: 'new york'
}
},
{
match: {
admin1: 'new york'
}
},
{
match: {
admin2: 'new york'
}
},
{
match: {
local_admin: 'new york'
}
},
{
match: {
locality: 'new york'
}
},
{
match: {
neighborhood: 'new york'
} }
}, },
{ {
@ -416,7 +461,7 @@ module.exports.tests.query = function(test, common) {
'should': [ 'should': [
{ {
'match': { 'match': {
'admin2': 'new york' 'admin0': 'new york'
} }
}, },
{ {
@ -431,12 +476,22 @@ module.exports.tests.query = function(test, common) {
}, },
{ {
'match': { 'match': {
'admin0': 'new york' 'admin2': 'new york'
} }
}, },
{ {
'match': { 'match': {
'alpha3': 'new york' 'local_admin': 'new york'
}
},
{
'match': {
'locality': 'new york'
}
},
{
'match': {
'neighborhood': 'new york'
} }
}, },
{ {
@ -501,7 +556,8 @@ module.exports.tests.query = function(test, common) {
'category_weights': { 'category_weights': {
'transport:air': 2, 'transport:air': 2,
'transport:air:aerodrome': 2, 'transport:air:aerodrome': 2,
'transport:air:airport': 2 'transport:air:airport': 2,
'admin': 2
} }
}, },
'file': 'category', 'file': 'category',
@ -534,10 +590,189 @@ module.exports.tests.query = function(test, common) {
], ],
'track_scores': true 'track_scores': true
}; };
t.deepEqual(query, expected, 'valid search query');
t.end();
});
test('valid query with regions in address', function(t) {
var partial_address = '1 water st manhattan ny';
var query = generate({ input: partial_address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_input: parser(partial_address),
default_layers_set: true
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': '1 water st'
}
}
],
'should': [
{
match: {
'address.number': {
'query': 1,
'boost': 1
}
}
},
{
match: {
'address.street': {
'query': 'water st',
'boost': 3
}
}
},
{
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': 3
}
}
},
{
'match': {
'admin0': 'manhattan'
}
},
{
'match': {
'admin1': 'manhattan'
}
},
{
'match': {
'admin2': 'manhattan'
}
},
{
match: {
local_admin: 'manhattan'
}
},
{
match: {
locality: 'manhattan'
}
},
{
match: {
neighborhood: 'manhattan'
}
},
{
'match': {
'phrase.default': '1 water st'
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
t.deepEqual(query, expected, 'valid search query'); t.deepEqual(query, expected, 'valid search query');
t.end(); t.end();
}); });
}; };
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {

2
test/unit/query/sort.js

@ -53,7 +53,7 @@ var expected = [
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': category_weights.default
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',

3
test/unit/run.js

@ -19,7 +19,8 @@ var tests = [
require('./query/reverse'), require('./query/reverse'),
require('./helper/query_parser'), require('./helper/query_parser'),
require('./helper/geojsonify'), require('./helper/geojsonify'),
require('./helper/outputSchema') require('./helper/outputSchema'),
require('./helper/adminFields'),
]; ];
tests.map(function(t) { tests.map(function(t) {

Loading…
Cancel
Save