diff --git a/helper/address_weights.js b/helper/address_weights.js new file mode 100644 index 00000000..6b15ab4c --- /dev/null +++ b/helper/address_weights.js @@ -0,0 +1,13 @@ +/** + * These values specify how much a document that matches certain parts of an address + * should be boosted in elasticsearch results. + */ + +module.exports = { + number: 1, + street: 3, + zip: 3, + admin2: 2, + admin1_abbr: 3, + alpha3: 5 +}; diff --git a/helper/adminFields.js b/helper/adminFields.js new file mode 100644 index 00000000..de1dd009 --- /dev/null +++ b/helper/adminFields.js @@ -0,0 +1,42 @@ +var peliasSchema = require('pelias-schema'); +var peliasLogger = require( 'pelias-logger' ).get( 'api' ); + +var ADMIN_FIELDS = [ + 'admin0', + 'admin1', + 'admin1_abbr', + 'admin2', + 'local_admin', + 'locality', + 'neighborhood' +]; + +/** + * Get all admin fields that were expected and also found in schema + * + * @param {Object} [schema] optional: for testing only + * @param {Array} [expectedFields] optional: for testing only + * @param {Object} [logger] optional: for testing only + * @returns {Array.} + */ +function getAvailableAdminFields(schema, expectedFields, logger) { + + schema = schema || peliasSchema; + expectedFields = expectedFields || ADMIN_FIELDS; + logger = logger || peliasLogger; + + var actualFields = Object.keys(schema.mappings._default_.properties); + + // check if expected fields are actually in current schema + var available = expectedFields.filter(function (field) { + return (actualFields.indexOf(field) !== -1); + }); + + if (available.length === 0) { + logger.error('helper/adminFields: no expected admin fields found in schema'); + } + + return available; +} + +module.exports = getAvailableAdminFields; \ No newline at end of file diff --git a/helper/category_weights.js b/helper/category_weights.js index 535ad2f2..5176b70e 100644 --- a/helper/category_weights.js +++ b/helper/category_weights.js @@ -3,7 +3,14 @@ * should be boosted in elasticsearch results. */ -module.exports = { +module.exports.default = { + 'transport:air': 2, + 'transport:air:aerodrome': 2, + 'transport:air:airport': 2, + 'admin': 2 +}; + +module.exports.address = { 'transport:air': 2, 'transport:air:aerodrome': 2, 'transport:air:airport': 2 diff --git a/package.json b/package.json index dcf30f49..a9c3c7e1 100644 --- a/package.json +++ b/package.json @@ -33,9 +33,11 @@ "elasticsearch": ">=1.2.1" }, "dependencies": { + "addressit": "1.3.0", "async": "^0.9.0", "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "express": "^4.8.8", + "extend": "2.0.1", "geojson": "^0.2.1", "geojson-extent": "^0.3.1", "geopipes-elasticsearch-backend": "^0.2.0", @@ -44,10 +46,9 @@ "microtime": "1.4.0", "morgan": "1.5.2", "pelias-config": "^0.1.4", - "extend": "2.0.1", - "addressit": "1.3.0", "pelias-esclient": "0.0.25", "pelias-logger": "^0.0.8", + "pelias-schema": "1.0.0", "pelias-suggester-pipeline": "2.0.2", "through2": "0.6.5" }, diff --git a/query/search.js b/query/search.js index 8e34b404..df1b3017 100644 --- a/query/search.js +++ b/query/search.js @@ -1,6 +1,9 @@ var queries = require('geopipes-elasticsearch-backend').queries, - sort = require('../query/sort'); + sort = require('../query/sort'), + adminFields = require('../helper/adminFields')(), + addressWeights = require('../helper/address_weights'); + function generate( params ){ var centroid = null; @@ -27,23 +30,6 @@ function generate( params ){ }; if (params.parsed_input) { - - query.query.filtered.query.bool.should = []; - - var unmatched_admin_fields = []; - // qb stands for query builder - var qb = function(unmatched_admin_fields, value) { - if (value) { - unmatched_admin_fields.forEach(function(admin_field) { - var match = {}; - match[admin_field] = value; - query.query.filtered.query.bool.should.push({ - 'match': match - }); - }); - } - }; - // update input if (params.parsed_input.number && params.parsed_input.street) { input = params.parsed_input.number + ' ' + params.parsed_input.street; @@ -51,52 +37,7 @@ function generate( params ){ input = params.parsed_input.name; } - // address - // number, street, postalcode - if (params.parsed_input.number) { - qb(['address.number'], params.parsed_input.number); - } - if (params.parsed_input.street) { - qb(['address.street'], params.parsed_input.street); - } - if (params.parsed_input.postalcode) { - qb(['address.zip'], params.parsed_input.postalcode); - } - - // city - // admin2, locality, local_admin, neighborhood - if (params.parsed_input.city) { - qb(['admin2'], params.parsed_input.admin2); - } else { - unmatched_admin_fields.push('admin2'); - } - - // state - // admin1, admin1_abbr - if (params.parsed_input.state) { - qb(['admin1_abbr'], params.parsed_input.state); - } else { - unmatched_admin_fields.push('admin1', 'admin1_abbr'); - } - - // country - // admin0, alpha3 - if (params.parsed_input.country) { - qb(['alpha3'], params.parsed_input.country); - } else { - unmatched_admin_fields.push('admin0', 'alpha3'); - } - - var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined; - // if no address was identified and input suggests some admin info in it - if (unmatched_admin_fields.length === 5 && input_regions !== params.input) { - if (params.parsed_input.admin_parts) { - qb(unmatched_admin_fields, params.parsed_input.admin_parts); - } else { - qb(unmatched_admin_fields, input_regions); - } - } - + addParsedMatch(query, input, params.parsed_input); } // add search condition to distance query @@ -119,4 +60,121 @@ function generate( params ){ return query; } +/** + * Traverse the parsed input object, containing all the address parts detected in query string. + * Add matches to query for each identifiable component. + * + * @param {Object} query + * @param {string} defaultInput + * @param {Object} parsedInput + */ +function addParsedMatch(query, defaultInput, parsedInput) { + query.query.filtered.query.bool.should = query.query.filtered.query.bool.should || []; + + // copy expected admin fields so we can remove them as we parse the address + var unmatchedAdminFields = adminFields.slice(); + + // address + // number, street, postalcode + addMatch(query, unmatchedAdminFields, 'address.number', parsedInput.number, addressWeights.number); + addMatch(query, unmatchedAdminFields, 'address.street', parsedInput.street, addressWeights.street); + addMatch(query, unmatchedAdminFields, 'address.zip', parsedInput.postalcode, addressWeights.zip); + + // city + // admin2, locality, local_admin, neighborhood + addMatch(query, unmatchedAdminFields, 'admin2', parsedInput.city, addressWeights.admin2); + + // state + // admin1, admin1_abbr + addMatch(query, unmatchedAdminFields, 'admin1_abbr', parsedInput.state, addressWeights.admin1_abbr); + + // country + // admin0, alpha3 + addMatch(query, unmatchedAdminFields, 'alpha3', parsedInput.country, addressWeights.alpha3); + + addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput); +} + +/** + * Check for additional admin fields in the parsed input, and if any was found + * combine into single string and match against all unmatched admin fields. + * + * @param {Object} query + * @param {Array} unmatchedAdminFields + * @param {Object} parsedInput + * @param {string} defaultInput + */ +function addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput) { + if (unmatchedAdminFields.length === 0 ) { + return; + } + + var leftovers = []; + + if (parsedInput.admin_parts) { + leftovers.push(parsedInput.admin_parts); + } + else if (parsedInput.regions) { + leftovers.push(parsedInput.regions); + } + + if (leftovers.length === 0) { + return; + } + + leftovers = leftovers.join(' '); + + // if there are additional regions/admin_parts found + if (leftovers !== defaultInput) { + unmatchedAdminFields.forEach(function (key) { + // combine all the leftover parts into one string + addMatch(query, [], key, leftovers); + }); + } +} + +/** + * Add key:value match to query. Apply boost if specified. + * + * @param {Object} query + * @param {Array} unmatched + * @param {string} key + * @param {string|number|undefined} value + * @param {number|undefined} [boost] optional + */ +function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line + if (typeof value === 'undefined') { + return; + } + + var match = {}; + + if (boost) { + match[key] = { + query: value, + boost: boost + }; + } + else { + match[key] = value; + } + + query.query.filtered.query.bool.should.push({ 'match': match }); + + removeFromUnmatched(unmatched, key); +} + +/** + * If key is found in unmatched list, remove it from the array + * + * @param {Array} unmatched + * @param {string} key + */ +function removeFromUnmatched(unmatched, key) { + var index = unmatched.indexOf(key); + if (index !== -1) { + unmatched.splice(index, 1); + } +} + module.exports = generate; diff --git a/query/sort.js b/query/sort.js index 13f530c9..aaca76e5 100644 --- a/query/sort.js +++ b/query/sort.js @@ -43,7 +43,7 @@ module.exports = function( params ){ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': getCategoryWeights(params) }, 'file': category, 'type': 'number', @@ -64,3 +64,12 @@ module.exports = function( params ){ return scriptsConfig; }; + +function getCategoryWeights(params) { + if (params && params.hasOwnProperty('parsed_input') && + (params.parsed_input.hasOwnProperty('number') || + params.parsed_input.hasOwnProperty('street'))) { + return category_weights.address; + } + return category_weights.default; +} \ No newline at end of file diff --git a/test/unit/helper/adminFields.js b/test/unit/helper/adminFields.js new file mode 100644 index 00000000..8780b267 --- /dev/null +++ b/test/unit/helper/adminFields.js @@ -0,0 +1,84 @@ +var adminFields = require('../../../helper/adminFields'); + +module.exports.tests = {}; + +module.exports.tests.interface = function(test, common) { + test('validate fields', function(t) { + t.assert(adminFields instanceof Function, 'adminFields is a function'); + t.assert(adminFields() instanceof Array, 'adminFields() returns an array'); + t.assert(adminFields().length > 0, 'adminFields array is not empty'); + t.end(); + }); +}; + +module.exports.tests.lookupExistance = function(test, common) { + test('all expected fields in schema', function(t) { + + var expectedFields = [ + 'one', + 'two', + 'three', + 'four' + ]; + var schema = { mappings: { _default_: { properties: {} } } }; + + // inject all expected fields into schema mock + expectedFields.forEach(function (field) { + schema.mappings._default_.properties[field] = {}; + }); + + var res = adminFields(schema, expectedFields); + + t.deepEquals(res, expectedFields, 'all expected fields are returned'); + t.end(); + }); + + test('some expected fields in schema', function(t) { + + var expectedFields = [ + 'one', + 'two', + 'three', + 'four' + ]; + var schema = { mappings: { _default_: { properties: {} } } }; + + // inject only some of the expected fields into schema mock + expectedFields.slice(0, 3).forEach(function (field) { + schema.mappings._default_.properties[field] = {}; + }); + + var res = adminFields(schema, expectedFields); + + t.deepEquals(res, expectedFields.slice(0, 3), 'only matching expected fields are returned'); + t.end(); + }); + + test('no expected fields in schema', function(t) { + + var schema = { mappings: { _default_: { properties: { foo: {} } } } }; + + var logErrorCalled = false; + var logger = { + error: function () { + logErrorCalled = true; + }}; + + var res = adminFields(schema, undefined, logger); + + t.deepEquals(res, [], 'no admin fields found'); + t.assert(logErrorCalled, 'log error called'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('adminFields: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; \ No newline at end of file diff --git a/test/unit/query/reverse.js b/test/unit/query/reverse.js index a84bf1c7..e200b832 100644 --- a/test/unit/query/reverse.js +++ b/test/unit/query/reverse.js @@ -53,7 +53,7 @@ var sort = [ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': category_weights.default }, 'file': category, 'type': 'number', diff --git a/test/unit/query/search.js b/test/unit/query/search.js index dcc9d745..ed3d1a6d 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -7,8 +7,10 @@ var category = 'category'; var parser = require('../../../helper/query_parser'); var category_weights = require('../../../helper/category_weights'); var admin_weights = require('../../../helper/admin_weights'); +var address_weights = require('../../../helper/address_weights'); var weights = require('pelias-suggester-pipeline').weights; + module.exports.tests = {}; module.exports.tests.interface = function(test, common) { @@ -54,7 +56,7 @@ var sort = [ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': category_weights.default }, 'file': category, 'type': 'number', @@ -267,27 +269,72 @@ module.exports.tests.query = function(test, common) { 'should': [ { 'match': { - 'address.number': 123 + 'address.number': { + 'query': 123, + 'boost': address_weights.number + } } }, { 'match': { - 'address.street': 'main st' + 'address.street': { + 'query': 'main st', + 'boost': address_weights.street + } } }, { 'match': { - 'address.zip': 10010 + 'address.zip': { + 'query': 10010, + 'boost': address_weights.zip + } } }, { 'match': { - 'admin1_abbr': 'NY' + 'admin1_abbr': { + 'query': 'NY', + 'boost': address_weights.admin1_abbr + } } }, { 'match': { - 'alpha3': 'USA' + 'alpha3': { + 'query': 'USA', + 'boost': address_weights.alpha3 + } + } + }, + { + match: { + admin0: 'new york' + } + }, + { + match: { + admin1: 'new york' + } + }, + { + match: { + admin2: 'new york' + } + }, + { + match: { + local_admin: 'new york' + } + }, + { + match: { + locality: 'new york' + } + }, + { + match: { + neighborhood: 'new york' } }, { @@ -416,7 +463,7 @@ module.exports.tests.query = function(test, common) { 'should': [ { 'match': { - 'admin2': 'new york' + 'admin0': 'new york' } }, { @@ -431,12 +478,22 @@ module.exports.tests.query = function(test, common) { }, { 'match': { - 'admin0': 'new york' + 'admin2': 'new york' + } + }, + { + 'match': { + 'local_admin': 'new york' + } + }, + { + 'match': { + 'locality': 'new york' } }, { 'match': { - 'alpha3': 'new york' + 'neighborhood': 'new york' } }, { @@ -501,7 +558,8 @@ module.exports.tests.query = function(test, common) { 'category_weights': { 'transport:air': 2, 'transport:air:aerodrome': 2, - 'transport:air:airport': 2 + 'transport:air:airport': 2, + 'admin': 2 } }, 'file': 'category', @@ -534,10 +592,189 @@ module.exports.tests.query = function(test, common) { ], 'track_scores': true }; - + t.deepEqual(query, expected, 'valid search query'); t.end(); }); + + test('valid query with regions in address', function(t) { + var partial_address = '1 water st manhattan ny'; + var query = generate({ input: partial_address, + layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', + 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], + size: 10, + details: true, + parsed_input: parser(partial_address), + default_layers_set: true + }); + + var expected = { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'must': [ + { + 'match': { + 'name.default': '1 water st' + } + } + ], + 'should': [ + { + match: { + 'address.number': { + 'query': 1, + 'boost': address_weights.number + } + } + }, + { + match: { + 'address.street': { + 'query': 'water st', + 'boost': address_weights.street + } + } + }, + { + 'match': { + 'admin1_abbr': { + 'query': 'NY', + 'boost': address_weights.admin1_abbr + } + } + }, + { + 'match': { + 'admin0': 'manhattan' + } + }, + { + 'match': { + 'admin1': 'manhattan' + } + }, + { + 'match': { + 'admin2': 'manhattan' + } + }, + { + match: { + local_admin: 'manhattan' + } + }, + { + match: { + locality: 'manhattan' + } + }, + { + match: { + neighborhood: 'manhattan' + } + }, + { + 'match': { + 'phrase.default': '1 water st' + } + } + ] + } + }, + 'filter': { + 'bool': { + 'must': [] + } + } + } + }, + 'size': 10, + 'sort': [ + '_score', + { + '_script': { + 'file': 'admin_boost', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'file': 'popularity', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'file': 'population', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'params': { + 'weights': { + 'admin0': 4, + 'admin1': 3, + 'admin2': 2, + 'local_admin': 1, + 'locality': 1, + 'neighborhood': 1 + } + }, + 'file': 'weights', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'params': { + 'category_weights': { + 'transport:air': 2, + 'transport:air:aerodrome': 2, + 'transport:air:airport': 2 + } + }, + 'file': 'category', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'params': { + 'weights': { + 'geoname': 0, + 'address': 4, + 'osmnode': 6, + 'osmway': 6, + 'poi-address': 8, + 'neighborhood': 10, + 'local_admin': 12, + 'locality': 12, + 'admin2': 12, + 'admin1': 14, + 'admin0': 2 + } + }, + 'file': 'weights', + 'type': 'number', + 'order': 'desc' + } + } + ], + 'track_scores': true + }; + + t.deepEqual(query, expected, 'valid search query'); + t.end(); + }); + }; module.exports.all = function (tape, common) { diff --git a/test/unit/query/sort.js b/test/unit/query/sort.js index 1a2fe5d1..f534d8ae 100644 --- a/test/unit/query/sort.js +++ b/test/unit/query/sort.js @@ -53,7 +53,7 @@ var expected = [ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': category_weights.default }, 'file': category, 'type': 'number', diff --git a/test/unit/run.js b/test/unit/run.js index eb7cad06..75051927 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -19,7 +19,8 @@ var tests = [ require('./query/reverse'), require('./helper/query_parser'), require('./helper/geojsonify'), - require('./helper/outputSchema') + require('./helper/outputSchema'), + require('./helper/adminFields'), ]; tests.map(function(t) {