From df66dccbc2f792614af9850fc2e0da570ef0e04a Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Tue, 14 Jul 2015 14:18:45 +0200 Subject: [PATCH 1/8] run webserver on all available cores, resolves #6 --- index.js | 29 ++++++++++++++++------------- package.json | 1 + 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/index.js b/index.js index 2e2e47bf..5f043b26 100644 --- a/index.js +++ b/index.js @@ -1,17 +1,20 @@ -var cluster = require('cluster'), - app = require('./app'), - multicore = false, - port = ( process.env.PORT || 3100 ); +var Cluster = require('cluster2'), + app = require('./app'), + port = ( process.env.PORT || 3100 ), + multicore = true; /** cluster webserver across all cores **/ -// if( multicore ){ - // @todo: not finished yet - // cluster(app) - // .use(cluster.stats()) - // .listen( process.env.PORT || 3100 ); -// } -if (!multicore){ - console.log( 'listening on ' + port ); - app.listen( process.env.PORT || 3100 ); +if( multicore ){ + var c = new Cluster({ port: port }); + c.listen(function(cb){ + console.log( 'worker: listening on ' + port ); + cb(app); + }); } + +/** run server on the default setup (single core) **/ +else { + console.log( 'listening on ' + port ); + app.listen( port ); +} \ No newline at end of file diff --git a/package.json b/package.json index b59a6ffc..39bd8473 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ }, "dependencies": { "async": "^0.9.0", + "cluster2": "^0.4.26", "express": "^4.8.8", "geojson": "^0.2.1", "geojson-extent": "^0.3.1", From d8e86d5f17133eedb8fde96f3fe754a99cb0a373 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 27 Jul 2015 16:47:55 +0200 Subject: [PATCH 2/8] phrase slop query modifications --- query/search.js | 12 ++++++-- test/unit/query/search.js | 60 ++++++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 12 deletions(-) diff --git a/query/search.js b/query/search.js index 8e34b404..49945534 100644 --- a/query/search.js +++ b/query/search.js @@ -102,7 +102,10 @@ function generate( params ){ // add search condition to distance query query.query.filtered.query.bool.must.push({ 'match': { - 'name.default': input + 'name.default': { + 'query': input, + 'analyzer': 'peliasOneEdgeGram' + } } }); @@ -110,7 +113,12 @@ function generate( params ){ // note: this is required for shingle/phrase matching query.query.filtered.query.bool.should.push({ 'match': { - 'phrase.default': input + 'phrase.default': { + 'query': input, + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2 + } } }); diff --git a/test/unit/query/search.js b/test/unit/query/search.js index dcc9d745..d3dccce9 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -80,12 +80,20 @@ var expected = { 'bool': { 'must': [{ 'match': { - 'name.default': 'test' + 'name.default': { + 'query': 'test', + 'analyzer': 'peliasOneEdgeGram' + } } }], 'should': [{ 'match': { - 'phrase.default': 'test' + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2 + } } }] } @@ -162,12 +170,20 @@ module.exports.tests.query = function(test, common) { 'bool': { 'must': [{ 'match': { - 'name.default': 'test' + 'name.default': { + 'query': 'test', + 'analyzer': 'peliasOneEdgeGram' + } } }], 'should': [{ 'match': { - 'phrase.default': 'test' + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2 + } } }] } @@ -202,12 +218,20 @@ module.exports.tests.query = function(test, common) { 'bool': { 'must': [{ 'match': { - 'name.default': 'test' + 'name.default': { + 'query': 'test', + 'analyzer': 'peliasOneEdgeGram' + } } }], 'should': [{ 'match': { - 'phrase.default': 'test' + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2 + } } }] } @@ -260,7 +284,10 @@ module.exports.tests.query = function(test, common) { 'must': [ { 'match': { - 'name.default': '123 main st' + 'name.default': { + 'query': '123 main st', + 'analyzer': 'peliasOneEdgeGram' + } } } ], @@ -292,7 +319,12 @@ module.exports.tests.query = function(test, common) { }, { 'match': { - 'phrase.default': '123 main st' + 'phrase.default': { + 'query': '123 main st', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2 + } } } ] @@ -409,7 +441,10 @@ module.exports.tests.query = function(test, common) { 'must': [ { 'match': { - 'name.default': 'soho grand' + 'name.default': { + 'query': 'soho grand', + 'analyzer': 'peliasOneEdgeGram' + } } } ], @@ -441,7 +476,12 @@ module.exports.tests.query = function(test, common) { }, { 'match': { - 'phrase.default': 'soho grand' + 'phrase.default': { + 'query': 'soho grand', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2 + } } } ] From e3efeb66d18374524f89696754db0a024c27dc73 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 3 Aug 2015 12:01:32 +0200 Subject: [PATCH 3/8] change dependency to git url until PR merged upstream, test travis against 0.10,0.12 --- .travis.yml | 1 + package.json | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4ca5cafc..f2a4e06a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,4 +2,5 @@ language: node_js script: "npm run unit" node_js: - "0.10" + - "0.12" sudo: false \ No newline at end of file diff --git a/package.json b/package.json index 4610d0e4..dcf30f49 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,7 @@ }, "dependencies": { "async": "^0.9.0", - "cluster2": "^0.4.26", + "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "express": "^4.8.8", "geojson": "^0.2.1", "geojson-extent": "^0.3.1", From ab436d5dfb509e6af39f8295d70edd3c357ffc10 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Mon, 10 Aug 2015 16:46:19 -0400 Subject: [PATCH 4/8] Search query builder not checking all available admin values Fixes #187 --- helper/address_weights.js | 13 ++ helper/adminFields.js | 30 ++++ helper/category_weights.js | 9 +- package.json | 5 +- query/search.js | 132 ++++++++-------- query/sort.js | 11 +- test/unit/helper/adminFields.js | 84 +++++++++++ test/unit/query/reverse.js | 2 +- test/unit/query/search.js | 257 ++++++++++++++++++++++++++++++-- test/unit/query/sort.js | 2 +- test/unit/run.js | 3 +- 11 files changed, 466 insertions(+), 82 deletions(-) create mode 100644 helper/address_weights.js create mode 100644 helper/adminFields.js create mode 100644 test/unit/helper/adminFields.js diff --git a/helper/address_weights.js b/helper/address_weights.js new file mode 100644 index 00000000..6b15ab4c --- /dev/null +++ b/helper/address_weights.js @@ -0,0 +1,13 @@ +/** + * These values specify how much a document that matches certain parts of an address + * should be boosted in elasticsearch results. + */ + +module.exports = { + number: 1, + street: 3, + zip: 3, + admin2: 2, + admin1_abbr: 3, + alpha3: 5 +}; diff --git a/helper/adminFields.js b/helper/adminFields.js new file mode 100644 index 00000000..aa96c203 --- /dev/null +++ b/helper/adminFields.js @@ -0,0 +1,30 @@ +var schema = require('pelias-schema'); +var logger = require( 'pelias-logger' ).get( 'api' ); + +var ADMIN_FIELDS = [ + 'admin0', + 'admin1', + 'admin1_abbr', + 'admin2', + 'local_admin', + 'locality', + 'neighborhood' +]; + +function getAvailableAdminFields() { + var actualFields = Object.keys(schema.mappings._default_.properties); + + // check if expected fields are actually in current schema + var available = ADMIN_FIELDS.filter(function (field) { + return (actualFields.indexOf(field) !== -1); + }); + + if (available.length === 0) { + logger.error('helper/adminFields: no expected admin fields found in schema'); + } + + return available; +} + +module.exports.availableFields = getAvailableAdminFields(); +module.exports.expectedFields = ADMIN_FIELDS; \ No newline at end of file diff --git a/helper/category_weights.js b/helper/category_weights.js index 535ad2f2..5176b70e 100644 --- a/helper/category_weights.js +++ b/helper/category_weights.js @@ -3,7 +3,14 @@ * should be boosted in elasticsearch results. */ -module.exports = { +module.exports.default = { + 'transport:air': 2, + 'transport:air:aerodrome': 2, + 'transport:air:airport': 2, + 'admin': 2 +}; + +module.exports.address = { 'transport:air': 2, 'transport:air:aerodrome': 2, 'transport:air:airport': 2 diff --git a/package.json b/package.json index dcf30f49..a9c3c7e1 100644 --- a/package.json +++ b/package.json @@ -33,9 +33,11 @@ "elasticsearch": ">=1.2.1" }, "dependencies": { + "addressit": "1.3.0", "async": "^0.9.0", "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "express": "^4.8.8", + "extend": "2.0.1", "geojson": "^0.2.1", "geojson-extent": "^0.3.1", "geopipes-elasticsearch-backend": "^0.2.0", @@ -44,10 +46,9 @@ "microtime": "1.4.0", "morgan": "1.5.2", "pelias-config": "^0.1.4", - "extend": "2.0.1", - "addressit": "1.3.0", "pelias-esclient": "0.0.25", "pelias-logger": "^0.0.8", + "pelias-schema": "1.0.0", "pelias-suggester-pipeline": "2.0.2", "through2": "0.6.5" }, diff --git a/query/search.js b/query/search.js index 8e34b404..41a4bff6 100644 --- a/query/search.js +++ b/query/search.js @@ -1,6 +1,9 @@ var queries = require('geopipes-elasticsearch-backend').queries, - sort = require('../query/sort'); + sort = require('../query/sort'), + adminFields = require('../helper/adminFields').availableFields, + addressWeights = require('../helper/address_weights'); + function generate( params ){ var centroid = null; @@ -27,22 +30,7 @@ function generate( params ){ }; if (params.parsed_input) { - - query.query.filtered.query.bool.should = []; - - var unmatched_admin_fields = []; - // qb stands for query builder - var qb = function(unmatched_admin_fields, value) { - if (value) { - unmatched_admin_fields.forEach(function(admin_field) { - var match = {}; - match[admin_field] = value; - query.query.filtered.query.bool.should.push({ - 'match': match - }); - }); - } - }; + addParsedMatch(query, input, params.parsed_input); // update input if (params.parsed_input.number && params.parsed_input.street) { @@ -50,53 +38,6 @@ function generate( params ){ } else if (params.parsed_input.admin_parts) { input = params.parsed_input.name; } - - // address - // number, street, postalcode - if (params.parsed_input.number) { - qb(['address.number'], params.parsed_input.number); - } - if (params.parsed_input.street) { - qb(['address.street'], params.parsed_input.street); - } - if (params.parsed_input.postalcode) { - qb(['address.zip'], params.parsed_input.postalcode); - } - - // city - // admin2, locality, local_admin, neighborhood - if (params.parsed_input.city) { - qb(['admin2'], params.parsed_input.admin2); - } else { - unmatched_admin_fields.push('admin2'); - } - - // state - // admin1, admin1_abbr - if (params.parsed_input.state) { - qb(['admin1_abbr'], params.parsed_input.state); - } else { - unmatched_admin_fields.push('admin1', 'admin1_abbr'); - } - - // country - // admin0, alpha3 - if (params.parsed_input.country) { - qb(['alpha3'], params.parsed_input.country); - } else { - unmatched_admin_fields.push('admin0', 'alpha3'); - } - - var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined; - // if no address was identified and input suggests some admin info in it - if (unmatched_admin_fields.length === 5 && input_regions !== params.input) { - if (params.parsed_input.admin_parts) { - qb(unmatched_admin_fields, params.parsed_input.admin_parts); - } else { - qb(unmatched_admin_fields, input_regions); - } - } - } // add search condition to distance query @@ -119,4 +60,67 @@ function generate( params ){ return query; } +function addParsedMatch(query, defaultInput, parsedInput) { + query.query.filtered.query.bool.should = []; + + // copy expected admin fields so we can remove them as we parse the address + var unmatchedAdminFields = adminFields.slice(); + + // address + // number, street, postalcode + addMatch(query, unmatchedAdminFields, 'address.number', parsedInput.number, addressWeights.number); + addMatch(query, unmatchedAdminFields, 'address.street', parsedInput.street, addressWeights.street); + addMatch(query, unmatchedAdminFields, 'address.zip', parsedInput.postalcode, addressWeights.zip); + + // city + // admin2, locality, local_admin, neighborhood + addMatch(query, unmatchedAdminFields, 'admin2', parsedInput.admin2, addressWeights.admin2); + + // state + // admin1, admin1_abbr + addMatch(query, unmatchedAdminFields, 'admin1_abbr', parsedInput.state, addressWeights.admin1_abbr); + + // country + // admin0, alpha3 + addMatch(query, unmatchedAdminFields, 'alpha3', parsedInput.country, addressWeights.alpha3); + + var inputRegions = parsedInput.regions ? parsedInput.regions.join(' ') : undefined; + // if no address was identified and input suggests some admin info in it + if (unmatchedAdminFields.length > 0 && inputRegions !== defaultInput) { + unmatchedAdminFields.forEach(function (key) { + if (parsedInput.admin_parts) { + addMatch(query, [], key, parsedInput.admin_parts); + } + else { + addMatch(query, [], key, inputRegions); + } + }); + } +} + +function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line + if (typeof value === 'undefined') { + return; + } + + var match = {}; + + if (boost) { + match[key] = { + query: value, + boost: boost + }; + } + else { + match[key] = value; + } + + query.query.filtered.query.bool.should.push({ 'match': match }); + + var index = unmatched.indexOf(key); + if (index !== -1) { + unmatched.splice(index, 1); + } +} + module.exports = generate; diff --git a/query/sort.js b/query/sort.js index 13f530c9..aaca76e5 100644 --- a/query/sort.js +++ b/query/sort.js @@ -43,7 +43,7 @@ module.exports = function( params ){ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': getCategoryWeights(params) }, 'file': category, 'type': 'number', @@ -64,3 +64,12 @@ module.exports = function( params ){ return scriptsConfig; }; + +function getCategoryWeights(params) { + if (params && params.hasOwnProperty('parsed_input') && + (params.parsed_input.hasOwnProperty('number') || + params.parsed_input.hasOwnProperty('street'))) { + return category_weights.address; + } + return category_weights.default; +} \ No newline at end of file diff --git a/test/unit/helper/adminFields.js b/test/unit/helper/adminFields.js new file mode 100644 index 00000000..9d72b934 --- /dev/null +++ b/test/unit/helper/adminFields.js @@ -0,0 +1,84 @@ +var proxyquire = require('proxyquire'); + +module.exports.tests = {}; + +module.exports.tests.interface = function(test, common) { + test('validate fields', function(t) { + var adminFields = require('../../../helper/adminFields').availableFields; + t.assert(adminFields instanceof Array, 'adminFields is an array'); + t.assert(adminFields.length > 0, 'adminFields array is not empty'); + t.end(); + }); + test('validate fields', function(t) { + var adminFields = require('../../../helper/adminFields').expectedFields; + t.assert(adminFields instanceof Array, 'adminFields is an array'); + t.assert(adminFields.length > 0, 'adminFields array is not empty'); + t.end(); + }); +}; + +module.exports.tests.lookupExistance = function(test, common) { + test('all expected fields in schema', function(t) { + + var expectedFields = require('../../../helper/adminFields').expectedFields; + var schemaMock = { mappings: { _default_: { properties: {} } } }; + + // inject all expected fields into schema mock + expectedFields.forEach(function (field) { + schemaMock.mappings._default_.properties[field] = {}; + }); + + var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock}); + + t.deepEquals(adminFields.availableFields, adminFields.expectedFields, 'all expected fields are returned'); + t.end(); + }); + + test('some expected fields in schema', function(t) { + + var expectedFields = require('../../../helper/adminFields').expectedFields.slice(0, 3); + var schemaMock = { mappings: { _default_: { properties: {} } } }; + + // inject all expected fields into schema mock + expectedFields.forEach(function (field) { + schemaMock.mappings._default_.properties[field] = {}; + }); + + var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock}); + + t.deepEquals(adminFields.availableFields, expectedFields, 'only matching expected fields are returned'); + t.end(); + }); + + test('no expected fields in schema', function(t) { + + var schemaMock = { mappings: { _default_: { properties: { foo: {} } } } }; + + var loggerMock = { get: function (name) { + t.equal(name, 'api'); + return { + error: function () {} + }; + }}; + + var adminFields = proxyquire('../../../helper/adminFields', + { + 'pelias-schema': schemaMock, + 'pelias-logger': loggerMock + }); + + t.deepEquals([], adminFields.availableFields, 'no admin fields found'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('adminFields: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; \ No newline at end of file diff --git a/test/unit/query/reverse.js b/test/unit/query/reverse.js index a84bf1c7..e200b832 100644 --- a/test/unit/query/reverse.js +++ b/test/unit/query/reverse.js @@ -53,7 +53,7 @@ var sort = [ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': category_weights.default }, 'file': category, 'type': 'number', diff --git a/test/unit/query/search.js b/test/unit/query/search.js index dcc9d745..0413706e 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -54,7 +54,7 @@ var sort = [ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': category_weights.default }, 'file': category, 'type': 'number', @@ -267,27 +267,72 @@ module.exports.tests.query = function(test, common) { 'should': [ { 'match': { - 'address.number': 123 + 'address.number': { + 'query': 123, + 'boost': 1 + } } }, { 'match': { - 'address.street': 'main st' + 'address.street': { + 'query': 'main st', + 'boost': 3 + } } }, { 'match': { - 'address.zip': 10010 + 'address.zip': { + 'query': 10010, + 'boost': 3 + } } }, { 'match': { - 'admin1_abbr': 'NY' + 'admin1_abbr': { + 'query': 'NY', + 'boost': 3 + } } }, { 'match': { - 'alpha3': 'USA' + 'alpha3': { + 'query': 'USA', + 'boost': 5 + } + } + }, + { + match: { + admin0: 'new york' + } + }, + { + match: { + admin1: 'new york' + } + }, + { + match: { + admin2: 'new york' + } + }, + { + match: { + local_admin: 'new york' + } + }, + { + match: { + locality: 'new york' + } + }, + { + match: { + neighborhood: 'new york' } }, { @@ -416,7 +461,7 @@ module.exports.tests.query = function(test, common) { 'should': [ { 'match': { - 'admin2': 'new york' + 'admin0': 'new york' } }, { @@ -431,12 +476,22 @@ module.exports.tests.query = function(test, common) { }, { 'match': { - 'admin0': 'new york' + 'admin2': 'new york' } }, { 'match': { - 'alpha3': 'new york' + 'local_admin': 'new york' + } + }, + { + 'match': { + 'locality': 'new york' + } + }, + { + 'match': { + 'neighborhood': 'new york' } }, { @@ -501,7 +556,8 @@ module.exports.tests.query = function(test, common) { 'category_weights': { 'transport:air': 2, 'transport:air:aerodrome': 2, - 'transport:air:airport': 2 + 'transport:air:airport': 2, + 'admin': 2 } }, 'file': 'category', @@ -534,10 +590,189 @@ module.exports.tests.query = function(test, common) { ], 'track_scores': true }; - + + t.deepEqual(query, expected, 'valid search query'); + t.end(); + }); + + test('valid query with regions in address', function(t) { + var partial_address = '1 water st manhattan ny'; + var query = generate({ input: partial_address, + layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', + 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], + size: 10, + details: true, + parsed_input: parser(partial_address), + default_layers_set: true + }); + + var expected = { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'must': [ + { + 'match': { + 'name.default': '1 water st' + } + } + ], + 'should': [ + { + match: { + 'address.number': { + 'query': 1, + 'boost': 1 + } + } + }, + { + match: { + 'address.street': { + 'query': 'water st', + 'boost': 3 + } + } + }, + { + 'match': { + 'admin1_abbr': { + 'query': 'NY', + 'boost': 3 + } + } + }, + { + 'match': { + 'admin0': 'manhattan' + } + }, + { + 'match': { + 'admin1': 'manhattan' + } + }, + { + 'match': { + 'admin2': 'manhattan' + } + }, + { + match: { + local_admin: 'manhattan' + } + }, + { + match: { + locality: 'manhattan' + } + }, + { + match: { + neighborhood: 'manhattan' + } + }, + { + 'match': { + 'phrase.default': '1 water st' + } + } + ] + } + }, + 'filter': { + 'bool': { + 'must': [] + } + } + } + }, + 'size': 10, + 'sort': [ + '_score', + { + '_script': { + 'file': 'admin_boost', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'file': 'popularity', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'file': 'population', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'params': { + 'weights': { + 'admin0': 4, + 'admin1': 3, + 'admin2': 2, + 'local_admin': 1, + 'locality': 1, + 'neighborhood': 1 + } + }, + 'file': 'weights', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'params': { + 'category_weights': { + 'transport:air': 2, + 'transport:air:aerodrome': 2, + 'transport:air:airport': 2 + } + }, + 'file': 'category', + 'type': 'number', + 'order': 'desc' + } + }, + { + '_script': { + 'params': { + 'weights': { + 'geoname': 0, + 'address': 4, + 'osmnode': 6, + 'osmway': 6, + 'poi-address': 8, + 'neighborhood': 10, + 'local_admin': 12, + 'locality': 12, + 'admin2': 12, + 'admin1': 14, + 'admin0': 2 + } + }, + 'file': 'weights', + 'type': 'number', + 'order': 'desc' + } + } + ], + 'track_scores': true + }; + t.deepEqual(query, expected, 'valid search query'); t.end(); }); + }; module.exports.all = function (tape, common) { diff --git a/test/unit/query/sort.js b/test/unit/query/sort.js index 1a2fe5d1..f534d8ae 100644 --- a/test/unit/query/sort.js +++ b/test/unit/query/sort.js @@ -53,7 +53,7 @@ var expected = [ { '_script': { 'params': { - 'category_weights': category_weights + 'category_weights': category_weights.default }, 'file': category, 'type': 'number', diff --git a/test/unit/run.js b/test/unit/run.js index eb7cad06..75051927 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -19,7 +19,8 @@ var tests = [ require('./query/reverse'), require('./helper/query_parser'), require('./helper/geojsonify'), - require('./helper/outputSchema') + require('./helper/outputSchema'), + require('./helper/adminFields'), ]; tests.map(function(t) { From 743d8efa54dac6e745382b5164762dc64393e6c5 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Thu, 13 Aug 2015 12:00:14 -0400 Subject: [PATCH 5/8] Remove proxyquire from adminField test Add comments and refactor a bit more for clarity in query/search.js --- helper/adminFields.js | 24 ++++++++--- query/search.js | 76 +++++++++++++++++++++++++++------ test/unit/helper/adminFields.js | 72 +++++++++++++++---------------- 3 files changed, 118 insertions(+), 54 deletions(-) diff --git a/helper/adminFields.js b/helper/adminFields.js index aa96c203..de1dd009 100644 --- a/helper/adminFields.js +++ b/helper/adminFields.js @@ -1,5 +1,5 @@ -var schema = require('pelias-schema'); -var logger = require( 'pelias-logger' ).get( 'api' ); +var peliasSchema = require('pelias-schema'); +var peliasLogger = require( 'pelias-logger' ).get( 'api' ); var ADMIN_FIELDS = [ 'admin0', @@ -11,11 +11,24 @@ var ADMIN_FIELDS = [ 'neighborhood' ]; -function getAvailableAdminFields() { +/** + * Get all admin fields that were expected and also found in schema + * + * @param {Object} [schema] optional: for testing only + * @param {Array} [expectedFields] optional: for testing only + * @param {Object} [logger] optional: for testing only + * @returns {Array.} + */ +function getAvailableAdminFields(schema, expectedFields, logger) { + + schema = schema || peliasSchema; + expectedFields = expectedFields || ADMIN_FIELDS; + logger = logger || peliasLogger; + var actualFields = Object.keys(schema.mappings._default_.properties); // check if expected fields are actually in current schema - var available = ADMIN_FIELDS.filter(function (field) { + var available = expectedFields.filter(function (field) { return (actualFields.indexOf(field) !== -1); }); @@ -26,5 +39,4 @@ function getAvailableAdminFields() { return available; } -module.exports.availableFields = getAvailableAdminFields(); -module.exports.expectedFields = ADMIN_FIELDS; \ No newline at end of file +module.exports = getAvailableAdminFields; \ No newline at end of file diff --git a/query/search.js b/query/search.js index 41a4bff6..f11b472a 100644 --- a/query/search.js +++ b/query/search.js @@ -1,7 +1,7 @@ var queries = require('geopipes-elasticsearch-backend').queries, sort = require('../query/sort'), - adminFields = require('../helper/adminFields').availableFields, + adminFields = require('../helper/adminFields')(), addressWeights = require('../helper/address_weights'); @@ -60,8 +60,16 @@ function generate( params ){ return query; } +/** + * Traverse the parsed input object, containing all the address parts detected in query string. + * Add matches to query for each identifiable component. + * + * @param {Object} query + * @param {string} defaultInput + * @param {Object} parsedInput + */ function addParsedMatch(query, defaultInput, parsedInput) { - query.query.filtered.query.bool.should = []; + query.query.filtered.query.bool.should = query.query.filtered.query.bool.should || []; // copy expected admin fields so we can remove them as we parse the address var unmatchedAdminFields = adminFields.slice(); @@ -74,7 +82,7 @@ function addParsedMatch(query, defaultInput, parsedInput) { // city // admin2, locality, local_admin, neighborhood - addMatch(query, unmatchedAdminFields, 'admin2', parsedInput.admin2, addressWeights.admin2); + addMatch(query, unmatchedAdminFields, 'admin2', parsedInput.city, addressWeights.admin2); // state // admin1, admin1_abbr @@ -84,20 +92,54 @@ function addParsedMatch(query, defaultInput, parsedInput) { // admin0, alpha3 addMatch(query, unmatchedAdminFields, 'alpha3', parsedInput.country, addressWeights.alpha3); - var inputRegions = parsedInput.regions ? parsedInput.regions.join(' ') : undefined; - // if no address was identified and input suggests some admin info in it - if (unmatchedAdminFields.length > 0 && inputRegions !== defaultInput) { + addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput); +} + +/** + * Check for additional admin fields in the parsed input, and if any was found + * combine into single string and match against all unmatched admin fields. + * + * @param {Object} query + * @param {Array} unmatchedAdminFields + * @param {Object} parsedInput + * @param {string} defaultInput + */ +function addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput) { + if (unmatchedAdminFields.length === 0 ) { + return; + } + + var leftovers = []; + + if (parsedInput.admin_parts) { + leftovers.push(parsedInput.admin_parts); + } + else if (parsedInput.regions) { + leftovers.push(parsedInput.regions); + } + + if (leftovers.length === 0) { + return; + } + + // if there are additional regions/admin_parts found + if (leftovers !== defaultInput) { unmatchedAdminFields.forEach(function (key) { - if (parsedInput.admin_parts) { - addMatch(query, [], key, parsedInput.admin_parts); - } - else { - addMatch(query, [], key, inputRegions); - } + // combine all the leftover parts into one string + addMatch(query, [], key, leftovers.join(' ')); }); } } +/** + * Add key:value match to query. Apply boost if specified. + * + * @param {Object} query + * @param {Array} unmatched + * @param {string} key + * @param {string|number|undefined} value + * @param {number|undefined} [boost] optional + */ function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line if (typeof value === 'undefined') { return; @@ -117,6 +159,16 @@ function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line query.query.filtered.query.bool.should.push({ 'match': match }); + removeFromUnmatched(unmatched, key); +} + +/** + * If key is found in unmatched list, remove it from the array + * + * @param {Array} unmatched + * @param {string} key + */ +function removeFromUnmatched(unmatched, key) { var index = unmatched.indexOf(key); if (index !== -1) { unmatched.splice(index, 1); diff --git a/test/unit/helper/adminFields.js b/test/unit/helper/adminFields.js index 9d72b934..8780b267 100644 --- a/test/unit/helper/adminFields.js +++ b/test/unit/helper/adminFields.js @@ -1,18 +1,12 @@ -var proxyquire = require('proxyquire'); +var adminFields = require('../../../helper/adminFields'); module.exports.tests = {}; module.exports.tests.interface = function(test, common) { test('validate fields', function(t) { - var adminFields = require('../../../helper/adminFields').availableFields; - t.assert(adminFields instanceof Array, 'adminFields is an array'); - t.assert(adminFields.length > 0, 'adminFields array is not empty'); - t.end(); - }); - test('validate fields', function(t) { - var adminFields = require('../../../helper/adminFields').expectedFields; - t.assert(adminFields instanceof Array, 'adminFields is an array'); - t.assert(adminFields.length > 0, 'adminFields array is not empty'); + t.assert(adminFields instanceof Function, 'adminFields is a function'); + t.assert(adminFields() instanceof Array, 'adminFields() returns an array'); + t.assert(adminFields().length > 0, 'adminFields array is not empty'); t.end(); }); }; @@ -20,54 +14,60 @@ module.exports.tests.interface = function(test, common) { module.exports.tests.lookupExistance = function(test, common) { test('all expected fields in schema', function(t) { - var expectedFields = require('../../../helper/adminFields').expectedFields; - var schemaMock = { mappings: { _default_: { properties: {} } } }; + var expectedFields = [ + 'one', + 'two', + 'three', + 'four' + ]; + var schema = { mappings: { _default_: { properties: {} } } }; // inject all expected fields into schema mock expectedFields.forEach(function (field) { - schemaMock.mappings._default_.properties[field] = {}; + schema.mappings._default_.properties[field] = {}; }); - var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock}); + var res = adminFields(schema, expectedFields); - t.deepEquals(adminFields.availableFields, adminFields.expectedFields, 'all expected fields are returned'); + t.deepEquals(res, expectedFields, 'all expected fields are returned'); t.end(); }); test('some expected fields in schema', function(t) { - var expectedFields = require('../../../helper/adminFields').expectedFields.slice(0, 3); - var schemaMock = { mappings: { _default_: { properties: {} } } }; - - // inject all expected fields into schema mock - expectedFields.forEach(function (field) { - schemaMock.mappings._default_.properties[field] = {}; + var expectedFields = [ + 'one', + 'two', + 'three', + 'four' + ]; + var schema = { mappings: { _default_: { properties: {} } } }; + + // inject only some of the expected fields into schema mock + expectedFields.slice(0, 3).forEach(function (field) { + schema.mappings._default_.properties[field] = {}; }); - var adminFields = proxyquire('../../../helper/adminFields', {'pelias-schema': schemaMock}); + var res = adminFields(schema, expectedFields); - t.deepEquals(adminFields.availableFields, expectedFields, 'only matching expected fields are returned'); + t.deepEquals(res, expectedFields.slice(0, 3), 'only matching expected fields are returned'); t.end(); }); test('no expected fields in schema', function(t) { - var schemaMock = { mappings: { _default_: { properties: { foo: {} } } } }; + var schema = { mappings: { _default_: { properties: { foo: {} } } } }; - var loggerMock = { get: function (name) { - t.equal(name, 'api'); - return { - error: function () {} - }; - }}; + var logErrorCalled = false; + var logger = { + error: function () { + logErrorCalled = true; + }}; - var adminFields = proxyquire('../../../helper/adminFields', - { - 'pelias-schema': schemaMock, - 'pelias-logger': loggerMock - }); + var res = adminFields(schema, undefined, logger); - t.deepEquals([], adminFields.availableFields, 'no admin fields found'); + t.deepEquals(res, [], 'no admin fields found'); + t.assert(logErrorCalled, 'log error called'); t.end(); }); }; From 49d439425e2ab6cc65476f86df0f56e27aeeaf94 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Thu, 13 Aug 2015 12:05:21 -0400 Subject: [PATCH 6/8] Use address_weights.js in tests instead of hardcoding boost values --- test/unit/query/search.js | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/test/unit/query/search.js b/test/unit/query/search.js index 0413706e..ed3d1a6d 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -7,8 +7,10 @@ var category = 'category'; var parser = require('../../../helper/query_parser'); var category_weights = require('../../../helper/category_weights'); var admin_weights = require('../../../helper/admin_weights'); +var address_weights = require('../../../helper/address_weights'); var weights = require('pelias-suggester-pipeline').weights; + module.exports.tests = {}; module.exports.tests.interface = function(test, common) { @@ -269,7 +271,7 @@ module.exports.tests.query = function(test, common) { 'match': { 'address.number': { 'query': 123, - 'boost': 1 + 'boost': address_weights.number } } }, @@ -277,7 +279,7 @@ module.exports.tests.query = function(test, common) { 'match': { 'address.street': { 'query': 'main st', - 'boost': 3 + 'boost': address_weights.street } } }, @@ -285,7 +287,7 @@ module.exports.tests.query = function(test, common) { 'match': { 'address.zip': { 'query': 10010, - 'boost': 3 + 'boost': address_weights.zip } } }, @@ -293,7 +295,7 @@ module.exports.tests.query = function(test, common) { 'match': { 'admin1_abbr': { 'query': 'NY', - 'boost': 3 + 'boost': address_weights.admin1_abbr } } }, @@ -301,7 +303,7 @@ module.exports.tests.query = function(test, common) { 'match': { 'alpha3': { 'query': 'USA', - 'boost': 5 + 'boost': address_weights.alpha3 } } }, @@ -623,7 +625,7 @@ module.exports.tests.query = function(test, common) { match: { 'address.number': { 'query': 1, - 'boost': 1 + 'boost': address_weights.number } } }, @@ -631,7 +633,7 @@ module.exports.tests.query = function(test, common) { match: { 'address.street': { 'query': 'water st', - 'boost': 3 + 'boost': address_weights.street } } }, @@ -639,7 +641,7 @@ module.exports.tests.query = function(test, common) { 'match': { 'admin1_abbr': { 'query': 'NY', - 'boost': 3 + 'boost': address_weights.admin1_abbr } } }, From d7871747e13ba7b474c9f31f1cbe825a41033172 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Thu, 13 Aug 2015 13:34:54 -0400 Subject: [PATCH 7/8] Fix bug introduced by moving leftovers.join into the if() statement --- query/search.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/query/search.js b/query/search.js index f11b472a..df1b3017 100644 --- a/query/search.js +++ b/query/search.js @@ -30,14 +30,14 @@ function generate( params ){ }; if (params.parsed_input) { - addParsedMatch(query, input, params.parsed_input); - // update input if (params.parsed_input.number && params.parsed_input.street) { input = params.parsed_input.number + ' ' + params.parsed_input.street; } else if (params.parsed_input.admin_parts) { input = params.parsed_input.name; } + + addParsedMatch(query, input, params.parsed_input); } // add search condition to distance query @@ -122,11 +122,13 @@ function addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput return; } + leftovers = leftovers.join(' '); + // if there are additional regions/admin_parts found if (leftovers !== defaultInput) { unmatchedAdminFields.forEach(function (key) { // combine all the leftover parts into one string - addMatch(query, [], key, leftovers.join(' ')); + addMatch(query, [], key, leftovers); }); } } From 776b0ce1e1c9c56ba49a6ec6a6992b8f05252aa4 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Fri, 28 Aug 2015 14:43:17 +0200 Subject: [PATCH 8/8] update unit test --- test/unit/query/search.js | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/test/unit/query/search.js b/test/unit/query/search.js index fb533ec8..f2e6d9dc 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -656,13 +656,16 @@ module.exports.tests.query = function(test, common) { 'must': [ { 'match': { - 'name.default': '1 water st' + 'name.default': { + 'query': '1 water st', + 'analyzer': 'peliasOneEdgeGram' + } } } ], 'should': [ { - match: { + 'match': { 'address.number': { 'query': 1, 'boost': address_weights.number @@ -670,7 +673,7 @@ module.exports.tests.query = function(test, common) { } }, { - match: { + 'match': { 'address.street': { 'query': 'water st', 'boost': address_weights.street @@ -701,23 +704,28 @@ module.exports.tests.query = function(test, common) { } }, { - match: { - local_admin: 'manhattan' + 'match': { + 'local_admin': 'manhattan' } }, { - match: { - locality: 'manhattan' + 'match': { + 'locality': 'manhattan' } }, { - match: { - neighborhood: 'manhattan' + 'match': { + 'neighborhood': 'manhattan' } }, { 'match': { - 'phrase.default': '1 water st' + 'phrase.default': { + 'query': '1 water st', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2 + } } } ]