Browse Source

Merge pull request #209 from pelias/master

merge api@master in to api@production
improved_bias
Diana Shkolnikov 10 years ago
parent
commit
69e4478f21
  1. 1
      .travis.yml
  2. 13
      helper/address_weights.js
  3. 42
      helper/adminFields.js
  4. 9
      helper/category_weights.js
  5. 29
      index.js
  6. 6
      package.json
  7. 198
      query/search.js
  8. 11
      query/sort.js
  9. 84
      test/unit/helper/adminFields.js
  10. 2
      test/unit/query/reverse.js
  11. 327
      test/unit/query/search.js
  12. 2
      test/unit/query/sort.js
  13. 3
      test/unit/run.js

1
.travis.yml

@ -2,4 +2,5 @@ language: node_js
script: "npm run unit" script: "npm run unit"
node_js: node_js:
- "0.10" - "0.10"
- "0.12"
sudo: false sudo: false

13
helper/address_weights.js

@ -0,0 +1,13 @@
/**
* These values specify how much a document that matches certain parts of an address
* should be boosted in elasticsearch results.
*
* query/search.js passes each value as the `boost` of the match clause it builds
* for the corresponding parsed address part:
*   number      -> match on address.number
*   street      -> match on address.street
*   zip         -> match on address.zip  (parsed postalcode)
*   admin2      -> match on admin2       (parsed city)
*   admin1_abbr -> match on admin1_abbr  (parsed state)
*   alpha3      -> match on alpha3       (parsed country)
*/
module.exports = {
number: 1,
street: 3,
zip: 3,
admin2: 2,
admin1_abbr: 3,
alpha3: 5
};

42
helper/adminFields.js

@ -0,0 +1,42 @@
var peliasSchema = require('pelias-schema');
var peliasLogger = require( 'pelias-logger' ).get( 'api' );
// Admin field names the API expects to find in the schema. This is the default
// `expectedFields` list of getAvailableAdminFields() below, which narrows it
// down to the names that are actually present in the schema mapping.
var ADMIN_FIELDS = [
'admin0',
'admin1',
'admin1_abbr',
'admin2',
'local_admin',
'locality',
'neighborhood'
];
/**
 * Report which of the expected admin fields are actually defined in the schema.
 *
 * Every argument falls back to a module-level default when it is falsy, so
 * production callers invoke this with no arguments; the parameters exist so
 * tests can inject a schema, a field list and a logger.
 *
 * @param {Object} [schema] optional: for testing only
 * @param {Array} [expectedFields] optional: for testing only
 * @param {Object} [logger] optional: for testing only
 * @returns {Array.<string>} the expected fields found in the schema, in expected order
 */
function getAvailableAdminFields(schema, expectedFields, logger) {
  var activeSchema = schema || peliasSchema;
  var wantedFields = expectedFields || ADMIN_FIELDS;
  var log = logger || peliasLogger;

  // field names declared on the schema's default mapping
  var schemaFields = Object.keys(activeSchema.mappings._default_.properties);

  // keep only the wanted fields that the schema really declares
  var found = [];
  wantedFields.forEach(function (name) {
    if (schemaFields.indexOf(name) >= 0) {
      found.push(name);
    }
  });

  // an empty result is reported through the logger, but is still returned
  if (found.length === 0) {
    log.error('helper/adminFields: no expected admin fields found in schema');
  }

  return found;
}
module.exports = getAvailableAdminFields;

9
helper/category_weights.js

@ -3,7 +3,14 @@
* should be boosted in elasticsearch results. * should be boosted in elasticsearch results.
*/ */
module.exports = { module.exports.default = {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2,
'admin': 2
};
module.exports.address = {
'transport:air': 2, 'transport:air': 2,
'transport:air:aerodrome': 2, 'transport:air:aerodrome': 2,
'transport:air:airport': 2 'transport:air:airport': 2

29
index.js

@ -1,17 +1,20 @@
var cluster = require('cluster'), var Cluster = require('cluster2'),
app = require('./app'), app = require('./app'),
multicore = false, port = ( process.env.PORT || 3100 ),
port = ( process.env.PORT || 3100 ); multicore = true;
/** cluster webserver across all cores **/ /** cluster webserver across all cores **/
// if( multicore ){ if( multicore ){
// @todo: not finished yet var c = new Cluster({ port: port });
// cluster(app) c.listen(function(cb){
// .use(cluster.stats()) console.log( 'worker: listening on ' + port );
// .listen( process.env.PORT || 3100 ); cb(app);
// } });
if (!multicore){
console.log( 'listening on ' + port );
app.listen( process.env.PORT || 3100 );
} }
/** run server on the default setup (single core) **/
else {
console.log( 'listening on ' + port );
app.listen( port );
}

6
package.json

@ -33,8 +33,11 @@
"elasticsearch": ">=1.2.1" "elasticsearch": ">=1.2.1"
}, },
"dependencies": { "dependencies": {
"addressit": "1.3.0",
"async": "^0.9.0", "async": "^0.9.0",
"cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve",
"express": "^4.8.8", "express": "^4.8.8",
"extend": "2.0.1",
"geojson": "^0.2.1", "geojson": "^0.2.1",
"geojson-extent": "^0.3.1", "geojson-extent": "^0.3.1",
"geopipes-elasticsearch-backend": "^0.2.0", "geopipes-elasticsearch-backend": "^0.2.0",
@ -43,10 +46,9 @@
"microtime": "1.4.0", "microtime": "1.4.0",
"morgan": "1.5.2", "morgan": "1.5.2",
"pelias-config": "^0.1.4", "pelias-config": "^0.1.4",
"extend": "2.0.1",
"addressit": "1.3.0",
"pelias-esclient": "0.0.25", "pelias-esclient": "0.0.25",
"pelias-logger": "^0.0.8", "pelias-logger": "^0.0.8",
"pelias-schema": "1.0.0",
"pelias-suggester-pipeline": "2.0.2", "pelias-suggester-pipeline": "2.0.2",
"through2": "0.6.5" "through2": "0.6.5"
}, },

198
query/search.js

@ -1,6 +1,9 @@
var queries = require('geopipes-elasticsearch-backend').queries, var queries = require('geopipes-elasticsearch-backend').queries,
sort = require('../query/sort'); sort = require('../query/sort'),
adminFields = require('../helper/adminFields')(),
addressWeights = require('../helper/address_weights');
function generate( params ){ function generate( params ){
var centroid = null; var centroid = null;
@ -27,23 +30,6 @@ function generate( params ){
}; };
if (params.parsed_input) { if (params.parsed_input) {
query.query.filtered.query.bool.should = [];
var unmatched_admin_fields = [];
// qb stands for query builder
var qb = function(unmatched_admin_fields, value) {
if (value) {
unmatched_admin_fields.forEach(function(admin_field) {
var match = {};
match[admin_field] = value;
query.query.filtered.query.bool.should.push({
'match': match
});
});
}
};
// update input // update input
if (params.parsed_input.number && params.parsed_input.street) { if (params.parsed_input.number && params.parsed_input.street) {
input = params.parsed_input.number + ' ' + params.parsed_input.street; input = params.parsed_input.number + ' ' + params.parsed_input.street;
@ -51,58 +37,16 @@ function generate( params ){
input = params.parsed_input.name; input = params.parsed_input.name;
} }
// address addParsedMatch(query, input, params.parsed_input);
// number, street, postalcode
if (params.parsed_input.number) {
qb(['address.number'], params.parsed_input.number);
}
if (params.parsed_input.street) {
qb(['address.street'], params.parsed_input.street);
}
if (params.parsed_input.postalcode) {
qb(['address.zip'], params.parsed_input.postalcode);
}
// city
// admin2, locality, local_admin, neighborhood
if (params.parsed_input.city) {
qb(['admin2'], params.parsed_input.admin2);
} else {
unmatched_admin_fields.push('admin2');
}
// state
// admin1, admin1_abbr
if (params.parsed_input.state) {
qb(['admin1_abbr'], params.parsed_input.state);
} else {
unmatched_admin_fields.push('admin1', 'admin1_abbr');
}
// country
// admin0, alpha3
if (params.parsed_input.country) {
qb(['alpha3'], params.parsed_input.country);
} else {
unmatched_admin_fields.push('admin0', 'alpha3');
}
var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined;
// if no address was identified and input suggests some admin info in it
if (unmatched_admin_fields.length === 5 && input_regions !== params.input) {
if (params.parsed_input.admin_parts) {
qb(unmatched_admin_fields, params.parsed_input.admin_parts);
} else {
qb(unmatched_admin_fields, input_regions);
}
}
} }
// add search condition to distance query // add search condition to distance query
query.query.filtered.query.bool.must.push({ query.query.filtered.query.bool.must.push({
'match': { 'match': {
'name.default': input 'name.default': {
'query': input,
'analyzer': 'peliasOneEdgeGram'
}
} }
}); });
@ -110,7 +54,12 @@ function generate( params ){
// note: this is required for shingle/phrase matching // note: this is required for shingle/phrase matching
query.query.filtered.query.bool.should.push({ query.query.filtered.query.bool.should.push({
'match': { 'match': {
'phrase.default': input 'phrase.default': {
'query': input,
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
} }
}); });
@ -119,4 +68,121 @@ function generate( params ){
return query; return query;
} }
/**
 * Walk the parsed input (the address parts detected in the query string) and
 * add a boosted match clause to the query for every component that was found.
 * Whatever admin fields remain unmatched afterwards are handed to
 * addUnmatchedAdminFieldsToQuery().
 *
 * @param {Object} query
 * @param {string} defaultInput
 * @param {Object} parsedInput
 */
function addParsedMatch(query, defaultInput, parsedInput) {
  var bool = query.query.filtered.query.bool;
  bool.should = bool.should || [];

  // work on a copy of the expected admin fields: entries are removed as they get matched
  var unmatchedAdminFields = adminFields.slice();

  // [ document field, parsed value, boost ] in the order the clauses are added
  var components = [
    // address: number, street, postalcode
    [ 'address.number', parsedInput.number, addressWeights.number ],
    [ 'address.street', parsedInput.street, addressWeights.street ],
    [ 'address.zip', parsedInput.postalcode, addressWeights.zip ],
    // city
    [ 'admin2', parsedInput.city, addressWeights.admin2 ],
    // state
    [ 'admin1_abbr', parsedInput.state, addressWeights.admin1_abbr ],
    // country
    [ 'alpha3', parsedInput.country, addressWeights.alpha3 ]
  ];

  components.forEach(function (component) {
    addMatch(query, unmatchedAdminFields, component[0], component[1], component[2]);
  });

  addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput);
}
/**
 * Check for additional admin text in the parsed input and, if any was found,
 * combine it into a single space-separated string and match that string
 * against every admin field that has not been matched yet.
 *
 * `admin_parts` takes precedence; `regions` is only used when there are no
 * admin_parts. Nothing is added when there are no unmatched fields, when no
 * leftover text exists, or when the leftover text is identical to the input
 * that is already being searched for.
 *
 * @param {Object} query
 * @param {Array} unmatchedAdminFields
 * @param {Object} parsedInput
 * @param {string} defaultInput
 */
function addUnmatchedAdminFieldsToQuery(query, unmatchedAdminFields, parsedInput, defaultInput) {
  if (unmatchedAdminFields.length === 0) {
    return;
  }

  // concat() accepts a single string as well as an array of parts and flattens
  // the latter, so ['new', 'york'] ends up as 'new york' below. Pushing the
  // array as one element would stringify it with commas ('new,york') instead.
  var leftovers = [];
  if (parsedInput.admin_parts) {
    leftovers = leftovers.concat(parsedInput.admin_parts);
  }
  else if (parsedInput.regions) {
    leftovers = leftovers.concat(parsedInput.regions);
  }

  // also covers an empty regions array: there is nothing to match on
  if (leftovers.length === 0) {
    return;
  }

  // combine all the leftover parts into one string
  var leftoverText = leftovers.join(' ');

  // if there are additional regions/admin_parts found
  if (leftoverText !== defaultInput) {
    unmatchedAdminFields.forEach(function (key) {
      // an empty unmatched list is passed so the array being iterated is never modified
      addMatch(query, [], key, leftoverText);
    });
  }
}
/**
 * Append a `match` clause for key:value to the query's should-list and drop
 * the key from the list of unmatched fields. When a truthy boost is given the
 * clause uses the expanded { query, boost } form, otherwise the bare value.
 * An undefined value means the component was not detected: nothing happens.
 *
 * @param {Object} query
 * @param {Array} unmatched
 * @param {string} key
 * @param {string|number|undefined} value
 * @param {number|undefined} [boost] optional
 */
function addMatch(query, unmatched, key, value, boost) { // jshint ignore:line
  if (value === undefined) {
    return;
  }

  var clause = { 'match': {} };
  clause.match[key] = boost ? { query: value, boost: boost } : value;

  query.query.filtered.query.bool.should.push(clause);
  removeFromUnmatched(unmatched, key);
}
/**
 * Remove the first occurrence of key from the unmatched list, in place.
 * The list is left untouched when the key is not present.
 *
 * @param {Array} unmatched
 * @param {string} key
 */
function removeFromUnmatched(unmatched, key) {
  var position = unmatched.indexOf(key);
  if (position === -1) {
    return;
  }
  unmatched.splice(position, 1);
}
module.exports = generate; module.exports = generate;

11
query/sort.js

@ -43,7 +43,7 @@ module.exports = function( params ){
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': getCategoryWeights(params)
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',
@ -64,3 +64,12 @@ module.exports = function( params ){
return scriptsConfig; return scriptsConfig;
}; };
/**
 * Pick the category weight table for a request: the `address` table when the
 * parsed input carries a house number or a street, the `default` table in
 * every other case (including missing params or a missing/empty parsed input).
 *
 * @param {Object} [params] request params; only `parsed_input` is inspected
 * @returns {Object} category_weights.address or category_weights.default
 */
function getCategoryWeights(params) {
  // borrowed from Object.prototype so objects without that prototype work too
  var hasOwn = Object.prototype.hasOwnProperty;

  var parsedInput;
  if (params && hasOwn.call(params, 'parsed_input')) {
    parsedInput = params.parsed_input;
  }

  // parsed_input may be present but null/undefined; treat that as "no address"
  // instead of throwing while probing it for properties
  var looksLikeAddress = parsedInput !== null && parsedInput !== undefined &&
    (hasOwn.call(parsedInput, 'number') || hasOwn.call(parsedInput, 'street'));

  if (looksLikeAddress) {
    return category_weights.address;
  }
  return category_weights.default;
}

84
test/unit/helper/adminFields.js

@ -0,0 +1,84 @@
var adminFields = require('../../../helper/adminFields');
module.exports.tests = {};
// Sanity checks on the module's public shape: it exports a function which,
// called without arguments, yields a non-empty array.
module.exports.tests.interface = function(test, common) {
  function validateFields(t) {
    var isFunction = adminFields instanceof Function;
    t.assert(isFunction, 'adminFields is a function');

    var firstResult = adminFields();
    t.assert(firstResult instanceof Array, 'adminFields() returns an array');

    var secondResult = adminFields();
    t.assert(secondResult.length > 0, 'adminFields array is not empty');

    t.end();
  }

  test('validate fields', validateFields);
};
// Checks that only the expected fields which exist in the (mocked) schema are
// returned, and that an error is logged when none of them exist.
module.exports.tests.lookupExistance = function(test, common) {

  // build a schema mock whose default mapping declares exactly the given fields
  function mockSchema(fieldNames) {
    var properties = {};
    fieldNames.forEach(function (name) {
      properties[name] = {};
    });
    return { mappings: { _default_: { properties: properties } } };
  }

  // a fresh copy of the expected field list for each test
  function makeExpectedFields() {
    return [ 'one', 'two', 'three', 'four' ];
  }

  test('all expected fields in schema', function(t) {
    var expectedFields = makeExpectedFields();

    // every expected field is present in the schema mock
    var res = adminFields(mockSchema(expectedFields), expectedFields);

    t.deepEquals(res, expectedFields, 'all expected fields are returned');
    t.end();
  });

  test('some expected fields in schema', function(t) {
    var expectedFields = makeExpectedFields();

    // only the first three expected fields are present in the schema mock
    var res = adminFields(mockSchema(expectedFields.slice(0, 3)), expectedFields);

    t.deepEquals(res, expectedFields.slice(0, 3), 'only matching expected fields are returned');
    t.end();
  });

  test('no expected fields in schema', function(t) {
    var logErrorCalled = false;
    var logger = {
      error: function () {
        logErrorCalled = true;
      }
    };

    // expectedFields is left undefined so the module's own default list is used
    var res = adminFields(mockSchema([ 'foo' ]), undefined, logger);

    t.deepEquals(res, [], 'no admin fields found');
    t.assert(logErrorCalled, 'log error called');
    t.end();
  });
};
// Run every registered test group, prefixing each test name with the module name.
module.exports.all = function (tape, common) {
  var groups = module.exports.tests;

  function prefixedTest(name, testFunction) {
    return tape('adminFields: ' + name, testFunction);
  }

  for( var groupName in groups ){
    groups[groupName](prefixedTest, common);
  }
};

2
test/unit/query/reverse.js

@ -53,7 +53,7 @@ var sort = [
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': category_weights.default
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',

327
test/unit/query/search.js

@ -7,8 +7,10 @@ var category = 'category';
var parser = require('../../../helper/query_parser'); var parser = require('../../../helper/query_parser');
var category_weights = require('../../../helper/category_weights'); var category_weights = require('../../../helper/category_weights');
var admin_weights = require('../../../helper/admin_weights'); var admin_weights = require('../../../helper/admin_weights');
var address_weights = require('../../../helper/address_weights');
var weights = require('pelias-suggester-pipeline').weights; var weights = require('pelias-suggester-pipeline').weights;
module.exports.tests = {}; module.exports.tests = {};
module.exports.tests.interface = function(test, common) { module.exports.tests.interface = function(test, common) {
@ -54,7 +56,7 @@ var sort = [
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': category_weights.default
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',
@ -80,12 +82,20 @@ var expected = {
'bool': { 'bool': {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': 'test' 'name.default': {
'query': 'test',
'analyzer': 'peliasOneEdgeGram'
}
} }
}], }],
'should': [{ 'should': [{
'match': { 'match': {
'phrase.default': 'test' 'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
} }
}] }]
} }
@ -162,12 +172,20 @@ module.exports.tests.query = function(test, common) {
'bool': { 'bool': {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': 'test' 'name.default': {
'query': 'test',
'analyzer': 'peliasOneEdgeGram'
}
} }
}], }],
'should': [{ 'should': [{
'match': { 'match': {
'phrase.default': 'test' 'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
} }
}] }]
} }
@ -202,12 +220,20 @@ module.exports.tests.query = function(test, common) {
'bool': { 'bool': {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': 'test' 'name.default': {
'query': 'test',
'analyzer': 'peliasOneEdgeGram'
}
} }
}], }],
'should': [{ 'should': [{
'match': { 'match': {
'phrase.default': 'test' 'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
} }
}] }]
} }
@ -260,39 +286,92 @@ module.exports.tests.query = function(test, common) {
'must': [ 'must': [
{ {
'match': { 'match': {
'name.default': '123 main st' 'name.default': {
'query': '123 main st',
'analyzer': 'peliasOneEdgeGram'
}
} }
} }
], ],
'should': [ 'should': [
{ {
'match': { 'match': {
'address.number': 123 'address.number': {
'query': 123,
'boost': address_weights.number
}
} }
}, },
{ {
'match': { 'match': {
'address.street': 'main st' 'address.street': {
'query': 'main st',
'boost': address_weights.street
}
} }
}, },
{ {
'match': { 'match': {
'address.zip': 10010 'address.zip': {
'query': 10010,
'boost': address_weights.zip
}
} }
}, },
{ {
'match': { 'match': {
'admin1_abbr': 'NY' 'admin1_abbr': {
'query': 'NY',
'boost': address_weights.admin1_abbr
}
} }
}, },
{ {
'match': { 'match': {
'alpha3': 'USA' 'alpha3': {
'query': 'USA',
'boost': address_weights.alpha3
}
}
},
{
match: {
admin0: 'new york'
}
},
{
match: {
admin1: 'new york'
}
},
{
match: {
admin2: 'new york'
}
},
{
match: {
local_admin: 'new york'
}
},
{
match: {
locality: 'new york'
}
},
{
match: {
neighborhood: 'new york'
} }
}, },
{ {
'match': { 'match': {
'phrase.default': '123 main st' 'phrase.default': {
'query': '123 main st',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
} }
} }
] ]
@ -409,14 +488,17 @@ module.exports.tests.query = function(test, common) {
'must': [ 'must': [
{ {
'match': { 'match': {
'name.default': 'soho grand' 'name.default': {
'query': 'soho grand',
'analyzer': 'peliasOneEdgeGram'
}
} }
} }
], ],
'should': [ 'should': [
{ {
'match': { 'match': {
'admin2': 'new york' 'admin0': 'new york'
} }
}, },
{ {
@ -431,17 +513,32 @@ module.exports.tests.query = function(test, common) {
}, },
{ {
'match': { 'match': {
'admin0': 'new york' 'admin2': 'new york'
}
},
{
'match': {
'local_admin': 'new york'
}
},
{
'match': {
'locality': 'new york'
} }
}, },
{ {
'match': { 'match': {
'alpha3': 'new york' 'neighborhood': 'new york'
} }
}, },
{ {
'match': { 'match': {
'phrase.default': 'soho grand' 'phrase.default': {
'query': 'soho grand',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
} }
} }
] ]
@ -501,7 +598,8 @@ module.exports.tests.query = function(test, common) {
'category_weights': { 'category_weights': {
'transport:air': 2, 'transport:air': 2,
'transport:air:aerodrome': 2, 'transport:air:aerodrome': 2,
'transport:air:airport': 2 'transport:air:airport': 2,
'admin': 2
} }
}, },
'file': 'category', 'file': 'category',
@ -534,10 +632,197 @@ module.exports.tests.query = function(test, common) {
], ],
'track_scores': true 'track_scores': true
}; };
t.deepEqual(query, expected, 'valid search query'); t.deepEqual(query, expected, 'valid search query');
t.end(); t.end();
}); });
test('valid query with regions in address', function(t) {
var partial_address = '1 water st manhattan ny';
var query = generate({ input: partial_address,
layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood',
'locality', 'local_admin', 'osmaddress', 'openaddresses' ],
size: 10,
details: true,
parsed_input: parser(partial_address),
default_layers_set: true
});
var expected = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [
{
'match': {
'name.default': {
'query': '1 water st',
'analyzer': 'peliasOneEdgeGram'
}
}
}
],
'should': [
{
'match': {
'address.number': {
'query': 1,
'boost': address_weights.number
}
}
},
{
'match': {
'address.street': {
'query': 'water st',
'boost': address_weights.street
}
}
},
{
'match': {
'admin1_abbr': {
'query': 'NY',
'boost': address_weights.admin1_abbr
}
}
},
{
'match': {
'admin0': 'manhattan'
}
},
{
'match': {
'admin1': 'manhattan'
}
},
{
'match': {
'admin2': 'manhattan'
}
},
{
'match': {
'local_admin': 'manhattan'
}
},
{
'match': {
'locality': 'manhattan'
}
},
{
'match': {
'neighborhood': 'manhattan'
}
},
{
'match': {
'phrase.default': {
'query': '1 water st',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'slop': 2
}
}
}
]
}
},
'filter': {
'bool': {
'must': []
}
}
}
},
'size': 10,
'sort': [
'_score',
{
'_script': {
'file': 'admin_boost',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'popularity',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'file': 'population',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'admin0': 4,
'admin1': 3,
'admin2': 2,
'local_admin': 1,
'locality': 1,
'neighborhood': 1
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'category_weights': {
'transport:air': 2,
'transport:air:aerodrome': 2,
'transport:air:airport': 2
}
},
'file': 'category',
'type': 'number',
'order': 'desc'
}
},
{
'_script': {
'params': {
'weights': {
'geoname': 0,
'address': 4,
'osmnode': 6,
'osmway': 6,
'poi-address': 8,
'neighborhood': 10,
'local_admin': 12,
'locality': 12,
'admin2': 12,
'admin1': 14,
'admin0': 2
}
},
'file': 'weights',
'type': 'number',
'order': 'desc'
}
}
],
'track_scores': true
};
t.deepEqual(query, expected, 'valid search query');
t.end();
});
}; };
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {

2
test/unit/query/sort.js

@ -53,7 +53,7 @@ var expected = [
{ {
'_script': { '_script': {
'params': { 'params': {
'category_weights': category_weights 'category_weights': category_weights.default
}, },
'file': category, 'file': category,
'type': 'number', 'type': 'number',

3
test/unit/run.js

@ -19,7 +19,8 @@ var tests = [
require('./query/reverse'), require('./query/reverse'),
require('./helper/query_parser'), require('./helper/query_parser'),
require('./helper/geojsonify'), require('./helper/geojsonify'),
require('./helper/outputSchema') require('./helper/outputSchema'),
require('./helper/adminFields'),
]; ];
tests.map(function(t) { tests.map(function(t) {

Loading…
Cancel
Save