diff --git a/.travis.yml b/.travis.yml index 7c327439..b80c98ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ cache: notifications: email: false node_js: - - 0.12 - 4 - 6 matrix: diff --git a/Dockerfile b/Dockerfile index 5b07acd6..a90bda50 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:0.12 +FROM node:4.6.0 MAINTAINER Pelias EXPOSE 3100 diff --git a/README.md b/README.md index a28ceb52..2f6e6627 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,8 @@ See the [Mapzen Search documentation](https://mapzen.com/documentation/search/). ## Install Dependencies +Note: Pelias requires Node.js v4 or newer + ```bash npm install ``` diff --git a/index.js b/index.js index 42116f53..e5404586 100644 --- a/index.js +++ b/index.js @@ -1,33 +1,7 @@ -var cluster = require('cluster'), - app = require('./app'), - port = ( process.env.PORT || 3100 ), - // when pelias/api#601 is done this can be changed to `true` - multicore = false; - -/** cluster webserver across all cores **/ -if( multicore ){ - - var numCPUs = require('os').cpus().length; - if( cluster.isMaster ){ - - // fork workers - for (var i = 0; i < numCPUs; i++) { - cluster.fork(); - } - - cluster.on('exit', function( worker, code, signal ){ - console.log('worker ' + worker.process.pid + ' died'); - }); - - } else { - app.listen( port ); - console.log( 'worker: listening on ' + port ); - } -} +var app = require('./app'), + port = ( process.env.PORT || 3100 ); /** run server on the default setup (single core) **/ -else { - console.log( 'listening on ' + port ); - app.listen( port ); -} +console.log( 'listening on ' + port ); +app.listen( port ); diff --git a/middleware/dedupe.js b/middleware/dedupe.js index b40f1806..3b125d0d 100644 --- a/middleware/dedupe.js +++ b/middleware/dedupe.js @@ -16,11 +16,41 @@ function dedupeResults(req, res, next) { var uniqueResults = []; _.some(res.data, function (hit) { - if (uniqueResults.length === 0 || _.every(uniqueResults, isDifferent.bind(null, hit)) ) { + + if (_.isEmpty(uniqueResults)) { uniqueResults.push(hit); } else { - logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default + ' ' + hit.source + ':' + hit._id }); + // if there are multiple items in results, loop through them to find a dupe + // save off the index of the dupe if found + var dupeIndex = uniqueResults.findIndex(function (elem, index, array) { + return !isDifferent(elem, hit); + }); + + // if a dupe is not found, just add to results and move on + if (dupeIndex === -1) { + uniqueResults.push(hit); + } + // if dupe was found, we need to check which of the records is preferred + // since the order in which Elasticsearch returns identical text matches is arbitrary + // of course, if the new one is preferred we should replace previous with new + else if (isPreferred(uniqueResults[dupeIndex], hit)) { + logger.info('[dupe][replacing]', { + query: req.clean.text, + previous: uniqueResults[dupeIndex].source, + hit: hit.name.default + ' ' + hit.source + ':' + hit._id + }); + // replace previous dupe item with current hit + uniqueResults[dupeIndex] = hit; + } + // if not preferred over existing, just log and move on + else { + logger.info('[dupe][skipping]', { + query: req.clean.text, + previous: uniqueResults[dupeIndex].source, + hit: hit.name.default + ' ' + hit.source + ':' + hit._id + }); + } } // stop looping when requested size has been reached in uniqueResults @@ -32,4 +62,19 @@ function dedupeResults(req, res, next) { next(); } +function isPreferred(existing, candidateReplacement) { + // NOTE: we are assuming here that the layer for both records is the same + + //bind the trumps function to the data items to keep the rest of the function clean + var trumpsFunc = trumps.bind(null, existing, candidateReplacement); + + return trumpsFunc('geonames', 'whosonfirst') || // WOF has bbox and is generally preferred + trumpsFunc('openstreetmap', 'openaddresses') || // addresses are better in OA + trumpsFunc('whosonfirst', 'openstreetmap'); // venues are better in OSM, at this time +} + +function trumps(existing, candidateReplacement, loserSource, winnerSource) { + return existing.source === loserSource && candidateReplacement.source === winnerSource; +} + module.exports = setup; diff --git a/middleware/geocodeJSON.js b/middleware/geocodeJSON.js index 3b5170dd..442e017e 100644 --- a/middleware/geocodeJSON.js +++ b/middleware/geocodeJSON.js @@ -43,7 +43,7 @@ function convertToGeocodeJSON(req, res, next, opts) { // REQUIRED. A semver.org compliant version number. Describes the version of // the GeocodeJSON spec that is implemented by this instance. - res.body.geocoding.version = '0.1'; + res.body.geocoding.version = '0.2'; // OPTIONAL. Default: null. The attribution of the data. In case of multiple sources, // and then multiple attributions, can be an object with one key by source. diff --git a/package.json b/package.json index 8d1025c9..0443f632 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,7 @@ "url": "https://github.com/pelias/api/issues" }, "engines": { - "node": ">=0.10.26" + "node": ">=4.0.0" }, "dependencies": { "addressit": "1.4.0", @@ -41,7 +41,7 @@ "elasticsearch": "^11.0.0", "elasticsearch-exceptions": "0.0.4", "express": "^4.8.8", - "express-http-proxy": "^0.7.0", + "express-http-proxy": "^0.10.0", "extend": "3.0.0", "geojson": "^0.4.0", "geojson-extent": "^0.3.1", @@ -51,12 +51,12 @@ "lodash": "^4.5.0", "markdown": "0.5.0", "morgan": "1.7.0", - "pelias-categories": "1.0.0", - "pelias-config": "2.1.0", - "pelias-logger": "0.0.8", - "pelias-model": "4.2.0", - "pelias-query": "8.6.0", - "pelias-text-analyzer": "1.3.0", + "pelias-categories": "1.1.0", + "pelias-config": "2.3.0", + "pelias-logger": "0.1.0", + "pelias-model": "4.3.0", + "pelias-query": "8.8.0", + "pelias-text-analyzer": "1.4.0", "stats-lite": "2.0.3", "through2": "2.0.1" }, diff --git a/query/search.js b/query/search.js index 29dea8b9..bed5a63c 100644 --- a/query/search.js +++ b/query/search.js @@ -59,7 +59,7 @@ function generateQuery( clean ){ // size if( clean.querySize ) { - vs.var( 'size', 50 ); + vs.var( 'size', clean.querySize ); } // focus point diff --git a/query/search_defaults.js b/query/search_defaults.js index 4e6a7ffb..c0d2a6a0 100644 --- a/query/search_defaults.js +++ b/query/search_defaults.js @@ -92,6 +92,9 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'population:field': 'population', 'population:modifier': 'log1p', 'population:max_boost': 20, - 'population:weight': 2 + 'population:weight': 2, + + 'boost:address': 10, + 'boost:street': 5 }); diff --git a/test/unit/fixture/search_boundary_country.js b/test/unit/fixture/search_boundary_country.js index f622e1f5..ee6427a0 100644 --- a/test/unit/fixture/search_boundary_country.js +++ b/test/unit/fixture/search_boundary_country.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -73,19 +74,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_fallback.js b/test/unit/fixture/search_fallback.js index f7983da9..857e47e9 100644 --- a/test/unit/fixture/search_fallback.js +++ b/test/unit/fixture/search_fallback.js @@ -98,6 +98,7 @@ module.exports = { { 'bool': { '_name': 'fallback.address', + 'boost': 10, 'must': [ { 'match_phrase': { @@ -195,6 +196,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -790,16 +792,6 @@ module.exports = { 'size': 20, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_bbox.js b/test/unit/fixture/search_linguistic_bbox.js index 52c73c3c..e74f6c79 100644 --- a/test/unit/fixture/search_linguistic_bbox.js +++ b/test/unit/fixture/search_linguistic_bbox.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -76,19 +77,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_focus.js b/test/unit/fixture/search_linguistic_focus.js index 33c62c1e..a63400b3 100644 --- a/test/unit/fixture/search_linguistic_focus.js +++ b/test/unit/fixture/search_linguistic_focus.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -79,19 +80,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_focus_bbox.js b/test/unit/fixture/search_linguistic_focus_bbox.js index 717c27ac..7f6c8528 100644 --- a/test/unit/fixture/search_linguistic_focus_bbox.js +++ b/test/unit/fixture/search_linguistic_focus_bbox.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -90,19 +91,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_focus_null_island.js b/test/unit/fixture/search_linguistic_focus_null_island.js index 20f72ceb..da12154a 100644 --- a/test/unit/fixture/search_linguistic_focus_null_island.js +++ b/test/unit/fixture/search_linguistic_focus_null_island.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -79,19 +80,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_only.js b/test/unit/fixture/search_linguistic_only.js index 8c377f71..0117e03b 100644 --- a/test/unit/fixture/search_linguistic_only.js +++ b/test/unit/fixture/search_linguistic_only.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -65,19 +66,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_viewport.js b/test/unit/fixture/search_linguistic_viewport.js index 8c377f71..0117e03b 100644 --- a/test/unit/fixture/search_linguistic_viewport.js +++ b/test/unit/fixture/search_linguistic_viewport.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -65,19 +66,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_viewport_min_diagonal.js b/test/unit/fixture/search_linguistic_viewport_min_diagonal.js index 8c377f71..0117e03b 100644 --- a/test/unit/fixture/search_linguistic_viewport_min_diagonal.js +++ b/test/unit/fixture/search_linguistic_viewport_min_diagonal.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -65,19 +66,9 @@ module.exports = { 'boost_mode': 'multiply' } }, - 'size': 50, + 'size': 10, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_with_category_filtering.js b/test/unit/fixture/search_with_category_filtering.js index 9913b19c..05f9ec4e 100644 --- a/test/unit/fixture/search_with_category_filtering.js +++ b/test/unit/fixture/search_with_category_filtering.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -69,16 +70,6 @@ module.exports = { 'size': 20, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/fixture/search_with_source_filtering.js b/test/unit/fixture/search_with_source_filtering.js index 78889325..0e9010c2 100644 --- a/test/unit/fixture/search_with_source_filtering.js +++ b/test/unit/fixture/search_with_source_filtering.js @@ -9,6 +9,7 @@ module.exports = { { 'bool': { '_name': 'fallback.street', + 'boost': 5, 'must': [ { 'match_phrase': { @@ -68,16 +69,6 @@ module.exports = { 'size': 20, 'track_scores': true, 'sort': [ - { - 'population': { - 'order': 'desc' - } - }, - { - 'popularity': { - 'order': 'desc' - } - }, '_score' ] }; diff --git a/test/unit/middleware/dedupe.js b/test/unit/middleware/dedupe.js index b8100955..291d404a 100644 --- a/test/unit/middleware/dedupe.js +++ b/test/unit/middleware/dedupe.js @@ -58,6 +58,136 @@ module.exports.tests.dedupe = function(test, common) { }); }; +module.exports.tests.trump = function(test, common) { + test('whosonfirst trumps geonames, replace', function (t) { + var req = { + clean: { + text: 'Lancaster', + size: 100 + } + }; + var res = { + data: [ + { + 'name': { 'default': 'Lancaster' }, + 'source': 'geonames', + 'source_id': '123456', + 'layer': 'locality' + }, + { + 'name': { 'default': 'Lancaster' }, + 'source': 'whosonfirst', + 'source_id': '654321', + 'layer': 'locality' + } + ] + }; + + var expectedCount = 1; + dedupe(req, res, function () { + t.equal(res.data.length, expectedCount, 'results have fewer items than before'); + t.deepEqual(res.data[0].source, 'whosonfirst', 'whosonfirst result won'); + t.end(); + }); + }); + + test('whosonfirst trumps geonames, no replace', function (t) { + var req = { + clean: { + text: 'Lancaster', + size: 100 + } + }; + var res = { + data: [ + { + 'name': { 'default': 'Lancaster' }, + 'source': 'whosonfirst', + 'source_id': '123456', + 'layer': 'locality' + }, + { + 'name': { 'default': 'Lancaster' }, + 'source': 'geonames', + 'source_id': '654321', + 'layer': 'locality' + } + ] + }; + + var expectedCount = 1; + dedupe(req, res, function () { + t.equal(res.data.length, expectedCount, 'results have fewer items than before'); + t.deepEqual(res.data[0].source, 'whosonfirst', 'whosonfirst result won'); + t.end(); + }); + }); + + test('openstreetmap trumps whosonfirst venues', function (t) { + var req = { + clean: { + text: 'Lancaster Dairy Farm', + size: 100 + } + }; + var res = { + data: [ + { + 'name': { 'default': 'Lancaster Dairy Farm' }, + 'source': 'openstreetmap', + 'source_id': '123456', + 'layer': 'venue' + }, + { + 'name': { 'default': 'Lancaster Dairy Farm' }, + 'source': 'whosonfirst', + 'source_id': '654321', + 'layer': 'venue' + } + ] + }; + + var expectedCount = 1; + dedupe(req, res, function () { + t.equal(res.data.length, expectedCount, 'results have fewer items than before'); + t.deepEqual(res.data[0].source, 'openstreetmap', 'openstreetmap result won'); + t.end(); + }); + }); + + test('openaddresses trumps openstreetmap', function (t) { + var req = { + clean: { + text: '100 Main St', + size: 100 + } + }; + var res = { + data: [ + { + 'name': { 'default': '100 Main St' }, + 'source': 'openstreetmap', + 'source_id': '123456', + 'layer': 'address' + }, + { + 'name': { 'default': '100 Main St' }, + 'source': 'openaddresses', + 'source_id': '654321', + 'layer': 'address' + } + ] + }; + + var expectedCount = 1; + dedupe(req, res, function () { + t.equal(res.data.length, expectedCount, 'results have fewer items than before'); + t.deepEqual(res.data[0].source, 'openaddresses', 'openaddresses result won'); + t.end(); + }); + }); +}; + module.exports.all = function (tape, common) { function test(name, testFunction) {