Browse Source

Merge branch 'master' into staging

pull/706/head
Julian Simioni 8 years ago
parent
commit
ce47d730a4
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 1
      .travis.yml
  2. 2
      Dockerfile
  3. 2
      README.md
  4. 34
      index.js
  5. 49
      middleware/dedupe.js
  6. 2
      middleware/geocodeJSON.js
  7. 16
      package.json
  8. 2
      query/search.js
  9. 5
      query/search_defaults.js
  10. 13
      test/unit/fixture/search_boundary_country.js
  11. 12
      test/unit/fixture/search_fallback.js
  12. 13
      test/unit/fixture/search_linguistic_bbox.js
  13. 13
      test/unit/fixture/search_linguistic_focus.js
  14. 13
      test/unit/fixture/search_linguistic_focus_bbox.js
  15. 13
      test/unit/fixture/search_linguistic_focus_null_island.js
  16. 13
      test/unit/fixture/search_linguistic_only.js
  17. 13
      test/unit/fixture/search_linguistic_viewport.js
  18. 13
      test/unit/fixture/search_linguistic_viewport_min_diagonal.js
  19. 11
      test/unit/fixture/search_with_category_filtering.js
  20. 11
      test/unit/fixture/search_with_source_filtering.js
  21. 130
      test/unit/middleware/dedupe.js

1
.travis.yml

@ -6,7 +6,6 @@ cache:
notifications: notifications:
email: false email: false
node_js: node_js:
- 0.12
- 4 - 4
- 6 - 6
matrix: matrix:

2
Dockerfile

@ -1,4 +1,4 @@
FROM node:0.12 FROM node:4.6.0
MAINTAINER Pelias MAINTAINER Pelias
EXPOSE 3100 EXPOSE 3100

2
README.md

@ -16,6 +16,8 @@ See the [Mapzen Search documentation](https://mapzen.com/documentation/search/).
## Install Dependencies ## Install Dependencies
Note: Pelias requires Node.js v4 or newer
```bash ```bash
npm install npm install
``` ```

34
index.js

@ -1,33 +1,7 @@
var cluster = require('cluster'), var app = require('./app'),
app = require('./app'), port = ( process.env.PORT || 3100 );
port = ( process.env.PORT || 3100 ),
// when pelias/api#601 is done this can be changed to `true`
multicore = false;
/** cluster webserver across all cores **/
if( multicore ){
var numCPUs = require('os').cpus().length;
if( cluster.isMaster ){
// fork workers
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', function( worker, code, signal ){
console.log('worker ' + worker.process.pid + ' died');
});
} else {
app.listen( port );
console.log( 'worker: listening on ' + port );
}
}
/** run server on the default setup (single core) **/ /** run server on the default setup (single core) **/
else { console.log( 'listening on ' + port );
console.log( 'listening on ' + port ); app.listen( port );
app.listen( port );
}

49
middleware/dedupe.js

@ -16,11 +16,41 @@ function dedupeResults(req, res, next) {
var uniqueResults = []; var uniqueResults = [];
_.some(res.data, function (hit) { _.some(res.data, function (hit) {
if (uniqueResults.length === 0 || _.every(uniqueResults, isDifferent.bind(null, hit)) ) {
if (_.isEmpty(uniqueResults)) {
uniqueResults.push(hit); uniqueResults.push(hit);
} }
else { else {
logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default + ' ' + hit.source + ':' + hit._id }); // if there are multiple items in results, loop through them to find a dupe
// save off the index of the dupe if found
var dupeIndex = uniqueResults.findIndex(function (elem, index, array) {
return !isDifferent(elem, hit);
});
// if a dupe is not found, just add to results and move on
if (dupeIndex === -1) {
uniqueResults.push(hit);
}
// if dupe was found, we need to check which of the records is preferred
// since the order in which Elasticsearch returns identical text matches is arbitrary
// of course, if the new one is preferred we should replace previous with new
else if (isPreferred(uniqueResults[dupeIndex], hit)) {
logger.info('[dupe][replacing]', {
query: req.clean.text,
previous: uniqueResults[dupeIndex].source,
hit: hit.name.default + ' ' + hit.source + ':' + hit._id
});
// replace previous dupe item with current hit
uniqueResults[dupeIndex] = hit;
}
// if not preferred over existing, just log and move on
else {
logger.info('[dupe][skipping]', {
query: req.clean.text,
previous: uniqueResults[dupeIndex].source,
hit: hit.name.default + ' ' + hit.source + ':' + hit._id
});
}
} }
// stop looping when requested size has been reached in uniqueResults // stop looping when requested size has been reached in uniqueResults
@ -32,4 +62,19 @@ function dedupeResults(req, res, next) {
next(); next();
} }
/**
 * Decide whether a duplicate hit should take the place of the record that
 * was already kept for the same place.
 *
 * @param {Object} existing - the record currently held in the unique results
 * @param {Object} candidateReplacement - the duplicate hit being considered
 * @returns {boolean} true when the candidate's source outranks the existing one
 */
function isPreferred(existing, candidateReplacement) {
  // NOTE: we are assuming here that the layer for both records is the same

  // each entry reads [loserSource, winnerSource]; entries are tried in order
  var sourceRankings = [
    ['geonames', 'whosonfirst'],        // WOF has bbox and is generally preferred
    ['openstreetmap', 'openaddresses'], // addresses are better in OA
    ['whosonfirst', 'openstreetmap']    // venues are better in OSM, at this time
  ];

  // stops at the first pairing that matches, same as a chain of `||`
  return sourceRankings.some(function (pairing) {
    return trumps(existing, candidateReplacement, pairing[0], pairing[1]);
  });
}
/**
 * Check a single (loser, winner) source pairing against two records.
 *
 * @param {Object} existing - record already kept; its `source` is compared to loserSource
 * @param {Object} candidateReplacement - duplicate hit; its `source` is compared to winnerSource
 * @param {string} loserSource - source name that gets replaced
 * @param {string} winnerSource - source name that does the replacing
 * @returns {boolean} true only when both records match the pairing
 */
function trumps(existing, candidateReplacement, loserSource, winnerSource) {
  // the candidate is only inspected once the existing record matches the losing source
  if (existing.source !== loserSource) {
    return false;
  }
  return candidateReplacement.source === winnerSource;
}
module.exports = setup; module.exports = setup;

2
middleware/geocodeJSON.js

@ -43,7 +43,7 @@ function convertToGeocodeJSON(req, res, next, opts) {
// REQUIRED. A semver.org compliant version number. Describes the version of // REQUIRED. A semver.org compliant version number. Describes the version of
// the GeocodeJSON spec that is implemented by this instance. // the GeocodeJSON spec that is implemented by this instance.
res.body.geocoding.version = '0.1'; res.body.geocoding.version = '0.2';
// OPTIONAL. Default: null. The attribution of the data. In case of multiple sources, // OPTIONAL. Default: null. The attribution of the data. In case of multiple sources,
// and then multiple attributions, can be an object with one key by source. // and then multiple attributions, can be an object with one key by source.

16
package.json

@ -32,7 +32,7 @@
"url": "https://github.com/pelias/api/issues" "url": "https://github.com/pelias/api/issues"
}, },
"engines": { "engines": {
"node": ">=0.10.26" "node": ">=4.0.0"
}, },
"dependencies": { "dependencies": {
"addressit": "1.4.0", "addressit": "1.4.0",
@ -41,7 +41,7 @@
"elasticsearch": "^11.0.0", "elasticsearch": "^11.0.0",
"elasticsearch-exceptions": "0.0.4", "elasticsearch-exceptions": "0.0.4",
"express": "^4.8.8", "express": "^4.8.8",
"express-http-proxy": "^0.7.0", "express-http-proxy": "^0.10.0",
"extend": "3.0.0", "extend": "3.0.0",
"geojson": "^0.4.0", "geojson": "^0.4.0",
"geojson-extent": "^0.3.1", "geojson-extent": "^0.3.1",
@ -51,12 +51,12 @@
"lodash": "^4.5.0", "lodash": "^4.5.0",
"markdown": "0.5.0", "markdown": "0.5.0",
"morgan": "1.7.0", "morgan": "1.7.0",
"pelias-categories": "1.0.0", "pelias-categories": "1.1.0",
"pelias-config": "2.1.0", "pelias-config": "2.3.0",
"pelias-logger": "0.0.8", "pelias-logger": "0.1.0",
"pelias-model": "4.2.0", "pelias-model": "4.3.0",
"pelias-query": "8.6.0", "pelias-query": "8.8.0",
"pelias-text-analyzer": "1.3.0", "pelias-text-analyzer": "1.4.0",
"stats-lite": "2.0.3", "stats-lite": "2.0.3",
"through2": "2.0.1" "through2": "2.0.1"
}, },

2
query/search.js

@ -59,7 +59,7 @@ function generateQuery( clean ){
// size // size
if( clean.querySize ) { if( clean.querySize ) {
vs.var( 'size', 50 ); vs.var( 'size', clean.querySize );
} }
// focus point // focus point

5
query/search_defaults.js

@ -92,6 +92,9 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'population:field': 'population', 'population:field': 'population',
'population:modifier': 'log1p', 'population:modifier': 'log1p',
'population:max_boost': 20, 'population:max_boost': 20,
'population:weight': 2 'population:weight': 2,
'boost:address': 10,
'boost:street': 5
}); });

13
test/unit/fixture/search_boundary_country.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -73,19 +74,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

12
test/unit/fixture/search_fallback.js

@ -98,6 +98,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.address', '_name': 'fallback.address',
'boost': 10,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -195,6 +196,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -790,16 +792,6 @@ module.exports = {
'size': 20, 'size': 20,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

13
test/unit/fixture/search_linguistic_bbox.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -76,19 +77,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

13
test/unit/fixture/search_linguistic_focus.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -79,19 +80,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

13
test/unit/fixture/search_linguistic_focus_bbox.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -90,19 +91,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

13
test/unit/fixture/search_linguistic_focus_null_island.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -79,19 +80,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

13
test/unit/fixture/search_linguistic_only.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -65,19 +66,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

13
test/unit/fixture/search_linguistic_viewport.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -65,19 +66,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

13
test/unit/fixture/search_linguistic_viewport_min_diagonal.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -65,19 +66,9 @@ module.exports = {
'boost_mode': 'multiply' 'boost_mode': 'multiply'
} }
}, },
'size': 50, 'size': 10,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

11
test/unit/fixture/search_with_category_filtering.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -69,16 +70,6 @@ module.exports = {
'size': 20, 'size': 20,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

11
test/unit/fixture/search_with_source_filtering.js

@ -9,6 +9,7 @@ module.exports = {
{ {
'bool': { 'bool': {
'_name': 'fallback.street', '_name': 'fallback.street',
'boost': 5,
'must': [ 'must': [
{ {
'match_phrase': { 'match_phrase': {
@ -68,16 +69,6 @@ module.exports = {
'size': 20, 'size': 20,
'track_scores': true, 'track_scores': true,
'sort': [ 'sort': [
{
'population': {
'order': 'desc'
}
},
{
'popularity': {
'order': 'desc'
}
},
'_score' '_score'
] ]
}; };

130
test/unit/middleware/dedupe.js

@ -58,6 +58,136 @@ module.exports.tests.dedupe = function(test, common) {
}); });
}; };
module.exports.tests.trump = function(test, common) {
  // Each scenario feeds dedupe two same-named, same-layer hits from different
  // sources (in the listed order) and names the source expected to survive.
  var scenarios = [
    {
      title: 'whosonfirst trumps geonames, replace',
      text: 'Lancaster',
      layer: 'locality',
      sources: ['geonames', 'whosonfirst'],
      winner: 'whosonfirst'
    },
    {
      title: 'whosonfirst trumps geonames, no replace',
      text: 'Lancaster',
      layer: 'locality',
      sources: ['whosonfirst', 'geonames'],
      winner: 'whosonfirst'
    },
    {
      title: 'openstreetmap trumps whosonfirst venues',
      text: 'Lancaster Dairy Farm',
      layer: 'venue',
      sources: ['openstreetmap', 'whosonfirst'],
      winner: 'openstreetmap'
    },
    {
      title: 'openaddresses trumps openstreetmap',
      text: '100 Main St',
      layer: 'address',
      sources: ['openstreetmap', 'openaddresses'],
      winner: 'openaddresses'
    }
  ];

  // build one result record in the shape these tests hand to dedupe
  function makeHit(name, source, sourceId, layer) {
    return {
      'name': { 'default': name },
      'source': source,
      'source_id': sourceId,
      'layer': layer
    };
  }

  scenarios.forEach(function (scenario) {
    test(scenario.title, function (t) {
      var req = {
        clean: {
          text: scenario.text,
          size: 100
        }
      };
      var res = {
        data: [
          makeHit(scenario.text, scenario.sources[0], '123456', scenario.layer),
          makeHit(scenario.text, scenario.sources[1], '654321', scenario.layer)
        ]
      };

      // the two hits are duplicates, so exactly one should remain
      var expectedCount = 1;
      dedupe(req, res, function () {
        t.equal(res.data.length, expectedCount, 'results have fewer items than before');
        t.deepEqual(res.data[0].source, scenario.winner, scenario.winner + ' result won');
        t.end();
      });
    });
  });
};
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {
function test(name, testFunction) { function test(name, testFunction) {

Loading…
Cancel
Save