diff --git a/README.md b/README.md index bda2627a..6f4a1fa6 100644 --- a/README.md +++ b/README.md @@ -37,9 +37,53 @@ The API ships with several convenience commands (runnable via `npm`): ## pelias-config The API recognizes the following properties under the top-level `api` key in your `pelias.json` config file: - * `accessLog`: (*optional*) The name of the format to use for access logs; may be any one of the +|parameter|required|default|description| +|---|---|---|---| +|`host`|*yes*||specifies the url under which the http service is to run| +|`textAnalyzer`|*no*|*addressit*|can be either `libpostal` or `addressit`; however, `addressit` will soon be **deprecated** and only `libpostal` will be supported going forward| +|`indexName`|*no*|*pelias*|name of the Elasticsearch index to be used when building queries| +|`legacyUrl`|*no*||the url to redirect to in case the user does not specify a version such as `v1`| +|`relativeScores`|*no*|true|if set to true, confidence scores will be normalized; realistically, at this point setting this to false is neither tested nor desirable| +|`accessLog`|*no*||name of the format to use for access logs; may be any one of the [predefined values](https://github.com/expressjs/morgan#predefined-formats) in the `morgan` package. Defaults to - `"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely. + `"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely.| +|`pipService`|*yes*||full url to the pip service to be used for coarse reverse queries. 
If missing, which is not recommended, the service will default to using nearby lookups instead of point-in-polygon.| + +An example configuration file would look something like this: + +``` +{ + "esclient": { + "keepAlive": true, + "requestTimeout": "1200000", + "hosts": [ + { + "protocol": "http", + "host": "somesemachine.elb.amazonaws.com", + "port": 9200 + } + ] + }, + "api": { + "host": "localhost:3100/v1/", + "indexName": "foobar", + "legacyUrl": "pelias.mapzen.com", + "relativeScores": true, + "textAnalyzer": "libpostal", + "pipService": "http://mypipservice.com/3000" + }, + "interpolation": { + "client": { + "adapter": "http", + "host": "internal-pelias-interpolation-dev-130430937.us-east-1.elb.amazonaws.com" + } + }, + "logger": { + "level": "debug" + } +} +``` + ## Contributing diff --git a/middleware/dedupe.js b/middleware/dedupe.js index 3b125d0d..129d157e 100644 --- a/middleware/dedupe.js +++ b/middleware/dedupe.js @@ -65,6 +65,14 @@ function dedupeResults(req, res, next) { function isPreferred(existing, candidateReplacement) { // NOTE: we are assuming here that the layer for both records is the same + var isOA = _.flow(_.property('source'), _.eq.bind(null, 'openaddresses')); + var hasZip = _.bind(_.has, null, _.bind.placeholder, 'address_parts.zip'); + + // https://github.com/pelias/api/issues/872 + if (isOA(existing) && isOA(candidateReplacement)) { + return hasZip(candidateReplacement) && !hasZip(existing); + } + //bind the trumps function to the data items to keep the rest of the function clean var trumpsFunc = trumps.bind(null, existing, candidateReplacement); diff --git a/middleware/sizeCalculator.js b/middleware/sizeCalculator.js index 88334d3b..69409e45 100644 --- a/middleware/sizeCalculator.js +++ b/middleware/sizeCalculator.js @@ -2,6 +2,8 @@ var _ = require('lodash'); var SIZE_PADDING = 2; +var MIN_QUERY_SIZE = 20; + /** * Utility for calculating query result size * incorporating padding for dedupe process @@ -24,12 +26,7 @@ function setup() { * 
@returns {number} */ function calculateSize(cleanSize) { - switch (cleanSize || 1) { - case 1: - return 1; - default: - return cleanSize * SIZE_PADDING; - } + return Math.max(MIN_QUERY_SIZE, cleanSize * SIZE_PADDING); } -module.exports = setup; \ No newline at end of file +module.exports = setup; diff --git a/package.json b/package.json index bb4b6c28..92948580 100644 --- a/package.json +++ b/package.json @@ -37,14 +37,14 @@ "node": ">=4.0.0" }, "dependencies": { - "addressit": "1.4.0", + "addressit": "1.5.0", "async": "^2.0.0", "check-types": "^7.0.0", "elasticsearch": "^12.0.1", "elasticsearch-exceptions": "0.0.4", "express": "^4.8.8", "express-http-proxy": "^0.11.0", - "extend": "3.0.0", + "extend": "^3.0.1", "geojson": "^0.4.0", "geojson-extent": "^0.3.1", "geolib": "^2.0.18", @@ -55,14 +55,14 @@ "lodash": "^4.5.0", "markdown": "0.5.0", "morgan": "1.8.1", - "pelias-config": "2.9.0", + "pelias-config": "2.10.0", "pelias-categories": "1.2.0", "pelias-labels": "1.6.0", "pelias-logger": "0.2.0", "pelias-mock-logger": "^1.0.1", - "pelias-model": "4.6.0", + "pelias-model": "4.8.1", "pelias-query": "8.15.0", - "pelias-text-analyzer": "1.8.0", + "pelias-text-analyzer": "1.8.2", "predicates": "^1.0.1", "retry": "^0.10.1", "request": "^2.79.0", @@ -84,7 +84,7 @@ "tap-dot": "1.0.5", "tape": "^4.5.1", "tmp": "0.0.31", - "uglify-js": "^2.6.2" + "uglify-js": "^3.0.4" }, "pre-commit": [ "lint", diff --git a/test/unit/helper/sizeCalculator.js b/test/unit/helper/sizeCalculator.js index 41d854d4..6c8d8d22 100644 --- a/test/unit/helper/sizeCalculator.js +++ b/test/unit/helper/sizeCalculator.js @@ -25,7 +25,7 @@ module.exports.tests.valid = function(test, common) { test('size=0', function (t) { setup(0); calcSize(req, {}, function () { - t.equal(req.clean.querySize, 1); + t.equal(req.clean.querySize, 20); t.end(); }); }); @@ -33,7 +33,7 @@ module.exports.tests.valid = function(test, common) { test('size=1', function (t) { setup(1); calcSize(req, {}, function () { - 
t.equal(req.clean.querySize, 1); + t.equal(req.clean.querySize, 20); t.end(); }); }); @@ -46,6 +46,14 @@ module.exports.tests.valid = function(test, common) { }); }); + test('size=20', function (t) { + setup(20); + calcSize(req, {}, function () { + t.equal(req.clean.querySize, 40); + t.end(); + }); + }); + test('no size', function (t) { setup(); calcSize(req, {}, function () { diff --git a/test/unit/middleware/dedupe.js b/test/unit/middleware/dedupe.js index 291d404a..c3821035 100644 --- a/test/unit/middleware/dedupe.js +++ b/test/unit/middleware/dedupe.js @@ -186,6 +186,42 @@ module.exports.tests.trump = function(test, common) { t.end(); }); }); + + test('openaddresses with zip trumps openaddresses without zip', function (t) { + var req = { + clean: { + text: '100 Main St', + size: 100 + } + }; + var res = { + data: [ + { + 'name': { 'default': '100 Main St' }, + 'source': 'openaddresses', + 'source_id': '123456', + 'layer': 'address', + 'address_parts': {} + }, + { + 'name': { 'default': '100 Main St' }, + 'source': 'openaddresses', + 'source_id': '654321', + 'layer': 'address', + 'address_parts': { + 'zip': '54321' + } + } + ] + }; + + var expectedCount = 1; + dedupe(req, res, function () { + t.equal(res.data.length, expectedCount, 'results have fewer items than before'); + t.deepEqual(res.data[0].source_id, '654321', 'openaddresses result with zip won'); + t.end(); + }); + }); }; module.exports.all = function (tape, common) {