Browse Source

Merge pull request #895 from pelias/staging

Merge staging into production
pull/1005/head
Diana Shkolnikov 8 years ago committed by GitHub
parent
commit
d1bc06b2ac
  1. 48
      README.md
  2. 8
      middleware/dedupe.js
  3. 11
      middleware/sizeCalculator.js
  4. 12
      package.json
  5. 12
      test/unit/helper/sizeCalculator.js
  6. 36
      test/unit/middleware/dedupe.js

48
README.md

@ -37,9 +37,53 @@ The API ships with several convenience commands (runnable via `npm`):
## pelias-config
The API recognizes the following properties under the top-level `api` key in your `pelias.json` config file:
* `accessLog`: (*optional*) The name of the format to use for access logs; may be any one of the
|parameter|required|default|description|
|---|---|---|---|
|`host`|*yes*||specifies the url under which the http service is to run|
|`textAnalyzer`|*no*|*addressit*|can be either `libpostal` or `addressit`; however, `addressit` will soon be **deprecated** and only `libpostal` will be supported going forward|
|`indexName`|*no*|*pelias*|name of the Elasticsearch index to be used when building queries|
|`legacyUrl`|*no*||the url to redirect to in case the user does not specify a version such as `v1`|
|`relativeScores`|*no*|true|if set to true, confidence scores will be normalized; realistically, at this point, setting this to false is neither tested nor desirable|
|`accessLog`|*no*||name of the format to use for access logs; may be any one of the
[predefined values](https://github.com/expressjs/morgan#predefined-formats) in the `morgan` package. Defaults to
`"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely.
`"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely.|
|`pipService`|*yes*||full url to the pip service to be used for coarse reverse queries. If missing, which is not recommended, the service will default to using nearby lookups instead of point-in-polygon.|
Example configuration file would look something like this:
```
{
"esclient": {
"keepAlive": true,
"requestTimeout": "1200000",
"hosts": [
{
"protocol": "http",
"host": "somesemachine.elb.amazonaws.com",
"port": 9200
}
]
},
"api": {
"host": "localhost:3100/v1/",
"indexName": "foobar",
"legacyUrl": "pelias.mapzen.com",
"relativeScores": true,
"textAnalyzer": "libpostal",
"pipService": "http://mypipservice.com/3000"
},
"interpolation": {
"client": {
"adapter": "http",
"host": "internal-pelias-interpolation-dev-130430937.us-east-1.elb.amazonaws.com"
}
},
"logger": {
"level": "debug"
}
}
```
## Contributing

8
middleware/dedupe.js

@ -65,6 +65,14 @@ function dedupeResults(req, res, next) {
function isPreferred(existing, candidateReplacement) {
// NOTE: we are assuming here that the layer for both records is the same
var isOA = _.flow(_.property('source'), _.eq.bind(null, 'openaddresses'));
var hasZip = _.bind(_.has, null, _.bind.placeholder, 'address_parts.zip');
// https://github.com/pelias/api/issues/872
if (isOA(existing) && isOA(candidateReplacement)) {
return hasZip(candidateReplacement) && !hasZip(existing);
}
//bind the trumps function to the data items to keep the rest of the function clean
var trumpsFunc = trumps.bind(null, existing, candidateReplacement);

11
middleware/sizeCalculator.js

@ -2,6 +2,8 @@ var _ = require('lodash');
var SIZE_PADDING = 2;
var MIN_QUERY_SIZE = 20;
/**
* Utility for calculating query result size
* incorporating padding for dedupe process
@ -24,12 +26,7 @@ function setup() {
* @returns {number}
*/
function calculateSize(cleanSize) {
switch (cleanSize || 1) {
case 1:
return 1;
default:
return cleanSize * SIZE_PADDING;
}
return Math.max(MIN_QUERY_SIZE, cleanSize * SIZE_PADDING);
}
module.exports = setup;
module.exports = setup;

12
package.json

@ -37,14 +37,14 @@
"node": ">=4.0.0"
},
"dependencies": {
"addressit": "1.4.0",
"addressit": "1.5.0",
"async": "^2.0.0",
"check-types": "^7.0.0",
"elasticsearch": "^12.0.1",
"elasticsearch-exceptions": "0.0.4",
"express": "^4.8.8",
"express-http-proxy": "^0.11.0",
"extend": "3.0.0",
"extend": "^3.0.1",
"geojson": "^0.4.0",
"geojson-extent": "^0.3.1",
"geolib": "^2.0.18",
@ -55,14 +55,14 @@
"lodash": "^4.5.0",
"markdown": "0.5.0",
"morgan": "1.8.1",
"pelias-config": "2.9.0",
"pelias-config": "2.10.0",
"pelias-categories": "1.2.0",
"pelias-labels": "1.6.0",
"pelias-logger": "0.2.0",
"pelias-mock-logger": "^1.0.1",
"pelias-model": "4.6.0",
"pelias-model": "4.8.1",
"pelias-query": "8.15.0",
"pelias-text-analyzer": "1.8.0",
"pelias-text-analyzer": "1.8.2",
"predicates": "^1.0.1",
"retry": "^0.10.1",
"request": "^2.79.0",
@ -84,7 +84,7 @@
"tap-dot": "1.0.5",
"tape": "^4.5.1",
"tmp": "0.0.31",
"uglify-js": "^2.6.2"
"uglify-js": "^3.0.4"
},
"pre-commit": [
"lint",

12
test/unit/helper/sizeCalculator.js

@ -25,7 +25,7 @@ module.exports.tests.valid = function(test, common) {
test('size=0', function (t) {
setup(0);
calcSize(req, {}, function () {
t.equal(req.clean.querySize, 1);
t.equal(req.clean.querySize, 20);
t.end();
});
});
@ -33,7 +33,7 @@ module.exports.tests.valid = function(test, common) {
test('size=1', function (t) {
setup(1);
calcSize(req, {}, function () {
t.equal(req.clean.querySize, 1);
t.equal(req.clean.querySize, 20);
t.end();
});
});
@ -46,6 +46,14 @@ module.exports.tests.valid = function(test, common) {
});
});
test('size=20', function (t) {
setup(20);
calcSize(req, {}, function () {
t.equal(req.clean.querySize, 40);
t.end();
});
});
test('no size', function (t) {
setup();
calcSize(req, {}, function () {

36
test/unit/middleware/dedupe.js

@ -186,6 +186,42 @@ module.exports.tests.trump = function(test, common) {
t.end();
});
});
test('openaddresses with zip trumps openaddresses without zip', function (t) {
var req = {
clean: {
text: '100 Main St',
size: 100
}
};
var res = {
data: [
{
'name': { 'default': '100 Main St' },
'source': 'openaddresses',
'source_id': '123456',
'layer': 'address',
'address_parts': {}
},
{
'name': { 'default': '100 Main St' },
'source': 'openaddresses',
'source_id': '654321',
'layer': 'address',
'address_parts': {
'zip': '54321'
}
}
]
};
var expectedCount = 1;
dedupe(req, res, function () {
t.equal(res.data.length, expectedCount, 'results have fewer items than before');
t.deepEqual(res.data[0].source_id, '654321', 'openaddresses result with zip won');
t.end();
});
});
};
module.exports.all = function (tape, common) {

Loading…
Cancel
Save