Browse Source

Merge remote-tracking branch 'pelias/master' into config-localnaming

Conflicts:
	middleware/localNamingConventions.js
pull/453/head
Vesa Meskanen 9 years ago
parent
commit
90ce532337
  1. 14
      .travis.yml
  2. 51
      helper/geojsonify.js
  3. 25
      middleware/confidenceScore.js
  4. 10
      middleware/dedupe.js
  5. 12
      middleware/localNamingConventions.js
  6. 38
      middleware/parseBBox.js
  7. 4
      middleware/renamePlacenames.js
  8. 2
      package.json
  9. 6
      query/autocomplete_defaults.js
  10. 6
      query/reverse_defaults.js
  11. 6
      query/search_defaults.js
  12. 7
      routes/v1.js
  13. 2
      test/unit/fixture/autocomplete_linguistic_with_admin.js
  14. 2
      test/unit/fixture/dedupe_elasticsearch_nonascii_results.js
  15. 22
      test/unit/fixture/dedupe_elasticsearch_results.js
  16. 6
      test/unit/fixture/search_full_address.js
  17. 4
      test/unit/fixture/search_regions_address.js
  18. 125
      test/unit/helper/geojsonify.js
  19. 4
      test/unit/middleware/localNamingConventions.js
  20. 65
      test/unit/middleware/parseBBox.js
  21. 1
      test/unit/run.js

14
.travis.yml

@ -3,18 +3,12 @@ language: node_js
node_js:
- 0.10
- 0.12
- 4.0
- 4.1
- 4.2
- 4.3
- 5.6
- 4.4
- 5.8
matrix:
allow_failures:
- node_js: 4.0
- node_js: 4.1
- node_js: 4.2
- node_js: 4.3
- node_js: 5.6
- node_js: 4.4
- node_js: 5.8
env:
global:
- CXX=g++-4.8

51
helper/geojsonify.js

@ -29,7 +29,8 @@ var DETAILS_PROPS = [
'locality_id',
'locality_a',
'neighbourhood',
'neighbourhood_id'
'neighbourhood_id',
'bounding_box'
];
@ -50,11 +51,16 @@ function geojsonifyPlaces( docs ){
return !!doc;
});
// get all the bounding_box corners as well as single points
// to be used for computing the overall bounding_box for the FeatureCollection
var extentPoints = extractExtentPoints(geodata);
// convert to geojson
var geojson = GeoJSON.parse( geodata, { Point: ['lat', 'lng'] });
var geojson = GeoJSON.parse( geodata, { Point: ['lat', 'lng'] });
var geojsonExtentPoints = GeoJSON.parse( extentPoints, { Point: ['lat', 'lng'] });
// bounding box calculations
computeBBox(geojson);
computeBBox(geojson, geojsonExtentPoints);
return geojson;
}
@ -105,23 +111,56 @@ function addLabel(src, dst) {
dst.label = labelGenerator(dst);
}
/**
 * Build the flat list of points used to work out the extent of a result set.
 *
 * A place that carries a `bounding_box` contributes two points: its
 * (min_lon, min_lat) corner followed by its (max_lon, max_lat) corner.
 * Any other place contributes a single point taken from its own `lng`/`lat`.
 *
 * @param {Array} geodata places to inspect
 * @returns {Array} `{lng, lat}` objects, in the same order as the input places
 */
function extractExtentPoints(geodata) {
  return geodata.reduce(function (points, item) {
    var box = item.bounding_box;

    // no box available: the place's own position is all there is
    if (!box) {
      points.push({ lng: item.lng, lat: item.lat });
      return points;
    }

    // two opposite corners are enough to describe the whole box
    points.push(
      { lng: box.min_lon, lat: box.min_lat },
      { lng: box.max_lon, lat: box.max_lat }
    );
    return points;
  }, []);
}
/**
* Compute bbox that encompasses all features in the result set.
* Set bbox property on the geojson object.
*
* @param {object} geojson
*/
function computeBBox(geojson) {
function computeBBox(geojson, geojsonExtentPoints) {
// @note: extent() sometimes throws Errors for unusual data
// eg: https://github.com/pelias/pelias/issues/84
try {
var bbox = extent( geojson );
var bbox = extent( geojsonExtentPoints );
if( !!bbox ){
geojson.bbox = bbox;
}
} catch( e ){
console.error( 'bbox error', e.message, e.stack );
console.error( 'geojson', JSON.stringify( geojson, null, 2 ) );
console.error( 'geojson', JSON.stringify( geojsonExtentPoints, null, 2 ) );
}
}

25
middleware/confidenceScore.js

@ -100,9 +100,10 @@ function checkForDealBreakers(req, hit) {
return true;
}
if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address) &&
req.clean.parsed_text.postalcode !== hit.address.zip) {
logger.debug('[confidence][deal-breaker]: postalcode !== zip (' + req.clean.parsed_text.postalcode + ' !== ' + hit.address.zip + ')');
if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address_parts) &&
req.clean.parsed_text.postalcode !== hit.address_parts.zip) {
logger.debug('[confidence][deal-breaker]: postalcode !== zip (' + req.clean.parsed_text.postalcode +
' !== ' + hit.address_parts.zip + ')');
return true;
}
}
@ -155,8 +156,8 @@ function checkName(text, parsed_text, hit) {
*/
function checkQueryType(text, hit) {
if (check.assigned(text) && check.assigned(text.number) &&
(check.undefined(hit.address) ||
(check.assigned(hit.address) && check.undefined(hit.address.number)))) {
(check.undefined(hit.address_parts) ||
(check.assigned(hit.address_parts) && check.undefined(hit.address_parts.number)))) {
return 0;
}
return 1;
@ -206,10 +207,10 @@ function propMatch(textProp, hitProp, expectEnriched) {
* @param {string} [text.state]
* @param {string} [text.country]
* @param {object} hit
* @param {object} [hit.address]
* @param {string|number} [hit.address.number]
* @param {string} [hit.address.street]
* @param {string|number} [hit.address.zip]
* @param {object} [hit.address_parts]
* @param {string|number} [hit.address_parts.number]
* @param {string} [hit.address_parts.street]
* @param {string|number} [hit.address_parts.zip]
* @param {Array} [hit.parent.region_a]
* @param {Array} [hit.parent.country_a]
* @returns {number}
@ -219,9 +220,9 @@ function checkAddress(text, hit) {
var res = 0;
if (check.assigned(text) && check.assigned(text.number) && check.assigned(text.street)) {
res += propMatch(text.number, (hit.address ? hit.address.number : null), false);
res += propMatch(text.street, (hit.address ? hit.address.street : null), false);
res += propMatch(text.postalcode, (hit.address ? hit.address.zip: null), true);
res += propMatch(text.number, (hit.address_parts ? hit.address_parts.number : null), false);
res += propMatch(text.street, (hit.address_parts ? hit.address_parts.street : null), false);
res += propMatch(text.postalcode, (hit.address_parts ? hit.address_parts.zip: null), true);
res += propMatch(text.state, hit.parent.region_a[0], true);
res += propMatch(text.country, hit.parent.country_a[0], true);

10
middleware/dedupe.js

@ -54,12 +54,12 @@ function isDifferent(item1, item2) {
propMatch(item1, item2, 'name');
}
if (item1.hasOwnProperty('address') && item2.hasOwnProperty('address')) {
propMatch(item1.address, item2.address, 'number');
propMatch(item1.address, item2.address, 'street');
propMatch(item1.address, item2.address, 'zip');
if (item1.hasOwnProperty('address_parts') && item2.hasOwnProperty('address_parts')) {
propMatch(item1.address_parts, item2.address_parts, 'number');
propMatch(item1.address_parts, item2.address_parts, 'street');
propMatch(item1.address_parts, item2.address_parts, 'zip');
}
else if (item1.address !== item2.address) {
else if (item1.address_parts !== item2.address_parts) {
throw new Error('different');
}
}

12
middleware/localNamingConventions.js

@ -38,9 +38,11 @@ function applyLocalNamingConventions(req, res, next) {
}
if (!flip){ return false; }
if( !place.hasOwnProperty('address') ){ return false; }
if( !place.address.hasOwnProperty('number') ){ return false; }
if( !place.address.hasOwnProperty('street') ){ return false; }
if( place.parent.country_a.indexOf('DEU') === -1 ){ return false; }
if( !place.hasOwnProperty('address_parts') ){ return false; }
if( !place.address_parts.hasOwnProperty('number') ){ return false; }
if( !place.address_parts.hasOwnProperty('street') ){ return false; }
return true;
})
.forEach( flipNumberAndStreet );
@ -51,8 +53,8 @@ function applyLocalNamingConventions(req, res, next) {
// flip the housenumber and street name
// eg. '101 Grolmanstraße' -> 'Grolmanstraße 101'
function flipNumberAndStreet(place) {
var standard = ( place.address.number + ' ' + place.address.street ),
flipped = ( place.address.street + ' ' + place.address.number );
var standard = ( place.address_parts.number + ' ' + place.address_parts.street ),
flipped = ( place.address_parts.street + ' ' + place.address_parts.number );
// flip street name and housenumber
if( place.name.default === standard ){

38
middleware/parseBBox.js

@ -0,0 +1,38 @@
var logger = require('pelias-logger').get('api');
/**
 * Middleware factory.
 *
 * The returned middleware runs every entry of `res.data` through `parseBBox`
 * and stores the mapped array back on `res.data`. When there is no response
 * object, or it has no `data`, the middleware does nothing except call `next`.
 *
 * @returns {function} middleware with the usual (req, res, next) signature
 */
function setup() {
  function parseBoundingBoxes(req, res, next) {
    var hasResults = Boolean(res) && Boolean(res.data);

    if (hasResults) {
      res.data = res.data.map(parseBBox);
      next();
      return;
    }

    // no result data set: pass control straight on
    return next();
  }

  return parseBoundingBoxes;
}
/*
 * Parse the bounding_box property of a place and replace it with an object.
 *
 * - A string value is run through JSON.parse; the result replaces the string
 *   when it is an object.
 * - A string that is not valid JSON, or that parses to something other than
 *   an object (a number, a string, null, ...), is logged and the property is
 *   removed from the place.
 * - A value that is not a string (for example an object that has already
 *   been parsed) is left untouched, so running this twice is harmless.
 *
 * The place is modified in place and also returned.
 *
 * @param {object} [place]
 * @returns {object} the same place that was passed in
 */
function parseBBox(place) {
  if (!place || !place.bounding_box) {
    return place;
  }

  var raw = place.bounding_box;

  // JSON.parse would coerce an object to "[object Object]" and fail,
  // which used to throw away a perfectly good, already parsed box
  if (typeof raw !== 'string') {
    return place;
  }

  var parsed;
  try {
    parsed = JSON.parse(raw);
  }
  catch (err) {
    parsed = null;
  }

  if (parsed !== null && typeof parsed === 'object') {
    place.bounding_box = parsed;
  }
  else {
    logger.error('Invalid bounding_box json string:', place);
    delete place.bounding_box;
  }

  return place;
}
module.exports = setup;

4
middleware/renamePlacenames.js

@ -58,9 +58,9 @@ function renamePlacenames(req, res, next) {
* Rename the fields in one record
*/
function renameOneRecord(place) {
if (place.address) {
if (place.address_parts) {
Object.keys(ADDRESS_PROPS).forEach(function (prop) {
place[ADDRESS_PROPS[prop]] = place.address[prop];
place[ADDRESS_PROPS[prop]] = place.address_parts[prop];
});
}

2
package.json

@ -53,7 +53,7 @@
"morgan": "1.7.0",
"pelias-config": "^1.0.1",
"pelias-logger": "^0.0.8",
"pelias-query": "^6.1.0",
"pelias-query": "6.2.0",
"pelias-suggester-pipeline": "2.0.4",
"stats-lite": "1.0.3",
"through2": "2.0.1"

6
query/autocomplete_defaults.js

@ -39,15 +39,15 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'function_score:boost_mode': 'multiply',
'address:housenumber:analyzer': 'peliasHousenumber',
'address:housenumber:field': 'address.number',
'address:housenumber:field': 'address_parts.number',
'address:housenumber:boost': 2,
'address:street:analyzer': 'peliasStreet',
'address:street:field': 'address.street',
'address:street:field': 'address_parts.street',
'address:street:boost': 5,
'address:postcode:analyzer': 'peliasZip',
'address:postcode:field': 'address.zip',
'address:postcode:field': 'address_parts.zip',
'address:postcode:boost': 2000,
'admin:country_a:analyzer': 'standard',

6
query/reverse_defaults.js

@ -39,15 +39,15 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'function_score:boost_mode': 'replace',
'address:housenumber:analyzer': 'peliasHousenumber',
'address:housenumber:field': 'address.number',
'address:housenumber:field': 'address_parts.number',
'address:housenumber:boost': 2,
'address:street:analyzer': 'peliasStreet',
'address:street:field': 'address.street',
'address:street:field': 'address_parts.street',
'address:street:boost': 5,
'address:postcode:analyzer': 'peliasZip',
'address:postcode:field': 'address.zip',
'address:postcode:field': 'address_parts.zip',
'address:postcode:boost': 3,
'admin:country_a:analyzer': 'standard',

6
query/search_defaults.js

@ -39,15 +39,15 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'function_score:boost_mode': 'replace',
'address:housenumber:analyzer': 'peliasHousenumber',
'address:housenumber:field': 'address.number',
'address:housenumber:field': 'address_parts.number',
'address:housenumber:boost': 2,
'address:street:analyzer': 'peliasStreet',
'address:street:field': 'address.street',
'address:street:field': 'address_parts.street',
'address:street:boost': 5,
'address:postcode:analyzer': 'peliasZip',
'address:postcode:field': 'address.zip',
'address:postcode:field': 'address_parts.zip',
'address:postcode:boost': 20,
'admin:country_a:analyzer': 'standard',

7
routes/v1.js

@ -34,7 +34,8 @@ var postProc = {
localNamingConventions: require('../middleware/localNamingConventions'),
renamePlacenames: require('../middleware/renamePlacenames'),
geocodeJSON: require('../middleware/geocodeJSON'),
sendJSON: require('../middleware/sendJSON')
sendJSON: require('../middleware/sendJSON'),
parseBoundingBox: require('../middleware/parseBBox')
};
/**
@ -65,6 +66,7 @@ function addRoutes(app, peliasConfig) {
postProc.dedupe(),
postProc.localNamingConventions(),
postProc.renamePlacenames(),
postProc.parseBoundingBox(),
postProc.geocodeJSON(peliasConfig, base),
postProc.sendJSON
]),
@ -76,6 +78,7 @@ function addRoutes(app, peliasConfig) {
postProc.dedupe(),
postProc.localNamingConventions(),
postProc.renamePlacenames(),
postProc.parseBoundingBox(),
postProc.geocodeJSON(peliasConfig, base),
postProc.sendJSON
]),
@ -90,6 +93,7 @@ function addRoutes(app, peliasConfig) {
postProc.dedupe(),
postProc.localNamingConventions(),
postProc.renamePlacenames(),
postProc.parseBoundingBox(),
postProc.geocodeJSON(peliasConfig, base),
postProc.sendJSON
]),
@ -98,6 +102,7 @@ function addRoutes(app, peliasConfig) {
controllers.place(),
postProc.localNamingConventions(),
postProc.renamePlacenames(),
postProc.parseBoundingBox(),
postProc.geocodeJSON(peliasConfig, base),
postProc.sendJSON
]),

2
test/unit/fixture/autocomplete_linguistic_with_admin.js

@ -106,7 +106,7 @@ module.exports = {
}
],
'score_mode': 'first',
'boost_mode': 'replace',
'boost_mode': 'replace'
}
},
{

2
test/unit/fixture/dedupe_elasticsearch_nonascii_results.js

@ -55,7 +55,7 @@ module.exports = [
'name': {
'default': '万里加油站'
},
'address': {
'address_parts': {
'street': 'S308',
'postalcode': '312044'
},

22
test/unit/fixture/dedupe_elasticsearch_results.js

@ -4,7 +4,7 @@ module.exports = [
'lon': -76.207456,
'lat': 40.039265
},
'address': {},
'address_parts': {},
'parent': {
'localadmin': ['East Lampeter'],
'region_a': ['PA'],
@ -31,7 +31,7 @@ module.exports = [
'lon': -76.207456,
'lat': 40.039265
},
'address': {},
'address_parts': {},
'parent': {
'localadmin': ['East Lampeter'],
'region_a': ['PA'],
@ -58,7 +58,7 @@ module.exports = [
'lon': -76.23246,
'lat': 39.99288
},
'address': {},
'address_parts': {},
'parent': {
'localadmin': ['West Lampeter'],
'region_a': ['PA'],
@ -85,7 +85,7 @@ module.exports = [
'lon': -76.20746,
'lat': 40.03927
},
'address': {},
'address_parts': {},
'parent': {
'localadmin': ['East Lampeter'],
'region_a': ['PA'],
@ -112,7 +112,7 @@ module.exports = [
'lon': -76.232457,
'lat': 39.992877
},
'address': {},
'address_parts': {},
'parent': {
'region': ['Pennsylvania'],
'locality': ['Lampeter'],
@ -139,7 +139,7 @@ module.exports = [
'lon': -76.207456,
'lat': 40.038987
},
'address': {},
'address_parts': {},
'parent': {
'region': ['Pennsylvania'],
'locality': ['Smoketown'],
@ -166,7 +166,7 @@ module.exports = [
'lon': -76.20746,
'lat': 40.03899
},
'address': {},
'address_parts': {},
'parent': {
'region': ['Pennsylvania'],
'locality': ['Smoketown'],
@ -193,7 +193,7 @@ module.exports = [
'lon': -94.167445,
'lat': 38.762788
},
'address': {},
'address_parts': {},
'parent': {
'region': ['Missouri'],
'locality': ['Strasburg'],
@ -219,7 +219,7 @@ module.exports = [
'lon': -78.36317,
'lat': 38.98445
},
'address': {},
'address_parts': {},
'name': {
'default': 'Strasburg High School'
},
@ -245,7 +245,7 @@ module.exports = [
'lon': -100.16516,
'lat': 46.13427
},
'address': {},
'address_parts': {},
'name': {
'default': 'Strasburg High School'
},
@ -271,7 +271,7 @@ module.exports = [
'lon': -81.532392,
'lat': 40.597578
},
'address': {},
'address_parts': {},
'name': {
'default': 'Strasburg High School'
},

6
test/unit/fixture/search_full_address.js

@ -77,7 +77,7 @@ module.exports = {
}
},{
'match': {
'address.number': {
'address_parts.number': {
'query': '123',
'boost': vs['address:housenumber:boost'],
'analyzer': vs['address:housenumber:analyzer']
@ -85,7 +85,7 @@ module.exports = {
}
}, {
'match': {
'address.street': {
'address_parts.street': {
'query': 'main st',
'boost': vs['address:street:boost'],
'analyzer': vs['address:street:analyzer']
@ -93,7 +93,7 @@ module.exports = {
}
}, {
'match': {
'address.zip': {
'address_parts.zip': {
'query': '10010',
'boost': vs['address:postcode:boost'],
'analyzer': vs['address:postcode:analyzer']

4
test/unit/fixture/search_regions_address.js

@ -77,7 +77,7 @@ module.exports = {
}
},{
'match': {
'address.number': {
'address_parts.number': {
'query': '1',
'boost': vs['address:housenumber:boost'],
'analyzer': vs['address:housenumber:analyzer']
@ -85,7 +85,7 @@ module.exports = {
}
}, {
'match': {
'address.street': {
'address_parts.street': {
'query': 'water st',
'boost': vs['address:street:boost'],
'analyzer': vs['address:street:analyzer']

125
test/unit/helper/geojsonify.js

@ -207,6 +207,131 @@ module.exports.tests.search = function(test, common) {
t.deepEqual(json, expected, 'all docs mapped');
t.end();
});
test('filtering out empty items', function (t) {
var input = [
{
'bounding_box': {
'min_lat': 40.6514712164,
'max_lat': 40.6737320588,
'min_lon': -73.8967895508,
'max_lon': -73.8665771484
},
'locality': [
'New York'
],
'source': 'whosonfirst',
'layer': 'neighbourhood',
'population': 173198,
'popularity': 495,
'center_point': {
'lon': -73.881319,
'lat': 40.663303
},
'name': {
'default': 'East New York'
},
'source_id': '85816607',
'category': [],
'_id': '85816607',
'_type': 'neighbourhood',
'_score': 21.434,
'confidence': 0.888,
'country': [
'United States'
],
'country_id': [
'85633793'
],
'country_a': [
'USA'
],
'region': [
'New York'
],
'region_id': [
'85688543'
],
'region_a': [
'NY'
],
'county': [
'Kings County'
],
'county_id': [
'102082361'
],
'county_a': [
null
],
'localadmin': [
'Brooklyn'
],
'localadmin_id': [
'404521211'
],
'localadmin_a': [
null
],
'locality_id': [
'85977539'
],
'locality_a': [
null
],
'neighbourhood': [],
'neighbourhood_id': []
}
];
var expected = {
'type': 'FeatureCollection',
'bbox': [-73.8967895508, 40.6514712164, -73.8665771484, 40.6737320588],
'features': [
{
'type': 'Feature',
'properties': {
'id': '85816607',
'gid': 'whosonfirst:neighbourhood:85816607',
'layer': 'neighbourhood',
'source': 'whosonfirst',
'name': 'East New York',
'confidence': 0.888,
'country': 'United States',
'country_id': '85633793',
'country_a': 'USA',
'region': 'New York',
'region_id': '85688543',
'region_a': 'NY',
'county': 'Kings County',
'county_id': '102082361',
'localadmin': 'Brooklyn',
'localadmin_id': '404521211',
'locality': 'New York',
'locality_id': '85977539',
'bounding_box': {
'min_lat': 40.6514712164,
'max_lat': 40.6737320588,
'min_lon': -73.8967895508,
'max_lon': -73.8665771484
},
'label': 'East New York, Brooklyn, NY, USA'
},
'geometry': {
'type': 'Point',
'coordinates': [
-73.881319,
40.663303
]
}
}
]
};
var json = geojsonify.search( input );
t.deepEqual(json, expected, 'all wanted properties exposed');
t.end();
});
};
module.exports.all = function (tape, common) {

4
test/unit/middleware/localNamingConventions.js

@ -10,7 +10,7 @@ module.exports.tests.flipNumberAndStreet = function(test, common) {
'_type': 'test',
'name': { 'default': '1 Main St' },
'center_point': { 'lon': -7.131521, 'lat': 54.428866 },
'address': {
'address_parts': {
'zip': 'BT77 0BG',
'number': '1',
'street': 'Main St'
@ -27,7 +27,7 @@ module.exports.tests.flipNumberAndStreet = function(test, common) {
'_type': 'test',
'name': { 'default': '23 Grolmanstraße' },
'center_point': { 'lon': 13.321487, 'lat': 52.506781 },
'address': {
'address_parts': {
'zip': '10623',
'number': '23',
'street': 'Grolmanstraße'

65
test/unit/middleware/parseBBox.js

@ -0,0 +1,65 @@
var parseBBox = require('../../../middleware/parseBBox')();
module.exports.tests = {};
module.exports.tests.computeDistance = function(test, common) {
  // Run the middleware on `response` and, once it hands control on,
  // compare the (mutated) response with what the case expects.
  function runCase(t, response, wanted) {
    parseBBox({}, response, function () {
      t.deepEquals(response, wanted, 'correct bounding_box');
      t.end();
    });
  }

  // a JSON string should be replaced by the object it describes
  test('valid bounding_box json', function(t) {
    var response = {
      data: [
        { bounding_box: '{"min_lat":40.6514712164,"max_lat":40.6737320588,"min_lon":-73.8967895508,"max_lon":-73.8665771484}' }
      ]
    };
    var wanted = {
      data: [
        {
          bounding_box: {
            min_lat: 40.6514712164,
            max_lat: 40.6737320588,
            min_lon: -73.8967895508,
            max_lon: -73.8665771484
          }
        }
      ]
    };

    runCase(t, response, wanted);
  });

  // an unparseable string should leave the record without a bounding_box
  test('invalid bounding_box json', function(t) {
    var response = {
      data: [
        { bounding_box: 'garbage json' }
      ]
    };
    var wanted = {
      data: [
        {}
      ]
    };

    runCase(t, response, wanted);
  });
};
/**
 * Register every test group of this file with the given harness.
 * Each test name is prefixed so its origin is recognisable in the output.
 *
 * @param {function} tape function used to declare a test: (name, testFunction)
 * @param {object} common passed through unchanged to every test group
 */
module.exports.all = function (tape, common) {
  var groups = module.exports.tests;

  function prefixedTest(name, testFunction) {
    return tape('[middleware] parseBBox: ' + name, testFunction);
  }

  for( var groupName in groups ){
    groups[groupName](prefixedTest, common);
  }
};

1
test/unit/run.js

@ -27,6 +27,7 @@ var tests = [
require('./middleware/distance'),
require('./middleware/localNamingConventions'),
require('./middleware/dedupe'),
require('./middleware/parseBBox'),
require('./query/autocomplete'),
require('./query/autocomplete_defaults'),
require('./query/search_defaults'),

Loading…
Cancel
Save