Browse Source

Merge branch 'master' into addr_dedup

pull/1070/head
Julian Simioni 7 years ago committed by GitHub
parent
commit
39a40ee724
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      Dockerfile
  2. 21
      LICENSE
  3. 8
      package.json
  4. 2
      public/attribution.md
  5. 18
      sanitizer/_city_name_standardizer.js
  6. 2
      sanitizer/_deprecate_quattroshapes.js
  7. 2
      test/ciao/reverse/sources_deprecation_warning.coffee
  8. 2
      test/ciao/search/sources_deprecation_warning.coffee
  9. 82
      test/unit/sanitizer/_city_name_standardizer.js
  10. 4
      test/unit/sanitizer/_deprecate_quattroshapes.js

2
Dockerfile

@ -2,7 +2,7 @@
FROM pelias/baseimage
# maintainer information
LABEL maintainer="pelias@mapzen.com"
LABEL maintainer="pelias.team@gmail.com"
EXPOSE 3100

21
LICENSE

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2014 Mapzen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

8
package.json

@ -55,11 +55,11 @@
"markdown": "0.5.0",
"morgan": "^1.8.2",
"pelias-categories": "1.2.0",
"pelias-config": "2.13.0",
"pelias-config": "2.14.0",
"pelias-labels": "1.7.0",
"pelias-logger": "0.3.0",
"pelias-logger": "0.3.1",
"pelias-microservice-wrapper": "1.3.0",
"pelias-model": "5.2.0",
"pelias-model": "5.3.2",
"pelias-query": "9.1.1",
"pelias-sorting": "1.1.0",
"predicates": "^2.0.0",
@ -78,7 +78,7 @@
"precommit-hook": "^3.0.0",
"proxyquire": "^1.7.10",
"semantic-release": "^8.0.0",
"source-map": "^0.6.0",
"source-map": "^0.7.0",
"tap-dot": "1.0.5",
"tape": "^4.5.1",
"tmp": "0.0.33",

2
public/attribution.md

@ -4,4 +4,4 @@
* [OpenStreetMap](http://www.openstreetmap.org/copyright) © OpenStreetMap contributors under [ODbL](http://opendatacommons.org/licenses/odbl/)
* [OpenAddresses](http://openaddresses.io) under a [Creative Commons Zero](https://github.com/openaddresses/openaddresses/blob/master/sources/LICENSE) public domain designation
* [GeoNames](http://www.geonames.org/) under [CC-BY-3.0](https://creativecommons.org/licenses/by/3.0/)
* [WhosOnFirst](http://whosonfirst.mapzen.com) under [various licenses](https://github.com/whosonfirst/whosonfirst-data/blob/master/LICENSE.md)
* [WhosOnFirst](https://www.whosonfirst.org/) under [various licenses](https://github.com/whosonfirst/whosonfirst-data/blob/master/LICENSE.md)

18
sanitizer/_city_name_standardizer.js

@ -1,20 +1,18 @@
const _ = require('lodash');
// matches 'ft', 'mt', 'saint', and 'sainte' on word boundary
const mountSaintFort = /\b([fm]t|sainte?)\b/g;
// matches 'ft', 'mt' on word boundary
const mountFort = /\b([fm]t)\b/g;
const transliterations = {
'mt': 'mount',
'ft': 'fort',
'saint': 'st',
'sainte': 'ste'
'ft': 'fort'
};
/**
 * Replacement callback handed to String.prototype.replace(): resolves the
 * matched token against the `transliterations` lookup table.
 *
 * @param {string} match - the substring matched by the regex
 * @returns {*} whatever `transliterations` holds for `match`
 *   (undefined when there is no such entry, per _.get)
 */
function transliterate(match) {
  const replacement = _.get(transliterations, match);
  return replacement;
}
// transliterate ft/mt/saint/sainte to fort/mount/st/ste, respectively
// transliterate ft/mt to fort/mount, respectively
function _sanitize(raw, clean) {
// error & warning messages
// this function doesn't add any error or warning messages
@ -22,16 +20,16 @@ function _sanitize(raw, clean) {
// only try to transliterate if there is a city in parsed_text
if (!_.isEmpty(_.get(clean, 'parsed_text.city'))) {
// eg input: Ft. Saint Louis
// after 1. ft saint louis
// eg input: Ft. st Louis
// after 1. ft st louis
// after 2. fort st louis
// after 3. fort st louis
// 1. remove '.' that could abbreviate ft and mt (makes transliteration regex easier)
const periods_removed = _.toLower(clean.parsed_text.city).replace(/\b(mt|ft)\./g, '$1 ');
// 2. transliterate 'saint'->'st', etc
const transliterated = periods_removed.replace(mountSaintFort, transliterate);
// 2. transliterate 'ft'->'fort', etc
const transliterated = periods_removed.replace(mountFort, transliterate);
// 3. reduce whitespace sequences that can occur when removing periods down to a single space
const whitespace_normalized = _.trimEnd(transliterated.replace(/\s+/, ' '));

2
sanitizer/_deprecate_quattroshapes.js

@ -26,7 +26,7 @@ function _sanitize( raw, clean, opts ) {
'replaced by Who\'s on First, an actively maintained data project based on Quattroshapes' +
'Your existing queries WILL CONTINUE TO WORK for the foreseeable future, but results will ' +
'be coming from Who\'s on First and `sources=quattroshapes` will be interpreted as ' +
'`sources=whosonfirst`. If you have any questions, please email search@mapzen.com.');
'`sources=whosonfirst`. If you have any questions, please email pelias.team@gmail.com.');
// user requested 'quattroshapes', we will give them 'whosonfirst' instead.
sources = _.without(sources, 'quattroshapes', 'qs');

2
test/ciao/reverse/sources_deprecation_warning.coffee

@ -27,7 +27,7 @@ should.not.exist json.geocoding.errors
#? expected warnings
should.exist json.geocoding.warnings
json.geocoding.warnings.should.eql ['You are using Quattroshapes as a data source in this query. Quattroshapes has been disabled as a data source for Mapzen Search, and has beenreplaced by Who\'s on First, an actively maintained data project based on QuattroshapesYour existing queries WILL CONTINUE TO WORK for the foreseeable future, but results will be coming from Who\'s on First and `sources=quattroshapes` will be interpreted as `sources=whosonfirst`. If you have any questions, please email search@mapzen.com.' ]
json.geocoding.warnings.should.eql ['You are using Quattroshapes as a data source in this query. Quattroshapes has been disabled as a data source for Mapzen Search, and has beenreplaced by Who\'s on First, an actively maintained data project based on QuattroshapesYour existing queries WILL CONTINUE TO WORK for the foreseeable future, but results will be coming from Who\'s on First and `sources=quattroshapes` will be interpreted as `sources=whosonfirst`. If you have any questions, please email pelias.team@gmail.com.' ]
#? inputs
json.geocoding.query['size'].should.eql 10

2
test/ciao/search/sources_deprecation_warning.coffee

@ -27,7 +27,7 @@ should.not.exist json.geocoding.errors
#? expected warnings
should.exist json.geocoding.warnings
json.geocoding.warnings.should.eql ['You are using Quattroshapes as a data source in this query. Quattroshapes has been disabled as a data source for Mapzen Search, and has beenreplaced by Who\'s on First, an actively maintained data project based on QuattroshapesYour existing queries WILL CONTINUE TO WORK for the foreseeable future, but results will be coming from Who\'s on First and `sources=quattroshapes` will be interpreted as `sources=whosonfirst`. If you have any questions, please email search@mapzen.com.' ]
json.geocoding.warnings.should.eql ['You are using Quattroshapes as a data source in this query. Quattroshapes has been disabled as a data source for Mapzen Search, and has beenreplaced by Who\'s on First, an actively maintained data project based on QuattroshapesYour existing queries WILL CONTINUE TO WORK for the foreseeable future, but results will be coming from Who\'s on First and `sources=quattroshapes` will be interpreted as `sources=whosonfirst`. If you have any questions, please email pelias.team@gmail.com.' ]
#? inputs
json.geocoding.query['size'].should.eql 10

82
test/unit/sanitizer/_city_name_standardizer.js

@ -48,82 +48,6 @@ module.exports.tests.text_parser = function(test, common) {
});
test('\'saint\' should be abbreviated to \'st\' wherever it appears in the city', function(t) {
const raw = {};
const clean = {
parsed_text: {
query: 'saint query value',
neighbourhood: 'saint neighbourhood value',
borough: 'saint borough value',
city: 'SainT city sAiNt value saInt',
county: 'saint county value',
state: 'saint state value',
postalcode: 'saint postalcode value',
country: 'saint country value'
}
};
const expected_clean = {
parsed_text: {
query: 'saint query value',
neighbourhood: 'saint neighbourhood value',
borough: 'saint borough value',
city: 'st city st value st',
county: 'saint county value',
state: 'saint state value',
postalcode: 'saint postalcode value',
country: 'saint country value'
}
};
const messages = sanitizer.sanitize(raw, clean);
t.deepEquals(clean, expected_clean);
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('\'sainte\' should be abbreviated to \'ste\' wherever it appears in the city', function(t) {
const raw = {};
const clean = {
parsed_text: {
query: 'sainte query value',
neighbourhood: 'sainte neighbourhood value',
borough: 'sainte borough value',
city: 'SaintE city sAinTe value saINte',
county: 'sainte county value',
state: 'sainte state value',
postalcode: 'sainte postalcode value',
country: 'sainte country value'
}
};
const expected_clean = {
parsed_text: {
query: 'sainte query value',
neighbourhood: 'sainte neighbourhood value',
borough: 'sainte borough value',
city: 'ste city ste value ste',
county: 'sainte county value',
state: 'sainte state value',
postalcode: 'sainte postalcode value',
country: 'sainte country value'
}
};
const messages = sanitizer.sanitize(raw, clean);
t.deepEquals(clean, expected_clean);
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('\'ft\' should be expanded to \'fort\' wherever it appears in the city', function(t) {
const raw = {};
@ -200,18 +124,18 @@ module.exports.tests.text_parser = function(test, common) {
});
test('mixture of \'mt\', \'ft\', \'saint\', and \'sainte\' should be expanded/abbreviated', function(t) {
test('mixture of \'mt\', \'ft\' should be expanded', function(t) {
const raw = {};
const clean = {
parsed_text: {
city: 'mt. ft saint sainte mt ft.'
city: 'mt. ft mt ft.'
}
};
const expected_clean = {
parsed_text: {
city: 'mount fort st ste mount fort'
city: 'mount fort mount fort'
}
};

4
test/unit/sanitizer/_deprecate_quattroshapes.js

@ -15,7 +15,7 @@ module.exports.tests.warning_message_1 = function(test, common) {
'replaced by Who\'s on First, an actively maintained data project based on Quattroshapes' +
'Your existing queries WILL CONTINUE TO WORK for the foreseeable future, but results will ' +
'be coming from Who\'s on First and `sources=quattroshapes` will be interpreted as ' +
'`sources=whosonfirst`. If you have any questions, please email search@mapzen.com.']
'`sources=whosonfirst`. If you have any questions, please email pelias.team@gmail.com.']
}, 'warning emitted');
t.end();
@ -35,7 +35,7 @@ module.exports.tests.warning_message_2 = function(test, common) {
'replaced by Who\'s on First, an actively maintained data project based on Quattroshapes' +
'Your existing queries WILL CONTINUE TO WORK for the foreseeable future, but results will ' +
'be coming from Who\'s on First and `sources=quattroshapes` will be interpreted as ' +
'`sources=whosonfirst`. If you have any questions, please email search@mapzen.com.']
'`sources=whosonfirst`. If you have any questions, please email pelias.team@gmail.com.']
}, 'warning emitted');
t.end();

Loading…
Cancel
Save