Browse Source

Merge pull request #1060 from pelias/microserviceify-libpostal

Microserviceify libpostal
pull/1068/head
Stephen Hess 7 years ago committed by GitHub
parent
commit
0cc1eed8ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      .npmrc
  2. 2
      Dockerfile
  3. 2
      README.md
  4. 97
      controller/libpostal.js
  5. 75
      controller/structured_libpostal.js
  6. 1
      package.json
  7. 23
      routes/v1.js
  8. 44
      sanitizer/_synthesize_analysis.js
  9. 5
      schema.js
  10. 33
      service/configurations/Libpostal.js
  11. 335
      test/unit/controller/libpostal.js
  12. 440
      test/unit/controller/structured_libpostal.js
  13. 2
      test/unit/run.js
  14. 153
      test/unit/sanitizer/_synthesize_analysis.js
  15. 15
      test/unit/schema.js
  16. 99
      test/unit/service/configurations/Libpostal.js

1
.npmrc

@ -0,0 +1 @@
package-lock=false

2
Dockerfile

@ -1,5 +1,5 @@
# base image # base image
FROM pelias/libpostal_baseimage FROM pelias/baseimage
# maintainer information # maintainer information
LABEL maintainer="pelias@mapzen.com" LABEL maintainer="pelias@mapzen.com"

2
README.md

@ -40,7 +40,6 @@ The API recognizes the following properties under the top-level `api` key in you
|parameter|required|default|description| |parameter|required|default|description|
|---|---|---|---| |---|---|---|---|
|`host`|*yes*||specifies the url under which the http service is to run| |`host`|*yes*||specifies the url under which the http service is to run|
|`textAnalyzer`|*no*|*addressit*|can be either `libpostal` or `addressit` however will soon be **deprecated** and only `libpostal` will be supported going forward|
|`indexName`|*no*|*pelias*|name of the Elasticsearch index to be used when building queries| |`indexName`|*no*|*pelias*|name of the Elasticsearch index to be used when building queries|
|`relativeScores`|*no*|true|if set to true, confidence scores will be normalized, realistically at this point setting this to false is not tested or desirable |`relativeScores`|*no*|true|if set to true, confidence scores will be normalized, realistically at this point setting this to false is not tested or desirable
|`accessLog`|*no*||name of the format to use for access logs; may be any one of the [predefined values](https://github.com/expressjs/morgan#predefined-formats) in the `morgan` package. Defaults to `"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely.| |`accessLog`|*no*||name of the format to use for access logs; may be any one of the [predefined values](https://github.com/expressjs/morgan#predefined-formats) in the `morgan` package. Defaults to `"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely.|
@ -66,7 +65,6 @@ Example configuration file would look something like this:
"host": "localhost:3100/v1/", "host": "localhost:3100/v1/",
"indexName": "foobar", "indexName": "foobar",
"relativeScores": true, "relativeScores": true,
"textAnalyzer": "libpostal",
"services": { "services": {
"pip": { "pip": {
"url": "http://mypipservice.com:3000" "url": "http://mypipservice.com:3000"

97
controller/libpostal.js

@ -1,32 +1,109 @@
const text_analyzer = require('pelias-text-analyzer');
const _ = require('lodash'); const _ = require('lodash');
const iso3166 = require('iso3166-1'); const iso3166 = require('iso3166-1');
const Debug = require('../helper/debug'); const Debug = require('../helper/debug');
const debugLog = new Debug('controller:libpostal'); const debugLog = new Debug('controller:libpostal');
const logger = require('pelias-logger').get('api');
function setup(should_execute) { // mapping object from libpostal fields to pelias fields
var field_mapping = {
island: 'island',
category: 'category',
house: 'query',
house_number: 'number',
road: 'street',
suburb: 'neighbourhood',
city_district: 'borough',
city: 'city',
state_district: 'county',
state: 'state',
postcode: 'postalcode',
country: 'country'
};
// This controller calls the hosted libpostal service and converts the response
// to a generic format for later use. The hosted service returns an array like:
//
// ```
// [
// {
// label: 'house_number',
// value: '30'
// },
// {
// label: 'road',
// value: 'west 26th street'
// },
// {
// label: 'city',
// value: 'new york'
// },
// {
// label: 'state',
// value: 'ny'
// }
//]
// ```
//
// where `label` can be any of (currently):
// - house (generally interpreted as unknown, treated by pelias like a query term)
// - category (like "restaurants")
// - house_number
// - road
// - unit (apt or suite #)
// - suburb (like a neighbourhood)
// - city
// - city_district (like an NYC borough)
// - state_district (like a county)
// - state
// - postcode
// - country
//
// The Pelias query module is not concerned with unit.
//
function setup(libpostalService, should_execute) {
function controller( req, res, next ){ function controller( req, res, next ){
// bail early if req/res don't pass conditions for execution // bail early if req/res don't pass conditions for execution
if (!should_execute(req, res)) { if (!should_execute(req, res)) {
return next(); return next();
} }
const initialTime = debugLog.beginTimer(req); const initialTime = debugLog.beginTimer(req);
// parse text with query parser
const parsed_text = text_analyzer.parse(req.clean.text);
if (parsed_text !== undefined) { libpostalService(req, (err, response) => {
// if a known ISO2 country was parsed, convert it to ISO3 if (err) {
if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) { // push err.message or err onto req.errors
parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country)); req.errors.push( _.get(err, 'message', err) );
}
} else if (_.some(_.countBy(response, o => o.label), count => count > 1)) {
logger.warn(`discarding libpostal parse of '${req.clean.text}' due to duplicate field assignments`);
return next();
} else if (_.isEmpty(response)) {
return next();
} else {
req.clean.parser = 'libpostal'; req.clean.parser = 'libpostal';
req.clean.parsed_text = parsed_text; req.clean.parsed_text = response.reduce(function(o, f) {
if (field_mapping.hasOwnProperty(f.label)) {
o[field_mapping[f.label]] = f.value;
}
return o;
}, {});
if (_.has(req.clean.parsed_text, 'country') && iso3166.is2(_.toUpper(req.clean.parsed_text.country))) {
req.clean.parsed_text.country = iso3166.to3(_.toUpper(req.clean.parsed_text.country));
}
debugLog.push(req, {parsed_text: req.clean.parsed_text}); debugLog.push(req, {parsed_text: req.clean.parsed_text});
} }
debugLog.stopTimer(req, initialTime); debugLog.stopTimer(req, initialTime);
return next(); return next();
});
} }
return controller; return controller;

75
controller/structured_libpostal.js

@ -0,0 +1,75 @@
const _ = require('lodash');
const Debug = require('../helper/debug');
const debugLog = new Debug('controller:libpostal');
const logger = require('pelias-logger').get('api');
// if there's a house_number in the libpostal response, return it
// otherwise return the postcode field (which may be undefined)
function findHouseNumberField(response) {
const house_number_field = response.find(f => f.label === 'house_number');
if (house_number_field) {
return house_number_field;
}
return response.find(f => f.label === 'postcode');
}
function setup(libpostalService, should_execute) {
function controller( req, res, next ){
// bail early if req/res don't pass conditions for execution
if (!should_execute(req, res)) {
return next();
}
const initialTime = debugLog.beginTimer(req);
libpostalService(req, (err, response) => {
if (err) {
// push err.message or err onto req.errors
req.errors.push( _.get(err, 'message', err) );
} else {
// figure out which field contains the probable house number, prefer house_number
// libpostal parses some inputs, like `3370 cobbe ave`, as a postcode+street
// so because we're treating the entire field as a street address, it's safe
// to assume that an identified postcode is actually a house number.
const house_number_field = findHouseNumberField(response);
// if we're fairly certain that libpostal identified a house number
// (from either the house_number or postcode field), place it into the
// number field and remove the first instance of that value from address
// and assign to street
// eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St
if (house_number_field) {
req.clean.parsed_text.number = house_number_field.value;
// remove the first instance of the number and trim whitespace
req.clean.parsed_text.street = _.trim(_.replace(req.clean.parsed_text.address, req.clean.parsed_text.number, ''));
} else {
// otherwise no house number was identifiable, so treat the entire input
// as a street
req.clean.parsed_text.street = req.clean.parsed_text.address;
}
// the address field no longer means anything since it's been parsed, so remove it
delete req.clean.parsed_text.address;
debugLog.push(req, {parsed_text: response});
}
debugLog.stopTimer(req, initialTime);
return next();
});
}
return controller;
}
module.exports = setup;

1
package.json

@ -62,7 +62,6 @@
"pelias-model": "5.2.0", "pelias-model": "5.2.0",
"pelias-query": "9.1.1", "pelias-query": "9.1.1",
"pelias-sorting": "1.1.0", "pelias-sorting": "1.1.0",
"pelias-text-analyzer": "1.10.2",
"predicates": "^2.0.0", "predicates": "^2.0.0",
"retry": "^0.10.1", "retry": "^0.10.1",
"stats-lite": "^2.0.4", "stats-lite": "^2.0.4",

23
routes/v1.js

@ -29,6 +29,7 @@ var controllers = {
coarse_reverse: require('../controller/coarse_reverse'), coarse_reverse: require('../controller/coarse_reverse'),
mdToHTML: require('../controller/markdownToHtml'), mdToHTML: require('../controller/markdownToHtml'),
libpostal: require('../controller/libpostal'), libpostal: require('../controller/libpostal'),
structured_libpostal: require('../controller/structured_libpostal'),
place: require('../controller/place'), place: require('../controller/place'),
placeholder: require('../controller/placeholder'), placeholder: require('../controller/placeholder'),
search: require('../controller/search'), search: require('../controller/search'),
@ -96,6 +97,7 @@ const PlaceHolder = require('../service/configurations/PlaceHolder');
const PointInPolygon = require('../service/configurations/PointInPolygon'); const PointInPolygon = require('../service/configurations/PointInPolygon');
const Language = require('../service/configurations/Language'); const Language = require('../service/configurations/Language');
const Interpolation = require('../service/configurations/Interpolation'); const Interpolation = require('../service/configurations/Interpolation');
const Libpostal = require('../service/configurations/Libpostal');
/** /**
* Append routes to app * Append routes to app
@ -122,6 +124,18 @@ function addRoutes(app, peliasConfig) {
const interpolationService = serviceWrapper(interpolationConfiguration); const interpolationService = serviceWrapper(interpolationConfiguration);
const isInterpolationEnabled = _.constant(interpolationConfiguration.isEnabled()); const isInterpolationEnabled = _.constant(interpolationConfiguration.isEnabled());
// standard libpostal should use req.clean.text for the `address` parameter
const libpostalConfiguration = new Libpostal(
_.defaultTo(peliasConfig.api.services.libpostal, {}),
_.property('clean.text'));
const libpostalService = serviceWrapper(libpostalConfiguration);
// structured libpostal should use req.clean.parsed_text.address for the `address` parameter
const structuredLibpostalConfiguration = new Libpostal(
_.defaultTo(peliasConfig.api.services.libpostal, {}),
_.property('clean.parsed_text.address'));
const structuredLibpostalService = serviceWrapper(structuredLibpostalConfiguration);
// fallback to coarse reverse when regular reverse didn't return anything // fallback to coarse reverse when regular reverse didn't return anything
const coarseReverseShouldExecute = all( const coarseReverseShouldExecute = all(
isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData) isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData)
@ -132,6 +146,12 @@ function addRoutes(app, peliasConfig) {
not(isRequestSourcesOnlyWhosOnFirst) not(isRequestSourcesOnlyWhosOnFirst)
); );
// for libpostal to execute for structured requests, req.clean.parsed_text.address must exist
const structuredLibpostalShouldExecute = all(
not(hasRequestErrors),
hasParsedTextProperties.all('address')
);
// execute placeholder if libpostal only parsed as admin-only and needs to // execute placeholder if libpostal only parsed as admin-only and needs to
// be geodisambiguated // be geodisambiguated
const placeholderGeodisambiguationShouldExecute = all( const placeholderGeodisambiguationShouldExecute = all(
@ -256,7 +276,7 @@ function addRoutes(app, peliasConfig) {
sanitizers.search.middleware(peliasConfig.api), sanitizers.search.middleware(peliasConfig.api),
middleware.requestLanguage, middleware.requestLanguage,
middleware.calcSize(), middleware.calcSize(),
controllers.libpostal(libpostalShouldExecute), controllers.libpostal(libpostalService, libpostalShouldExecute),
controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute), controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute),
controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute), controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute),
controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute), controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute),
@ -286,6 +306,7 @@ function addRoutes(app, peliasConfig) {
sanitizers.structured_geocoding.middleware(peliasConfig.api), sanitizers.structured_geocoding.middleware(peliasConfig.api),
middleware.requestLanguage, middleware.requestLanguage,
middleware.calcSize(), middleware.calcSize(),
controllers.structured_libpostal(structuredLibpostalService, structuredLibpostalShouldExecute),
controllers.search(peliasConfig.api, esclient, queries.structured_geocoding, not(hasResponseDataOrRequestErrors)), controllers.search(peliasConfig.api, esclient, queries.structured_geocoding, not(hasResponseDataOrRequestErrors)),
postProc.trimByGranularityStructured(), postProc.trimByGranularityStructured(),
postProc.distances('focus.point.'), postProc.distances('focus.point.'),

44
sanitizer/_synthesize_analysis.js

@ -1,5 +1,4 @@
const _ = require('lodash'); const _ = require('lodash');
const text_analyzer = require('pelias-text-analyzer');
const fields = { const fields = {
'venue': 'query', 'venue': 'query',
@ -17,20 +16,6 @@ function normalizeWhitespaceToSingleSpace(val) {
return _.replace(_.trim(val), /\s+/g, ' '); return _.replace(_.trim(val), /\s+/g, ' ');
} }
function isPostalCodeOnly(parsed_text) {
return Object.keys(parsed_text).length === 1 &&
parsed_text.hasOwnProperty('postalcode');
}
// figure out which field contains the probable house number, prefer number
// libpostal parses some inputs, like `3370 cobbe ave`, as a postcode+street
// so because we're treating the entire field as a street address, it's safe
// to assume that an identified postcode is actually a house number.
function getHouseNumberField(analyzed_address) {
// return the first field available in the libpostal response, undefined if none
return _.find(['number', 'postalcode'], _.partial(_.has, analyzed_address));
}
function _sanitize( raw, clean ){ function _sanitize( raw, clean ){
// error & warning messages // error & warning messages
@ -51,35 +36,8 @@ function _sanitize( raw, clean ){
`at least one of the following fields is required: ${Object.keys(fields).join(', ')}`); `at least one of the following fields is required: ${Object.keys(fields).join(', ')}`);
} }
if (clean.parsed_text.hasOwnProperty('address')) {
const analyzed_address = text_analyzer.parse(clean.parsed_text.address);
const house_number_field = getHouseNumberField(analyzed_address);
// if we're fairly certain that libpostal identified a house number
// (from either the house_number or postcode field), place it into the
// number field and remove the first instance of that value from address
// and assign to street
// eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St
if (house_number_field) {
clean.parsed_text.number = analyzed_address[house_number_field];
// remove the first instance of the number and trim whitespace
clean.parsed_text.street = _.trim(_.replace(clean.parsed_text.address, clean.parsed_text.number, ''));
} else {
// otherwise no house number was identifiable, so treat the entire input
// as a street
clean.parsed_text.street = clean.parsed_text.address;
}
// the address field no longer means anything since it's been parsed, so remove it
delete clean.parsed_text.address;
}
return messages; return messages;
} }
function _expected() { function _expected() {

5
schema.js

@ -41,6 +41,11 @@ module.exports = Joi.object().keys({
url: Joi.string().uri({ scheme: /https?/ }), url: Joi.string().uri({ scheme: /https?/ }),
timeout: Joi.number().integer().optional().default(250).min(0), timeout: Joi.number().integer().optional().default(250).min(0),
retries: Joi.number().integer().optional().default(3).min(0), retries: Joi.number().integer().optional().default(3).min(0),
}).unknown(false).requiredKeys('url'),
libpostal: Joi.object().keys({
url: Joi.string().uri({ scheme: /https?/ }),
timeout: Joi.number().integer().optional().default(250).min(0),
retries: Joi.number().integer().optional().default(3).min(0),
}).unknown(false).requiredKeys('url') }).unknown(false).requiredKeys('url')
}).unknown(false).default({}), // default api.services to an empty object }).unknown(false).default({}), // default api.services to an empty object
defaultParameters: Joi.object().keys({ defaultParameters: Joi.object().keys({

33
service/configurations/Libpostal.js

@ -0,0 +1,33 @@
'use strict';
const url = require('url');
const ServiceConfiguration = require('pelias-microservice-wrapper').ServiceConfiguration;
class Libpostal extends ServiceConfiguration {
constructor(o, propertyExtractor) {
super('libpostal', o);
// save off the propertyExtractor function
// this is used to extract a single property from req. eg:
// * _.property('clean.text')
// * _.property('clean.parsed_text.address')
// will return those properties from req
this.propertyExtractor = propertyExtractor;
}
getParameters(req) {
return {
address: this.propertyExtractor(req)
};
}
getUrl(req) {
return url.resolve(this.baseUrl, 'parse');
}
}
module.exports = Libpostal;

335
test/unit/controller/libpostal.js

@ -1,281 +1,304 @@
'use strict'; 'use strict';
const proxyquire = require('proxyquire').noCallThru(); const proxyquire = require('proxyquire').noCallThru();
const libpostal = require('../../../controller/libpostal');
const _ = require('lodash'); const _ = require('lodash');
const mock_logger = require('pelias-mock-logger');
module.exports.tests = {}; module.exports.tests = {};
module.exports.tests.interface = (test, common) => { module.exports.tests.interface = (test, common) => {
test('valid interface', t => { test('valid interface', (t) => {
const controller = proxyquire('../../../controller/libpostal', { t.equal(typeof libpostal, 'function', 'libpostal is a function');
'pelias-text-analyzer': { t.equal(typeof libpostal(), 'function', 'libpostal returns a controller');
parse: () => undefined
}
});
t.equal(typeof controller, 'function', 'libpostal is a function');
t.equal(typeof controller(), 'function', 'libpostal returns a controller');
t.end(); t.end();
}); });
};
module.exports.tests.early_exit_conditions = (test, common) => {
test('should_execute returning false should not call service', t => {
const service = () => {
t.fail('service should not have been called');
}; };
module.exports.tests.should_execute = (test, common) => { const should_execute = (req) => {
test('should_execute returning false should not call text-analyzer', t => {
const should_execute = (req, res) => {
// req and res should be passed to should_execute // req and res should be passed to should_execute
t.deepEquals(req, { t.deepEquals(req, {
clean: { clean: {
text: 'original query' text: 'original query'
} }
}); });
t.deepEquals(res, { b: 2 });
return false; return false;
}; };
const controller = proxyquire('../../../controller/libpostal', { const controller = libpostal(service, should_execute);
'pelias-text-analyzer': {
parse: () => {
t.fail('parse should not have been called');
}
}
})(should_execute);
const req = { const req = {
clean: { clean: {
text: 'original query' text: 'original query'
} }
}; };
const res = { b: 2 };
controller(req, res, () => { controller(req, undefined, () => {
t.deepEquals(req, { t.deepEquals(req, {
clean: { clean: {
text: 'original query' text: 'original query'
} }
}, 'req should not have been modified'); }, 'req should not have been modified');
t.deepEquals(res, { b: 2 });
t.end(); t.end();
}); });
}); });
test('should_execute returning false should not call text-analyzer', t => { };
t.plan(5);
const should_execute = (req, res) => { module.exports.tests.error_conditions = (test, common) => {
// req and res should be passed to should_execute test('service returning error should append and not modify req.clean', t => {
t.deepEquals(req, { const service = (req, callback) => {
clean: { callback('libpostal service error', []);
text: 'original query'
}
});
t.deepEquals(res, { b: 2 });
return true;
}; };
const controller = proxyquire('../../../controller/libpostal', { const controller = libpostal(service, () => true);
'pelias-text-analyzer': {
parse: (query) => {
t.equals(query, 'original query');
return undefined;
}
}
})(should_execute);
const req = { const req = {
clean: { clean: {
text: 'original query' text: 'original query'
} },
errors: []
}; };
const res = { b: 2 };
controller(req, res, () => { controller(req, undefined, () => {
t.deepEquals(req, { t.deepEquals(req, {
clean: { clean: {
text: 'original query' text: 'original query'
} },
errors: ['libpostal service error']
}, 'req should not have been modified'); }, 'req should not have been modified');
t.deepEquals(res, { b: 2 });
t.end(); t.end();
}); });
}); });
}; };
module.exports.tests.parse_is_called = (test, common) => { module.exports.tests.failure_conditions = (test, common) => {
test('parse returning undefined should not overwrite clean.parsed_text', t => { test('service returning 2 or more of a label should return undefined and log message', t => {
const controller = proxyquire('../../../controller/libpostal', { const logger = mock_logger();
'pelias-text-analyzer': {
parse: () => undefined
}
})(() => true);
const req = { const service = (req, callback) => {
clean: { const response = [
parsed_text: 'original parsed_text' {
} label: 'road',
}; value: 'road value 1'
const res = 'this is the response'; },
{
controller(req, res, () => { label: 'city',
t.deepEquals(req, { value: 'city value'
clean: { },
parsed_text: 'original parsed_text' {
label: 'road',
value: 'road value 2'
} }
}); ];
t.deepEquals(res, 'this is the response');
t.end();
});
}); callback(null, response);
};
test('parse returning something should overwrite clean.parsed_text', t => {
const controller = proxyquire('../../../controller/libpostal', { const controller = proxyquire('../../../controller/libpostal', {
'pelias-text-analyzer': { 'pelias-logger': logger
parse: () => 'replacement parsed_text' })(service, () => true);
}
})(() => true);
const req = { const req = {
clean: { clean: {
parsed_text: 'original parsed_text' text: 'query value'
} },
errors: []
}; };
const res = 'this is the response';
controller(req, res, () => { controller(req, undefined, () => {
t.ok(logger.isWarnMessage('discarding libpostal parse of \'query value\' due to duplicate field assignments'));
t.deepEquals(req, { t.deepEquals(req, {
clean: { clean: {
parser: 'libpostal', text: 'query value'
parsed_text: 'replacement parsed_text' },
} errors: []
}); }, 'req should not have been modified');
t.deepEquals(res, 'this is the response');
t.end(); t.end();
}); });
}); });
test('service returning empty array should not set parsed_text or parser', t => {
const logger = mock_logger();
const service = (req, callback) => {
callback(null, []);
}; };
module.exports.tests.iso2_conversion = (test, common) => {
test('no country in parse response should not leave country unset', t => {
const controller = proxyquire('../../../controller/libpostal', { const controller = proxyquire('../../../controller/libpostal', {
'pelias-text-analyzer': { 'pelias-logger': logger
parse: () => ({ })(service, () => true);
locality: 'this is the locality'
})
},
'iso3166-1': {
is2: () => t.fail('should not have been called'),
to3: () => t.fail('should not have been called')
}
})(() => true);
const req = { const req = {
clean: { clean: {
parsed_text: 'original parsed_text' text: 'query value'
} },
errors: []
}; };
const res = 'this is the response';
controller(req, res, () => { controller(req, undefined, () => {
t.deepEquals(req, { t.deepEquals(req, {
clean: { clean: {
parser: 'libpostal', text: 'query value'
parsed_text: { },
locality: 'this is the locality' errors: []
} }, 'req should not have been modified');
}
});
t.deepEquals(res, 'this is the response');
t.end(); t.end();
}); });
}); });
test('unknown country should not be converted', t => { };
t.plan(3);
const controller = proxyquire('../../../controller/libpostal', { module.exports.tests.success_conditions = (test, common) => {
'pelias-text-analyzer': { test('service returning valid response should convert and append', t => {
parse: () => ({ const service = (req, callback) => {
country: 'unknown country code' const response = [
}) {
label: 'island',
value: 'island value'
}, },
'iso3166-1': { {
is2: country => { label: 'category',
t.equals(country, 'UNKNOWN COUNTRY CODE'); value: 'category value'
return false; },
{
label: 'house',
value: 'house value'
},
{
label: 'house_number',
value: 'house_number value'
},
{
label: 'road',
value: 'road value'
},
{
label: 'suburb',
value: 'suburb value'
},
{
label: 'city_district',
value: 'city_district value'
}, },
to3: () => t.fail('should not have been called') {
label: 'city',
value: 'city value'
},
{
label: 'state_district',
value: 'state_district value'
},
{
label: 'state',
value: 'state value'
},
{
label: 'postcode',
value: 'postcode value'
},
{
label: 'country',
value: 'country value'
} }
})(() => true); ];
callback(null, response);
};
const controller = libpostal(service, () => true);
const req = { const req = {
clean: { clean: {
parsed_text: 'original parsed_text' text: 'original query'
} },
errors: []
}; };
const res = 'this is the response';
controller(req, res, () => { controller(req, undefined, () => {
t.deepEquals(req, { t.deepEquals(req, {
clean: { clean: {
text: 'original query',
parser: 'libpostal', parser: 'libpostal',
parsed_text: { parsed_text: {
country: 'unknown country code' island: 'island value',
category: 'category value',
query: 'house value',
number: 'house_number value',
street: 'road value',
neighbourhood: 'suburb value',
borough: 'city_district value',
city: 'city value',
county: 'state_district value',
state: 'state value',
postalcode: 'postcode value',
country: 'country value'
} }
} },
}); errors: []
t.deepEquals(res, 'this is the response'); }, 'req should not have been modified');
t.end(); t.end();
});
}); });
test('ISO2 country should be converted to ISO3', t => { });
t.plan(4);
const controller = proxyquire('../../../controller/libpostal', { test('ISO-2 country should be converted to ISO-3', t => {
'pelias-text-analyzer': { const service = (req, callback) => {
parse: () => ({ const response = [
country: 'ISO2 COUNTRY CODE' {
}) label: 'country',
}, value: 'ca'
'iso3166-1': {
is2: country => {
t.equals(country, 'ISO2 COUNTRY CODE');
return true;
},
to3: country => {
t.equals(country, 'ISO2 COUNTRY CODE');
return 'ISO3 COUNTRY CODE';
} }
} ];
})(() => true);
callback(null, response);
};
const controller = libpostal(service, () => true);
const req = { const req = {
clean: { clean: {
parsed_text: 'original parsed_text' text: 'original query'
} },
errors: []
}; };
const res = 'this is the response';
controller(req, res, () => { controller(req, undefined, () => {
t.deepEquals(req, { t.deepEquals(req, {
clean: { clean: {
text: 'original query',
parser: 'libpostal', parser: 'libpostal',
parsed_text: { parsed_text: {
country: 'ISO3 COUNTRY CODE' country: 'CAN'
} }
} },
}); errors: []
t.deepEquals(res, 'this is the response'); }, 'req should not have been modified');
t.end(); t.end();
}); });
}); });

440
test/unit/controller/structured_libpostal.js

@ -0,0 +1,440 @@
'use strict';
const proxyquire = require('proxyquire').noCallThru();
const libpostal = require('../../../controller/structured_libpostal');
const _ = require('lodash');
const mock_logger = require('pelias-mock-logger');
module.exports.tests = {};
module.exports.tests.interface = (test, common) => {
test('valid interface', (t) => {
t.equal(typeof libpostal, 'function', 'libpostal is a function');
t.equal(typeof libpostal(), 'function', 'libpostal returns a controller');
t.end();
});
};
module.exports.tests.early_exit_conditions = (test, common) => {
test('should_execute returning false should not call service', t => {
const service = () => {
t.fail('service should not have been called');
};
const should_execute = (req) => {
// req and res should be passed to should_execute
t.deepEquals(req, {
clean: {
text: 'original query'
}
});
return false;
};
const controller = libpostal(service, should_execute);
const req = {
clean: {
text: 'original query'
}
};
controller(req, undefined, () => {
t.deepEquals(req, {
clean: {
text: 'original query'
}
}, 'req should not have been modified');
t.end();
});
});
};
module.exports.tests.error_conditions = (test, common) => {
test('service returning error should append and not modify req.clean', t => {
const service = (req, callback) => {
callback('libpostal service error', []);
};
const controller = libpostal(service, () => true);
const req = {
clean: {
text: 'original query'
},
errors: []
};
controller(req, undefined, () => {
t.deepEquals(req, {
clean: {
text: 'original query'
},
errors: ['libpostal service error']
}, 'req should not have been modified');
t.end();
});
});
};
// module.exports.tests.failure_conditions = (test, common) => {
// test('service returning 2 or more of a label should return undefined and log message', t => {
// const logger = mock_logger();
//
// const service = (req, callback) => {
// const response = [
// {
// label: 'road',
// value: 'road value 1'
// },
// {
// label: 'city',
// value: 'city value'
// },
// {
// label: 'road',
// value: 'road value 2'
// }
// ];
//
// callback(null, response);
// };
//
// const controller = proxyquire('../../../controller/libpostal', {
// 'pelias-logger': logger
// })(service, () => true);
//
// const req = {
// clean: {
// text: 'query value'
// },
// errors: []
// };
//
// controller(req, undefined, () => {
// t.ok(logger.isWarnMessage('discarding libpostal parse of \'query value\' due to duplicate field assignments'));
//
// t.deepEquals(req, {
// clean: {
// text: 'query value'
// },
// errors: []
// }, 'req should not have been modified');
//
// t.end();
//
// });
//
// });
//
// test('service returning empty array should not set parsed_text or parser', t => {
// const logger = mock_logger();
//
// const service = (req, callback) => {
// callback(null, []);
// };
//
// const controller = proxyquire('../../../controller/libpostal', {
// 'pelias-logger': logger
// })(service, () => true);
//
// const req = {
// clean: {
// text: 'query value'
// },
// errors: []
// };
//
// controller(req, undefined, () => {
// t.deepEquals(req, {
// clean: {
// text: 'query value'
// },
// errors: []
// }, 'req should not have been modified');
//
// t.end();
//
// });
//
// });
//
// };
//
module.exports.tests.success_conditions = (test, common) => {
test('service returning house_number should set req.clean.parsed_text.', t => {
const service = (req, callback) => {
const response = [
{
label: 'house_number',
value: 'house_number value'
},
{
label: 'postcode',
value: 'postcode value'
}
];
callback(null, response);
};
const controller = libpostal(service, () => true);
const req = {
clean: {
parsed_text: {
address: 'other value house_number value street value'
}
},
errors: []
};
controller(req, undefined, () => {
t.deepEquals(req, {
clean: {
parsed_text: {
number: 'house_number value',
street: 'other value street value'
}
},
errors: []
}, 'req should not have been modified');
t.end();
});
});
test('service returning postcode should set req.clean.parsed_text.', t => {
const service = (req, callback) => {
const response = [
{
label: 'postcode',
value: 'postcode value'
}
];
callback(null, response);
};
const controller = libpostal(service, () => true);
const req = {
clean: {
parsed_text: {
address: 'other value postcode value street value'
}
},
errors: []
};
controller(req, undefined, () => {
t.deepEquals(req, {
clean: {
parsed_text: {
number: 'postcode value',
street: 'other value street value'
}
},
errors: []
}, 'req should not have been modified');
t.end();
});
});
test('service returning neither house_number nor postcode should not set req.clean.parsed_text.number', t => {
const service = (req, callback) => {
const response = [
{
label: 'city',
value: 'city value'
}
];
callback(null, response);
};
const controller = libpostal(service, () => true);
const req = {
clean: {
parsed_text: {
address: 'street value'
}
},
errors: []
};
controller(req, undefined, () => {
t.deepEquals(req, {
clean: {
parsed_text: {
street: 'street value'
}
},
errors: []
}, 'req should not have been modified');
t.end();
});
});
// test('service returning valid response should convert and append', t => {
// const service = (req, callback) => {
// const response = [
// {
// label: 'island',
// value: 'island value'
// },
// {
// label: 'category',
// value: 'category value'
// },
// {
// label: 'house',
// value: 'house value'
// },
// {
// label: 'house_number',
// value: 'house_number value'
// },
// {
// label: 'road',
// value: 'road value'
// },
// {
// label: 'suburb',
// value: 'suburb value'
// },
// {
// label: 'city_district',
// value: 'city_district value'
// },
// {
// label: 'city',
// value: 'city value'
// },
// {
// label: 'state_district',
// value: 'state_district value'
// },
// {
// label: 'state',
// value: 'state value'
// },
// {
// label: 'postcode',
// value: 'postcode value'
// },
// {
// label: 'country',
// value: 'country value'
// }
// ];
//
// callback(null, response);
// };
//
// const controller = libpostal(service, () => true);
//
// const req = {
// clean: {
// text: 'original query'
// },
// errors: []
// };
//
// controller(req, undefined, () => {
// t.deepEquals(req, {
// clean: {
// text: 'original query',
// parser: 'libpostal',
// parsed_text: {
// island: 'island value',
// category: 'category value',
// query: 'house value',
// number: 'house_number value',
// street: 'road value',
// neighbourhood: 'suburb value',
// borough: 'city_district value',
// city: 'city value',
// county: 'state_district value',
// state: 'state value',
// postalcode: 'postcode value',
// country: 'country value'
// }
// },
// errors: []
// }, 'req should not have been modified');
//
// t.end();
//
// });
//
// });
//
// test('ISO-2 country should be converted to ISO-3', t => {
// const service = (req, callback) => {
// const response = [
// {
// label: 'country',
// value: 'ca'
// }
// ];
//
// callback(null, response);
// };
//
// const controller = libpostal(service, () => true);
//
// const req = {
// clean: {
// text: 'original query'
// },
// errors: []
// };
//
// controller(req, undefined, () => {
// t.deepEquals(req, {
// clean: {
// text: 'original query',
// parser: 'libpostal',
// parsed_text: {
// country: 'CAN'
// }
// },
// errors: []
// }, 'req should not have been modified');
//
// t.end();
//
// });
//
// });
//
};
module.exports.all = (tape, common) => {
function test(name, testFunction) {
return tape(`GET /libpostal ${name}`, testFunction);
}
for( const testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};

2
test/unit/run.js

@ -14,6 +14,7 @@ var tests = [
require('./controller/coarse_reverse'), require('./controller/coarse_reverse'),
require('./controller/index'), require('./controller/index'),
require('./controller/libpostal'), require('./controller/libpostal'),
require('./controller/structured_libpostal'),
require('./controller/place'), require('./controller/place'),
require('./controller/placeholder'), require('./controller/placeholder'),
require('./controller/search'), require('./controller/search'),
@ -100,6 +101,7 @@ var tests = [
require('./sanitizer/wrap'), require('./sanitizer/wrap'),
require('./service/configurations/Interpolation'), require('./service/configurations/Interpolation'),
require('./service/configurations/Language'), require('./service/configurations/Language'),
require('./service/configurations/Libpostal'),
require('./service/configurations/PlaceHolder'), require('./service/configurations/PlaceHolder'),
require('./service/configurations/PointInPolygon'), require('./service/configurations/PointInPolygon'),
require('./service/mget'), require('./service/mget'),

153
test/unit/sanitizer/_synthesize_analysis.js

@ -1,18 +1,14 @@
const _ = require('lodash'); const _ = require('lodash');
const proxyquire = require('proxyquire').noCallThru(); const proxyquire = require('proxyquire').noCallThru();
const sanitizer = require('../../../sanitizer/_synthesize_analysis');
module.exports.tests = {}; module.exports.tests = {};
module.exports.tests.text_parser = function(test, common) { module.exports.tests.text_parser = function(test, common) {
test('all variables should be parsed', function(t) { test('all variables should be parsed', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
const raw = { const raw = {
venue: ' \t venue \t value \t ', venue: ' \t venue \t value \t ',
address: ' \t address \t value \t ',
neighbourhood: ' \t neighbourhood \t value \t ', neighbourhood: ' \t neighbourhood \t value \t ',
borough: ' \t borough \t value \t ', borough: ' \t borough \t value \t ',
locality: ' \t locality \t value \t ', locality: ' \t locality \t value \t ',
@ -27,6 +23,7 @@ module.exports.tests.text_parser = function(test, common) {
const expected_clean = { const expected_clean = {
parsed_text: { parsed_text: {
query: 'venue value', query: 'venue value',
address: 'address value',
neighbourhood: 'neighbourhood value', neighbourhood: 'neighbourhood value',
borough: 'borough value', borough: 'borough value',
city: 'locality value', city: 'locality value',
@ -47,12 +44,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('non-string and blank string values should be treated as not supplied', function(t) { test('non-string and blank string values should be treated as not supplied', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
// helper to return a random value that's considered invalid // helper to return a random value that's considered invalid
function getInvalidValue() { function getInvalidValue() {
return _.sample([{}, [], false, '', ' \t ', 17, undefined]); return _.sample([{}, [], false, '', ' \t ', 17, undefined]);
@ -87,12 +78,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('no supplied fields should return error', function(t) { test('no supplied fields should return error', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
const raw = {}; const raw = {};
const clean = {}; const clean = {};
@ -110,12 +95,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('postalcode-only parsed_text should return error', function(t) { test('postalcode-only parsed_text should return error', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
const raw = { const raw = {
postalcode: 'postalcode value' postalcode: 'postalcode value'
}; };
@ -137,132 +116,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('text_analyzer identifying house number should extract it and street', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.equals(query, 'Number Value Street Value Number Value');
return {
number: 'Number Value'
};
}
}});
const raw = {
address: 'Number Value Street Value Number Value'
};
const clean = {};
const expected_clean = {
parsed_text: {
number: 'Number Value',
street: 'Street Value Number Value'
}
};
const messages = sanitizer().sanitize(raw, clean);
t.deepEquals(clean, expected_clean);
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('text_analyzer identifying postalcode but not house number should assign to number and remove from address', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.equals(query, 'Number Value Street Value Number Value');
return {
postalcode: 'Number Value'
};
}
}});
const raw = {
address: 'Number Value Street Value Number Value'
};
const clean = {};
const expected_clean = {
parsed_text: {
number: 'Number Value',
street: 'Street Value Number Value'
}
};
const messages = sanitizer().sanitize(raw, clean);
t.deepEquals(clean, expected_clean);
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('text_analyzer not revealing possible number should move address to street', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.equals(query, 'Street Value');
return {};
}
}});
const raw = {
address: 'Street Value'
};
const clean = {};
const expected_clean = {
parsed_text: {
street: 'Street Value'
}
};
const messages = sanitizer().sanitize(raw, clean);
t.deepEquals(clean, expected_clean);
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('text_analyzer returning undefined on address resolution should treat as if no house number field was found', t => {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.equals(query, 'Street Value');
return undefined;
}
}});
const raw = {
address: 'Street Value'
};
const clean = {};
const expected_clean = {
parsed_text: {
street: 'Street Value'
}
};
const messages = sanitizer().sanitize(raw, clean);
t.deepEquals(clean, expected_clean);
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('return an array of expected parameters in object form for validation', function (t) { test('return an array of expected parameters in object form for validation', function (t) {
const sanitizer = require('../../../sanitizer/_synthesize_analysis'); const sanitizer = require('../../../sanitizer/_synthesize_analysis');
const expected = [ const expected = [

15
test/unit/schema.js

@ -28,6 +28,9 @@ module.exports.tests.completely_valid = (test, common) => {
}, },
interpolation: { interpolation: {
url: 'http://localhost' url: 'http://localhost'
},
libpostal: {
url: 'http://localhost'
} }
}, },
defaultParameters: { defaultParameters: {
@ -419,7 +422,7 @@ module.exports.tests.api_validation = (test, common) => {
}); });
// api.pipService has been moved to api.services.pip.url // api.pipService has been moved to api.services.pip.url
test('any api.pipService value should be allowed', (t) => { test('any api.pipService value should fail', (t) => {
[null, 17, {}, [], true, 'http://localhost'].forEach((value) => { [null, 17, {}, [], true, 'http://localhost'].forEach((value) => {
var config = { var config = {
api: { api: {
@ -543,7 +546,7 @@ module.exports.tests.api_services_validation = (test, common) => {
module.exports.tests.service_validation = (test, common) => { module.exports.tests.service_validation = (test, common) => {
// these tests apply for all the individual service definitions // these tests apply for all the individual service definitions
const services = ['pip', 'placeholder', 'interpolation']; const services = ['pip', 'placeholder', 'interpolation', 'libpostal'];
test('timeout and retries not specified should default to 250 and 3', (t) => { test('timeout and retries not specified should default to 250 and 3', (t) => {
services.forEach(service => { services.forEach(service => {
@ -572,7 +575,7 @@ module.exports.tests.service_validation = (test, common) => {
}); });
test('when api.services.service is defined, url is required', (t) => { test('when api.services.<service> is defined, url is required', (t) => {
services.forEach(service => { services.forEach(service => {
const config = { const config = {
api: { api: {
@ -596,7 +599,7 @@ module.exports.tests.service_validation = (test, common) => {
}); });
test('non-string api.services.pip.url should throw error', (t) => { test('non-string api.services.<service>.url should throw error', (t) => {
services.forEach(service => { services.forEach(service => {
[null, 17, {}, [], true].forEach(value => { [null, 17, {}, [], true].forEach(value => {
const config = { const config = {
@ -626,7 +629,7 @@ module.exports.tests.service_validation = (test, common) => {
}); });
test('non-http/https api.services.pip.url should throw error', (t) => { test('non-http/https api.services.<service>.url should throw error', (t) => {
services.forEach(service => { services.forEach(service => {
['ftp', 'git', 'unknown'].forEach((scheme) => { ['ftp', 'git', 'unknown'].forEach((scheme) => {
const config = { const config = {
@ -656,7 +659,7 @@ module.exports.tests.service_validation = (test, common) => {
}); });
test('non-url children of api.services.pip should be disallowed', (t) => { test('non-url/timeout/retries children of api.services.<service> should be disallowed', (t) => {
services.forEach(service => { services.forEach(service => {
const config = { const config = {
api: { api: {

99
test/unit/service/configurations/Libpostal.js

@ -0,0 +1,99 @@
const Libpostal = require('../../../../service/configurations/Libpostal');
module.exports.tests = {};
module.exports.tests.all = (test, common) => {
test('getName should return \'libpostal\'', (t) => {
const configBlob = {
url: 'http://localhost:1234',
timeout: 17,
retries: 19
};
const libpostal = new Libpostal(configBlob);
t.equals(libpostal.getName(), 'libpostal');
t.equals(libpostal.getBaseUrl(), 'http://localhost:1234/');
t.equals(libpostal.getTimeout(), 17);
t.equals(libpostal.getRetries(), 19);
t.end();
});
test('getUrl should return value passed to constructor', (t) => {
const configBlob = {
url: 'http://localhost:1234',
timeout: 17,
retries: 19
};
const libpostal = new Libpostal(configBlob);
t.equals(libpostal.getUrl(), 'http://localhost:1234/parse');
t.end();
});
test('getParameters should return object with text and lang from req', (t) => {
const configBlob = {
url: 'http://localhost:1234',
timeout: 17,
retries: 19
};
const propertyExtractor = (req) => {
t.deepEquals(req, { a: 1, b: 2});
return 'property value';
};
const libpostal = new Libpostal(configBlob, propertyExtractor);
const req = {
a: 1,
b: 2
};
t.deepEquals(libpostal.getParameters(req), { address: 'property value' });
t.end();
});
test('getHeaders should return empty object', (t) => {
const configBlob = {
url: 'base url',
timeout: 17,
retries: 19
};
const libpostal = new Libpostal(configBlob);
t.deepEquals(libpostal.getHeaders(), {});
t.end();
});
test('baseUrl ending in / should not have double /\'s return by getUrl', (t) => {
const configBlob = {
url: 'http://localhost:1234/',
timeout: 17,
retries: 19
};
const libpostal = new Libpostal(configBlob);
t.deepEquals(libpostal.getUrl(), 'http://localhost:1234/parse');
t.end();
});
};
module.exports.all = (tape, common) => {
function test(name, testFunction) {
return tape(`SERVICE CONFIGURATION /Libpostal ${name}`, testFunction);
}
for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};
Loading…
Cancel
Save