diff --git a/.npmrc b/.npmrc new file mode 100644 index 00000000..43c97e71 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +package-lock=false diff --git a/Dockerfile b/Dockerfile index 041f00dc..e28228c3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # base image -FROM pelias/libpostal_baseimage +FROM pelias/baseimage # maintainer information LABEL maintainer="pelias@mapzen.com" diff --git a/README.md b/README.md index 4253d15e..fd42a7f6 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,6 @@ The API recognizes the following properties under the top-level `api` key in you |parameter|required|default|description| |---|---|---|---| |`host`|*yes*||specifies the url under which the http service is to run| -|`textAnalyzer`|*no*|*addressit*|can be either `libpostal` or `addressit` however will soon be **deprecated** and only `libpostal` will be supported going forward| |`indexName`|*no*|*pelias*|name of the Elasticsearch index to be used when building queries| |`relativeScores`|*no*|true|if set to true, confidence scores will be normalized, realistically at this point setting this to false is not tested or desirable |`accessLog`|*no*||name of the format to use for access logs; may be any one of the [predefined values](https://github.com/expressjs/morgan#predefined-formats) in the `morgan` package. Defaults to `"common"`; if set to `false`, or an otherwise falsy value, disables access-logging entirely.| @@ -66,7 +65,6 @@ Example configuration file would look something like this: "host": "localhost:3100/v1/", "indexName": "foobar", "relativeScores": true, - "textAnalyzer": "libpostal", "services": { "pip": { "url": "http://mypipservice.com:3000" diff --git a/controller/libpostal.js b/controller/libpostal.js index 23c82f85..7ecbe3b6 100644 --- a/controller/libpostal.js +++ b/controller/libpostal.js @@ -1,31 +1,108 @@ -const text_analyzer = require('pelias-text-analyzer'); const _ = require('lodash'); const iso3166 = require('iso3166-1'); const Debug = require('../helper/debug'); const debugLog = new Debug('controller:libpostal'); +const logger = require('pelias-logger').get('api'); -function setup(should_execute) { +// mapping object from libpostal fields to pelias fields +var field_mapping = { + island: 'island', + category: 'category', + house: 'query', + house_number: 'number', + road: 'street', + suburb: 'neighbourhood', + city_district: 'borough', + city: 'city', + state_district: 'county', + state: 'state', + postcode: 'postalcode', + country: 'country' +}; + +// This controller calls the hosted libpostal service and converts the response +// to a generic format for later use. The hosted service returns an array like: +// +// ``` +// [ +// { +// label: 'house_number', +// value: '30' +// }, +// { +// label: 'road', +// value: 'west 26th street' +// }, +// { +// label: 'city', +// value: 'new york' +// }, +// { +// label: 'state', +// value: 'ny' +// } +//] +// ``` +// +// where `label` can be any of (currently): +// - house (generally interpreted as unknown, treated by pelias like a query term) +// - category (like "restaurants") +// - house_number +// - road +// - unit (apt or suite #) +// - suburb (like a neighbourhood) +// - city +// - city_district (like an NYC borough) +// - state_district (like a county) +// - state +// - postcode +// - country +// +// The Pelias query module is not concerned with unit. +// +function setup(libpostalService, should_execute) { function controller( req, res, next ){ // bail early if req/res don't pass conditions for execution if (!should_execute(req, res)) { return next(); } + const initialTime = debugLog.beginTimer(req); - // parse text with query parser - const parsed_text = text_analyzer.parse(req.clean.text); - if (parsed_text !== undefined) { - // if a known ISO2 country was parsed, convert it to ISO3 - if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) { - parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country)); + libpostalService(req, (err, response) => { + if (err) { + // push err.message or err onto req.errors + req.errors.push( _.get(err, 'message', err) ); + + } else if (_.some(_.countBy(response, o => o.label), count => count > 1)) { + logger.warn(`discarding libpostal parse of '${req.clean.text}' due to duplicate field assignments`); + return next(); + + } else if (_.isEmpty(response)) { + return next(); + + } else { + req.clean.parser = 'libpostal'; + req.clean.parsed_text = response.reduce(function(o, f) { + if (field_mapping.hasOwnProperty(f.label)) { + o[field_mapping[f.label]] = f.value; + } + + return o; + }, {}); + + if (_.has(req.clean.parsed_text, 'country') && iso3166.is2(_.toUpper(req.clean.parsed_text.country))) { + req.clean.parsed_text.country = iso3166.to3(_.toUpper(req.clean.parsed_text.country)); + } + + debugLog.push(req, {parsed_text: req.clean.parsed_text}); + } - req.clean.parser = 'libpostal'; - req.clean.parsed_text = parsed_text; - debugLog.push(req, {parsed_text: req.clean.parsed_text}); - } - debugLog.stopTimer(req, initialTime); - return next(); + debugLog.stopTimer(req, initialTime); + return next(); + + }); } diff --git a/controller/structured_libpostal.js b/controller/structured_libpostal.js new file mode 100644 index 00000000..626bf945 --- /dev/null +++ b/controller/structured_libpostal.js @@ -0,0 +1,75 @@ +const _ = require('lodash'); +const Debug = require('../helper/debug'); +const debugLog = new Debug('controller:libpostal'); +const logger = require('pelias-logger').get('api'); + +// if there's a house_number in the libpostal response, return it +// otherwise return the postcode field (which may be undefined) +function findHouseNumberField(response) { + const house_number_field = response.find(f => f.label === 'house_number'); + + if (house_number_field) { + return house_number_field; + } + + return response.find(f => f.label === 'postcode'); + +} + +function setup(libpostalService, should_execute) { + function controller( req, res, next ){ + // bail early if req/res don't pass conditions for execution + if (!should_execute(req, res)) { + return next(); + } + + const initialTime = debugLog.beginTimer(req); + + libpostalService(req, (err, response) => { + if (err) { + // push err.message or err onto req.errors + req.errors.push( _.get(err, 'message', err) ); + + } else { + // figure out which field contains the probable house number, prefer house_number + // libpostal parses some inputs, like `3370 cobbe ave`, as a postcode+street + // so because we're treating the entire field as a street address, it's safe + // to assume that an identified postcode is actually a house number. + const house_number_field = findHouseNumberField(response); + + // if we're fairly certain that libpostal identified a house number + // (from either the house_number or postcode field), place it into the + // number field and remove the first instance of that value from address + // and assign to street + // eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St + if (house_number_field) { + req.clean.parsed_text.number = house_number_field.value; + + // remove the first instance of the number and trim whitespace + req.clean.parsed_text.street = _.trim(_.replace(req.clean.parsed_text.address, req.clean.parsed_text.number, '')); + + } else { + // otherwise no house number was identifiable, so treat the entire input + // as a street + req.clean.parsed_text.street = req.clean.parsed_text.address; + + } + + // the address field no longer means anything since it's been parsed, so remove it + delete req.clean.parsed_text.address; + + debugLog.push(req, {parsed_text: response}); + + } + + debugLog.stopTimer(req, initialTime); + return next(); + + }); + + } + + return controller; +} + +module.exports = setup; diff --git a/package.json b/package.json index 80243ad1..d32e37cc 100644 --- a/package.json +++ b/package.json @@ -62,7 +62,6 @@ "pelias-model": "5.2.0", "pelias-query": "9.1.1", "pelias-sorting": "1.1.0", - "pelias-text-analyzer": "1.10.2", "predicates": "^2.0.0", "retry": "^0.10.1", "stats-lite": "^2.0.4", diff --git a/routes/v1.js b/routes/v1.js index d300d20e..fb3539a8 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -29,6 +29,7 @@ var controllers = { coarse_reverse: require('../controller/coarse_reverse'), mdToHTML: require('../controller/markdownToHtml'), libpostal: require('../controller/libpostal'), + structured_libpostal: require('../controller/structured_libpostal'), place: require('../controller/place'), placeholder: require('../controller/placeholder'), search: require('../controller/search'), @@ -96,6 +97,7 @@ const PlaceHolder = require('../service/configurations/PlaceHolder'); const PointInPolygon = require('../service/configurations/PointInPolygon'); const Language = require('../service/configurations/Language'); const Interpolation = require('../service/configurations/Interpolation'); +const Libpostal = require('../service/configurations/Libpostal'); /** * Append routes to app @@ -122,6 +124,18 @@ function addRoutes(app, peliasConfig) { const interpolationService = serviceWrapper(interpolationConfiguration); const isInterpolationEnabled = _.constant(interpolationConfiguration.isEnabled()); + // standard libpostal should use req.clean.text for the `address` parameter + const libpostalConfiguration = new Libpostal( + _.defaultTo(peliasConfig.api.services.libpostal, {}), + _.property('clean.text')); + const libpostalService = serviceWrapper(libpostalConfiguration); + + // structured libpostal should use req.clean.parsed_text.address for the `address` parameter + const structuredLibpostalConfiguration = new Libpostal( + _.defaultTo(peliasConfig.api.services.libpostal, {}), + _.property('clean.parsed_text.address')); + const structuredLibpostalService = serviceWrapper(structuredLibpostalConfiguration); + // fallback to coarse reverse when regular reverse didn't return anything const coarseReverseShouldExecute = all( isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData) @@ -132,6 +146,12 @@ function addRoutes(app, peliasConfig) { not(isRequestSourcesOnlyWhosOnFirst) ); + // for libpostal to execute for structured requests, req.clean.parsed_text.address must exist + const structuredLibpostalShouldExecute = all( + not(hasRequestErrors), + hasParsedTextProperties.all('address') + ); + // execute placeholder if libpostal only parsed as admin-only and needs to // be geodisambiguated const placeholderGeodisambiguationShouldExecute = all( @@ -256,7 +276,7 @@ function addRoutes(app, peliasConfig) { sanitizers.search.middleware(peliasConfig.api), middleware.requestLanguage, middleware.calcSize(), - controllers.libpostal(libpostalShouldExecute), + controllers.libpostal(libpostalService, libpostalShouldExecute), controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute), controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute), controllers.search_with_ids(peliasConfig.api, esclient, queries.address_using_ids, searchWithIdsShouldExecute), @@ -286,6 +306,7 @@ function addRoutes(app, peliasConfig) { sanitizers.structured_geocoding.middleware(peliasConfig.api), middleware.requestLanguage, middleware.calcSize(), + controllers.structured_libpostal(structuredLibpostalService, structuredLibpostalShouldExecute), controllers.search(peliasConfig.api, esclient, queries.structured_geocoding, not(hasResponseDataOrRequestErrors)), postProc.trimByGranularityStructured(), postProc.distances('focus.point.'), diff --git a/sanitizer/_synthesize_analysis.js b/sanitizer/_synthesize_analysis.js index 3822d9d4..da67c752 100644 --- a/sanitizer/_synthesize_analysis.js +++ b/sanitizer/_synthesize_analysis.js @@ -1,5 +1,4 @@ const _ = require('lodash'); -const text_analyzer = require('pelias-text-analyzer'); const fields = { 'venue': 'query', @@ -17,20 +16,6 @@ function normalizeWhitespaceToSingleSpace(val) { return _.replace(_.trim(val), /\s+/g, ' '); } -function isPostalCodeOnly(parsed_text) { - return Object.keys(parsed_text).length === 1 && - parsed_text.hasOwnProperty('postalcode'); -} - -// figure out which field contains the probable house number, prefer number -// libpostal parses some inputs, like `3370 cobbe ave`, as a postcode+street -// so because we're treating the entire field as a street address, it's safe -// to assume that an identified postcode is actually a house number. -function getHouseNumberField(analyzed_address) { - // return the first field available in the libpostal response, undefined if none - return _.find(['number', 'postalcode'], _.partial(_.has, analyzed_address)); -} - function _sanitize( raw, clean ){ // error & warning messages @@ -51,35 +36,8 @@ function _sanitize( raw, clean ){ `at least one of the following fields is required: ${Object.keys(fields).join(', ')}`); } - if (clean.parsed_text.hasOwnProperty('address')) { - const analyzed_address = text_analyzer.parse(clean.parsed_text.address); - - const house_number_field = getHouseNumberField(analyzed_address); - - // if we're fairly certain that libpostal identified a house number - // (from either the house_number or postcode field), place it into the - // number field and remove the first instance of that value from address - // and assign to street - // eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St - if (house_number_field) { - clean.parsed_text.number = analyzed_address[house_number_field]; - - // remove the first instance of the number and trim whitespace - clean.parsed_text.street = _.trim(_.replace(clean.parsed_text.address, clean.parsed_text.number, '')); - - } else { - // otherwise no house number was identifiable, so treat the entire input - // as a street - clean.parsed_text.street = clean.parsed_text.address; - - } - - // the address field no longer means anything since it's been parsed, so remove it - delete clean.parsed_text.address; - - } - return messages; + } function _expected() { diff --git a/schema.js b/schema.js index 3203bb7d..440da818 100644 --- a/schema.js +++ b/schema.js @@ -41,6 +41,11 @@ module.exports = Joi.object().keys({ url: Joi.string().uri({ scheme: /https?/ }), timeout: Joi.number().integer().optional().default(250).min(0), retries: Joi.number().integer().optional().default(3).min(0), + }).unknown(false).requiredKeys('url'), + libpostal: Joi.object().keys({ + url: Joi.string().uri({ scheme: /https?/ }), + timeout: Joi.number().integer().optional().default(250).min(0), + retries: Joi.number().integer().optional().default(3).min(0), }).unknown(false).requiredKeys('url') }).unknown(false).default({}), // default api.services to an empty object defaultParameters: Joi.object().keys({ diff --git a/service/configurations/Libpostal.js b/service/configurations/Libpostal.js new file mode 100644 index 00000000..9cf1de2b --- /dev/null +++ b/service/configurations/Libpostal.js @@ -0,0 +1,33 @@ +'use strict'; + +const url = require('url'); + +const ServiceConfiguration = require('pelias-microservice-wrapper').ServiceConfiguration; + +class Libpostal extends ServiceConfiguration { + constructor(o, propertyExtractor) { + super('libpostal', o); + + // save off the propertyExtractor function + // this is used to extract a single property from req. eg: + // * _.property('clean.text') + // * _.property('clean.parsed_text.address') + // will return those properties from req + this.propertyExtractor = propertyExtractor; + + } + + getParameters(req) { + return { + address: this.propertyExtractor(req) + }; + + } + + getUrl(req) { + return url.resolve(this.baseUrl, 'parse'); + } + +} + +module.exports = Libpostal; diff --git a/test/unit/controller/libpostal.js b/test/unit/controller/libpostal.js index f117007d..cf62fff5 100644 --- a/test/unit/controller/libpostal.js +++ b/test/unit/controller/libpostal.js @@ -1,281 +1,304 @@ 'use strict'; const proxyquire = require('proxyquire').noCallThru(); +const libpostal = require('../../../controller/libpostal'); const _ = require('lodash'); +const mock_logger = require('pelias-mock-logger'); module.exports.tests = {}; module.exports.tests.interface = (test, common) => { - test('valid interface', t => { - const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: () => undefined - } - }); - - t.equal(typeof controller, 'function', 'libpostal is a function'); - t.equal(typeof controller(), 'function', 'libpostal returns a controller'); + test('valid interface', (t) => { + t.equal(typeof libpostal, 'function', 'libpostal is a function'); + t.equal(typeof libpostal(), 'function', 'libpostal returns a controller'); t.end(); - }); - }; -module.exports.tests.should_execute = (test, common) => { - test('should_execute returning false should not call text-analyzer', t => { - const should_execute = (req, res) => { +module.exports.tests.early_exit_conditions = (test, common) => { + test('should_execute returning false should not call service', t => { + const service = () => { + t.fail('service should not have been called'); + }; + + const should_execute = (req) => { // req and res should be passed to should_execute t.deepEquals(req, { clean: { text: 'original query' } }); - t.deepEquals(res, { b: 2 }); + return false; }; - const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: () => { - t.fail('parse should not have been called'); - } - } - })(should_execute); + const controller = libpostal(service, should_execute); const req = { clean: { text: 'original query' } }; - const res = { b: 2 }; - controller(req, res, () => { + controller(req, undefined, () => { t.deepEquals(req, { clean: { text: 'original query' } }, 'req should not have been modified'); - t.deepEquals(res, { b: 2 }); + t.end(); }); }); - test('should_execute returning false should not call text-analyzer', t => { - t.plan(5); +}; - const should_execute = (req, res) => { - // req and res should be passed to should_execute - t.deepEquals(req, { - clean: { - text: 'original query' - } - }); - t.deepEquals(res, { b: 2 }); - return true; +module.exports.tests.error_conditions = (test, common) => { + test('service returning error should append and not modify req.clean', t => { + const service = (req, callback) => { + callback('libpostal service error', []); }; - const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: (query) => { - t.equals(query, 'original query'); - return undefined; - } - } - })(should_execute); + const controller = libpostal(service, () => true); const req = { clean: { text: 'original query' - } + }, + errors: [] }; - const res = { b: 2 }; - controller(req, res, () => { + controller(req, undefined, () => { t.deepEquals(req, { clean: { text: 'original query' - } + }, + errors: ['libpostal service error'] }, 'req should not have been modified'); - t.deepEquals(res, { b: 2 }); + t.end(); + }); }); }; -module.exports.tests.parse_is_called = (test, common) => { - test('parse returning undefined should not overwrite clean.parsed_text', t => { - const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: () => undefined - } - })(() => true); - - const req = { - clean: { - parsed_text: 'original parsed_text' - } - }; - const res = 'this is the response'; +module.exports.tests.failure_conditions = (test, common) => { + test('service returning 2 or more of a label should return undefined and log message', t => { + const logger = mock_logger(); - controller(req, res, () => { - t.deepEquals(req, { - clean: { - parsed_text: 'original parsed_text' + const service = (req, callback) => { + const response = [ + { + label: 'road', + value: 'road value 1' + }, + { + label: 'city', + value: 'city value' + }, + { + label: 'road', + value: 'road value 2' } - }); - t.deepEquals(res, 'this is the response'); - t.end(); - }); + ]; - }); + callback(null, response); + }; - test('parse returning something should overwrite clean.parsed_text', t => { const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: () => 'replacement parsed_text' - } - })(() => true); + 'pelias-logger': logger + })(service, () => true); const req = { clean: { - parsed_text: 'original parsed_text' - } + text: 'query value' + }, + errors: [] }; - const res = 'this is the response'; - controller(req, res, () => { + controller(req, undefined, () => { + t.ok(logger.isWarnMessage('discarding libpostal parse of \'query value\' due to duplicate field assignments')); + t.deepEquals(req, { clean: { - parser: 'libpostal', - parsed_text: 'replacement parsed_text' - } - }); - t.deepEquals(res, 'this is the response'); + text: 'query value' + }, + errors: [] + }, 'req should not have been modified'); + t.end(); + }); }); -}; + test('service returning empty array should not set parsed_text or parser', t => { + const logger = mock_logger(); + + const service = (req, callback) => { + callback(null, []); + }; -module.exports.tests.iso2_conversion = (test, common) => { - test('no country in parse response should not leave country unset', t => { const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: () => ({ - locality: 'this is the locality' - }) - }, - 'iso3166-1': { - is2: () => t.fail('should not have been called'), - to3: () => t.fail('should not have been called') - } - })(() => true); + 'pelias-logger': logger + })(service, () => true); const req = { clean: { - parsed_text: 'original parsed_text' - } + text: 'query value' + }, + errors: [] }; - const res = 'this is the response'; - controller(req, res, () => { + controller(req, undefined, () => { t.deepEquals(req, { clean: { - parser: 'libpostal', - parsed_text: { - locality: 'this is the locality' - } - } - }); - t.deepEquals(res, 'this is the response'); + text: 'query value' + }, + errors: [] + }, 'req should not have been modified'); + t.end(); + }); }); - test('unknown country should not be converted', t => { - t.plan(3); +}; - const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: () => ({ - country: 'unknown country code' - }) - }, - 'iso3166-1': { - is2: country => { - t.equals(country, 'UNKNOWN COUNTRY CODE'); - return false; +module.exports.tests.success_conditions = (test, common) => { + test('service returning valid response should convert and append', t => { + const service = (req, callback) => { + const response = [ + { + label: 'island', + value: 'island value' }, - to3: () => t.fail('should not have been called') - } - })(() => true); + { + label: 'category', + value: 'category value' + }, + { + label: 'house', + value: 'house value' + }, + { + label: 'house_number', + value: 'house_number value' + }, + { + label: 'road', + value: 'road value' + }, + { + label: 'suburb', + value: 'suburb value' + }, + { + label: 'city_district', + value: 'city_district value' + }, + { + label: 'city', + value: 'city value' + }, + { + label: 'state_district', + value: 'state_district value' + }, + { + label: 'state', + value: 'state value' + }, + { + label: 'postcode', + value: 'postcode value' + }, + { + label: 'country', + value: 'country value' + } + ]; + + callback(null, response); + }; + + const controller = libpostal(service, () => true); const req = { clean: { - parsed_text: 'original parsed_text' - } + text: 'original query' + }, + errors: [] }; - const res = 'this is the response'; - controller(req, res, () => { + controller(req, undefined, () => { t.deepEquals(req, { clean: { + text: 'original query', parser: 'libpostal', parsed_text: { - country: 'unknown country code' + island: 'island value', + category: 'category value', + query: 'house value', + number: 'house_number value', + street: 'road value', + neighbourhood: 'suburb value', + borough: 'city_district value', + city: 'city value', + county: 'state_district value', + state: 'state value', + postalcode: 'postcode value', + country: 'country value' } - } - }); - t.deepEquals(res, 'this is the response'); + }, + errors: [] + }, 'req should not have been modified'); + t.end(); + }); }); - test('ISO2 country should be converted to ISO3', t => { - t.plan(4); - - const controller = proxyquire('../../../controller/libpostal', { - 'pelias-text-analyzer': { - parse: () => ({ - country: 'ISO2 COUNTRY CODE' - }) - }, - 'iso3166-1': { - is2: country => { - t.equals(country, 'ISO2 COUNTRY CODE'); - return true; - }, - to3: country => { - t.equals(country, 'ISO2 COUNTRY CODE'); - return 'ISO3 COUNTRY CODE'; + test('ISO-2 country should be converted to ISO-3', t => { + const service = (req, callback) => { + const response = [ + { + label: 'country', + value: 'ca' } - } - })(() => true); + ]; + + callback(null, response); + }; + + const controller = libpostal(service, () => true); const req = { clean: { - parsed_text: 'original parsed_text' - } + text: 'original query' + }, + errors: [] }; - const res = 'this is the response'; - controller(req, res, () => { + controller(req, undefined, () => { t.deepEquals(req, { clean: { + text: 'original query', parser: 'libpostal', parsed_text: { - country: 'ISO3 COUNTRY CODE' + country: 'CAN' } - } - }); - t.deepEquals(res, 'this is the response'); + }, + errors: [] + }, 'req should not have been modified'); + t.end(); + }); }); diff --git a/test/unit/controller/structured_libpostal.js b/test/unit/controller/structured_libpostal.js new file mode 100644 index 00000000..eb4aea12 --- /dev/null +++ b/test/unit/controller/structured_libpostal.js @@ -0,0 +1,440 @@ +'use strict'; + +const proxyquire = require('proxyquire').noCallThru(); +const libpostal = require('../../../controller/structured_libpostal'); +const _ = require('lodash'); +const mock_logger = require('pelias-mock-logger'); + +module.exports.tests = {}; + +module.exports.tests.interface = (test, common) => { + test('valid interface', (t) => { + t.equal(typeof libpostal, 'function', 'libpostal is a function'); + t.equal(typeof libpostal(), 'function', 'libpostal returns a controller'); + t.end(); + }); +}; + +module.exports.tests.early_exit_conditions = (test, common) => { + test('should_execute returning false should not call service', t => { + const service = () => { + t.fail('service should not have been called'); + }; + + const should_execute = (req) => { + // req and res should be passed to should_execute + t.deepEquals(req, { + clean: { + text: 'original query' + } + }); + + return false; + }; + + const controller = libpostal(service, should_execute); + + const req = { + clean: { + text: 'original query' + } + }; + + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + text: 'original query' + } + }, 'req should not have been modified'); + + t.end(); + }); + + }); + +}; + +module.exports.tests.error_conditions = (test, common) => { + test('service returning error should append and not modify req.clean', t => { + const service = (req, callback) => { + callback('libpostal service error', []); + }; + + const controller = libpostal(service, () => true); + + const req = { + clean: { + text: 'original query' + }, + errors: [] + }; + + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + text: 'original query' + }, + errors: ['libpostal service error'] + }, 'req should not have been modified'); + + t.end(); + + }); + + }); + +}; + +// module.exports.tests.failure_conditions = (test, common) => { +// test('service returning 2 or more of a label should return undefined and log message', t => { +// const logger = mock_logger(); +// +// const service = (req, callback) => { +// const response = [ +// { +// label: 'road', +// value: 'road value 1' +// }, +// { +// label: 'city', +// value: 'city value' +// }, +// { +// label: 'road', +// value: 'road value 2' +// } +// ]; +// +// callback(null, response); +// }; +// +// const controller = proxyquire('../../../controller/libpostal', { +// 'pelias-logger': logger +// })(service, () => true); +// +// const req = { +// clean: { +// text: 'query value' +// }, +// errors: [] +// }; +// +// controller(req, undefined, () => { +// t.ok(logger.isWarnMessage('discarding libpostal parse of \'query value\' due to duplicate field assignments')); +// +// t.deepEquals(req, { +// clean: { +// text: 'query value' +// }, +// errors: [] +// }, 'req should not have been modified'); +// +// t.end(); +// +// }); +// +// }); +// +// test('service returning empty array should not set parsed_text or parser', t => { +// const logger = mock_logger(); +// +// const service = (req, callback) => { +// callback(null, []); +// }; +// +// const controller = proxyquire('../../../controller/libpostal', { +// 'pelias-logger': logger +// })(service, () => true); +// +// const req = { +// clean: { +// text: 'query value' +// }, +// errors: [] +// }; +// +// controller(req, undefined, () => { +// t.deepEquals(req, { +// clean: { +// text: 'query value' +// }, +// errors: [] +// }, 'req should not have been modified'); +// +// t.end(); +// +// }); +// +// }); +// +// }; +// +module.exports.tests.success_conditions = (test, common) => { + test('service returning house_number should set req.clean.parsed_text.', t => { + const service = (req, callback) => { + const response = [ + { + label: 'house_number', + value: 'house_number value' + }, + { + label: 'postcode', + value: 'postcode value' + } + ]; + + callback(null, response); + }; + + const controller = libpostal(service, () => true); + + const req = { + clean: { + parsed_text: { + address: 'other value house_number value street value' + } + }, + errors: [] + }; + + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + parsed_text: { + number: 'house_number value', + street: 'other value street value' + } + }, + errors: [] + }, 'req should not have been modified'); + + t.end(); + + }); + + }); + + test('service returning postcode should set req.clean.parsed_text.', t => { + const service = (req, callback) => { + const response = [ + { + label: 'postcode', + value: 'postcode value' + } + ]; + + callback(null, response); + }; + + const controller = libpostal(service, () => true); + + const req = { + clean: { + parsed_text: { + address: 'other value postcode value street value' + } + }, + errors: [] + }; + + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + parsed_text: { + number: 'postcode value', + street: 'other value street value' + } + }, + errors: [] + }, 'req should not have been modified'); + + t.end(); + + }); + + }); + + test('service returning neither house_number nor postcode should not set req.clean.parsed_text.number', t => { + const service = (req, callback) => { + const response = [ + { + label: 'city', + value: 'city value' + } + ]; + + callback(null, response); + }; + + const controller = libpostal(service, () => true); + + const req = { + clean: { + parsed_text: { + address: 'street value' + } + }, + errors: [] + }; + + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + parsed_text: { + street: 'street value' + } + }, + errors: [] + }, 'req should not have been modified'); + + t.end(); + + }); + + }); + + // test('service returning valid response should convert and append', t => { + // const service = (req, callback) => { + // const response = [ + // { + // label: 'island', + // value: 'island value' + // }, + // { + // label: 'category', + // value: 'category value' + // }, + // { + // label: 'house', + // value: 'house value' + // }, + // { + // label: 'house_number', + // value: 'house_number value' + // }, + // { + // label: 'road', + // value: 'road value' + // }, + // { + // label: 'suburb', + // value: 'suburb value' + // }, + // { + // label: 'city_district', + // value: 'city_district value' + // }, + // { + // label: 'city', + // value: 'city value' + // }, + // { + // label: 'state_district', + // value: 'state_district value' + // }, + // { + // label: 'state', + // value: 'state value' + // }, + // { + // label: 'postcode', + // value: 'postcode value' + // }, + // { + // label: 'country', + // value: 'country value' + // } + // ]; + // + // callback(null, response); + // }; + // + // const controller = libpostal(service, () => true); + // + // const req = { + // clean: { + // text: 'original query' + // }, + // errors: [] + // }; + // + // controller(req, undefined, () => { + // t.deepEquals(req, { + // clean: { + // text: 'original query', + // parser: 'libpostal', + // parsed_text: { + // island: 'island value', + // category: 'category value', + // query: 'house value', + // number: 'house_number value', + // street: 'road value', + // neighbourhood: 'suburb value', + // borough: 'city_district value', + // city: 'city value', + // county: 'state_district value', + // state: 'state value', + // postalcode: 'postcode value', + // country: 'country value' + // } + // }, + // errors: [] + // }, 'req should not have been modified'); + // + // t.end(); + // + // }); + // + // }); + // + // test('ISO-2 country should be converted to ISO-3', t => { + // const service = (req, callback) => { + // const response = [ + // { + // label: 'country', + // value: 'ca' + // } + // ]; + // + // callback(null, response); + // }; + // + // const controller = libpostal(service, () => true); + // + // const req = { + // clean: { + // text: 'original query' + // }, + // errors: [] + // }; + // + // controller(req, undefined, () => { + // t.deepEquals(req, { + // clean: { + // text: 'original query', + // parser: 'libpostal', + // parsed_text: { + // country: 'CAN' + // } + // }, + // errors: [] + // }, 'req should not have been modified'); + // + // t.end(); + // + // }); + // + // }); + // +}; + +module.exports.all = (tape, common) => { + + function test(name, testFunction) { + return tape(`GET /libpostal ${name}`, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 5631b9f3..ffebf9ea 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -14,6 +14,7 @@ var tests = [ require('./controller/coarse_reverse'), require('./controller/index'), require('./controller/libpostal'), + require('./controller/structured_libpostal'), require('./controller/place'), require('./controller/placeholder'), require('./controller/search'), @@ -100,6 +101,7 @@ var tests = [ require('./sanitizer/wrap'), require('./service/configurations/Interpolation'), require('./service/configurations/Language'), + require('./service/configurations/Libpostal'), require('./service/configurations/PlaceHolder'), require('./service/configurations/PointInPolygon'), require('./service/mget'), diff --git a/test/unit/sanitizer/_synthesize_analysis.js b/test/unit/sanitizer/_synthesize_analysis.js index 64243dc9..19ff57bd 100644 --- a/test/unit/sanitizer/_synthesize_analysis.js +++ b/test/unit/sanitizer/_synthesize_analysis.js @@ -1,18 +1,14 @@ const _ = require('lodash'); const proxyquire = require('proxyquire').noCallThru(); +const sanitizer = require('../../../sanitizer/_synthesize_analysis'); module.exports.tests = {}; module.exports.tests.text_parser = function(test, common) { test('all variables should be parsed', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - const raw = { venue: ' \t venue \t value \t ', + address: ' \t address \t value \t ', neighbourhood: ' \t neighbourhood \t value \t ', borough: ' \t borough \t value \t ', locality: ' \t locality \t value \t ', @@ -27,6 +23,7 @@ module.exports.tests.text_parser = function(test, common) { const expected_clean = { parsed_text: { query: 'venue value', + address: 'address value', neighbourhood: 'neighbourhood value', borough: 'borough value', city: 'locality value', @@ -47,12 +44,6 @@ module.exports.tests.text_parser = function(test, common) { }); test('non-string and blank string values should be treated as not supplied', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - // helper to return a random value that's considered invalid function getInvalidValue() { return _.sample([{}, [], false, '', ' \t ', 17, undefined]); @@ -87,12 +78,6 @@ module.exports.tests.text_parser = function(test, common) { }); test('no supplied fields should return error', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - const raw = {}; const clean = {}; @@ -110,12 +95,6 @@ module.exports.tests.text_parser = function(test, common) { }); test('postalcode-only parsed_text should return error', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - const raw = { postalcode: 'postalcode value' }; @@ -137,132 +116,6 @@ module.exports.tests.text_parser = function(test, common) { }); - test('text_analyzer identifying house number should extract it and street', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Number Value Street Value Number Value'); - - return { - number: 'Number Value' - }; - } - }}); - - const raw = { - address: 'Number Value Street Value Number Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - number: 'Number Value', - street: 'Street Value Number Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer identifying postalcode but not house number should assign to number and remove from address', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Number Value Street Value Number Value'); - - return { - postalcode: 'Number Value' - }; - } - }}); - - const raw = { - address: 'Number Value Street Value Number Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - number: 'Number Value', - street: 'Street Value Number Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer not revealing possible number should move address to street', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Street Value'); - - return {}; - } - }}); - - const raw = { - address: 'Street Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - street: 'Street Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer returning undefined on address resolution should treat as if no house number field was found', t => { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Street Value'); - - return undefined; - } - }}); - - const raw = { - address: 'Street Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - street: 'Street Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - test('return an array of expected parameters in object form for validation', function (t) { const sanitizer = require('../../../sanitizer/_synthesize_analysis'); const expected = [ diff --git a/test/unit/schema.js b/test/unit/schema.js index 7c62f109..1278ee59 100644 --- a/test/unit/schema.js +++ b/test/unit/schema.js @@ -28,6 +28,9 @@ module.exports.tests.completely_valid = (test, common) => { }, interpolation: { url: 'http://localhost' + }, + libpostal: { + url: 'http://localhost' } }, defaultParameters: { @@ -419,7 +422,7 @@ module.exports.tests.api_validation = (test, common) => { }); // api.pipService has been moved to api.services.pip.url - test('any api.pipService value should be allowed', (t) => { + test('any api.pipService value should fail', (t) => { [null, 17, {}, [], true, 'http://localhost'].forEach((value) => { var config = { api: { @@ -543,7 +546,7 @@ module.exports.tests.api_services_validation = (test, common) => { module.exports.tests.service_validation = (test, common) => { // these tests apply for all the individual service definitions - const services = ['pip', 'placeholder', 'interpolation']; + const services = ['pip', 'placeholder', 'interpolation', 'libpostal']; test('timeout and retries not specified should default to 250 and 3', (t) => { services.forEach(service => { @@ -572,7 +575,7 @@ module.exports.tests.service_validation = (test, common) => { }); - test('when api.services.service is defined, url is required', (t) => { + test('when api.services. is defined, url is required', (t) => { services.forEach(service => { const config = { api: { @@ -596,7 +599,7 @@ module.exports.tests.service_validation = (test, common) => { }); - test('non-string api.services.pip.url should throw error', (t) => { + test('non-string api.services..url should throw error', (t) => { services.forEach(service => { [null, 17, {}, [], true].forEach(value => { const config = { @@ -626,7 +629,7 @@ module.exports.tests.service_validation = (test, common) => { }); - test('non-http/https api.services.pip.url should throw error', (t) => { + test('non-http/https api.services..url should throw error', (t) => { services.forEach(service => { ['ftp', 'git', 'unknown'].forEach((scheme) => { const config = { @@ -656,7 +659,7 @@ module.exports.tests.service_validation = (test, common) => { }); - test('non-url children of api.services.pip should be disallowed', (t) => { + test('non-url/timeout/retries children of api.services. should be disallowed', (t) => { services.forEach(service => { const config = { api: { diff --git a/test/unit/service/configurations/Libpostal.js b/test/unit/service/configurations/Libpostal.js new file mode 100644 index 00000000..2dcbc78c --- /dev/null +++ b/test/unit/service/configurations/Libpostal.js @@ -0,0 +1,99 @@ +const Libpostal = require('../../../../service/configurations/Libpostal'); + +module.exports.tests = {}; + +module.exports.tests.all = (test, common) => { + test('getName should return \'libpostal\'', (t) => { + const configBlob = { + url: 'http://localhost:1234', + timeout: 17, + retries: 19 + }; + + const libpostal = new Libpostal(configBlob); + + t.equals(libpostal.getName(), 'libpostal'); + t.equals(libpostal.getBaseUrl(), 'http://localhost:1234/'); + t.equals(libpostal.getTimeout(), 17); + t.equals(libpostal.getRetries(), 19); + t.end(); + + }); + + test('getUrl should return value passed to constructor', (t) => { + const configBlob = { + url: 'http://localhost:1234', + timeout: 17, + retries: 19 + }; + + const libpostal = new Libpostal(configBlob); + + t.equals(libpostal.getUrl(), 'http://localhost:1234/parse'); + t.end(); + + }); + + test('getParameters should return object with text and lang from req', (t) => { + const configBlob = { + url: 'http://localhost:1234', + timeout: 17, + retries: 19 + }; + + const propertyExtractor = (req) => { + t.deepEquals(req, { a: 1, b: 2}); + return 'property value'; + }; + + const libpostal = new Libpostal(configBlob, propertyExtractor); + + const req = { + a: 1, + b: 2 + }; + + t.deepEquals(libpostal.getParameters(req), { address: 'property value' }); + t.end(); + + }); + + test('getHeaders should return empty object', (t) => { + const configBlob = { + url: 'base url', + timeout: 17, + retries: 19 + }; + + const libpostal = new Libpostal(configBlob); + + t.deepEquals(libpostal.getHeaders(), {}); + t.end(); + + }); + + test('baseUrl ending in / should not have double /\'s return by getUrl', (t) => { + const configBlob = { + url: 'http://localhost:1234/', + timeout: 17, + retries: 19 + }; + + const libpostal = new Libpostal(configBlob); + + t.deepEquals(libpostal.getUrl(), 'http://localhost:1234/parse'); + t.end(); + + }); + +}; + +module.exports.all = (tape, common) => { + function test(name, testFunction) { + return tape(`SERVICE CONFIGURATION /Libpostal ${name}`, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +};