const _ = require('lodash'); const iso3166 = require('../helper/iso3166'); const Debug = require('../helper/debug'); const debugLog = new Debug('controller:libpostal'); const logger = require('pelias-logger').get('api'); // mapping object from libpostal fields to pelias fields var field_mapping = { island: 'island', category: 'category', house: 'query', house_number: 'number', road: 'street', suburb: 'neighbourhood', city_district: 'borough', city: 'city', state_district: 'county', state: 'state', postcode: 'postalcode', country: 'country', unit: 'unit', }; // This controller calls the hosted libpostal service and converts the response // to a generic format for later use. The hosted service returns an array like: // // ``` // [ // { // label: 'house_number', // value: '30' // }, // { // label: 'road', // value: 'west 26th street' // }, // { // label: 'city', // value: 'new york' // }, // { // label: 'state', // value: 'ny' // } //] // ``` // // where `label` can be any of (currently): // - house (generally interpreted as unknown, treated by pelias like a query term) // - category (like "restaurants") // - house_number // - road // - unit (apt or suite #) // - suburb (like a neighbourhood) // - city // - city_district (like an NYC borough) // - state_district (like a county) // - state // - postcode // - country // // The Pelias query module is not concerned with unit. // function setup(libpostalService, should_execute) { function controller( req, res, next ){ // bail early if req/res don't pass conditions for execution if (!should_execute(req, res)) { return next(); } const initialTime = debugLog.beginTimer(req); libpostalService(req, (err, response) => { if (err) { // push err.message or err onto req.errors req.errors.push( _.get(err, 'message', err) ); } else if (_.some(_.countBy(response, o => o.label), count => count > 1)) { logger.warn(`discarding libpostal parse of '${req.clean.text}' due to duplicate field assignments`); return next(); } else if (_.isEmpty(response)) { return next(); } else { // apply fixes for known bugs in libpostal response = patchBuggyResponses(response); req.clean.parser = 'libpostal'; req.clean.parsed_text = response.reduce(function(o, f) { if (field_mapping.hasOwnProperty(f.label)) { o[field_mapping[f.label]] = f.value; } return o; }, {}); if (_.has(req.clean.parsed_text, 'country') && iso3166.isISO2Code(req.clean.parsed_text.country)) { req.clean.parsed_text.country = iso3166.convertISO2ToISO3(req.clean.parsed_text.country); } debugLog.push(req, {parsed_text: req.clean.parsed_text}); } debugLog.stopTimer(req, initialTime); return next(); }); } return controller; } const RECAST_LABELS = [{ value: 'zoo', label: { to: 'house' } }]; const DIAGONAL_DIRECTIONALS = ['ne','nw','se','sw']; // apply fixes for known bugs in libpostal function patchBuggyResponses(response){ if( !Array.isArray(response) || !response.length ){ return response; } // recast labels for certain values, currently only applied to parses which return a single label. // the RECAST_LABELS array contains match/replace conditions which are applied in order. // the 'value' and 'label.to' properties are mandatory, they define the value to match on and // the replacement label to assign. you may optionally also provide 'label.from' which will restrict // replacements to only records with BOTH a matching 'value' and a matching 'label.from'. if( response.length === 1 ){ let first = response[0]; RECAST_LABELS.forEach(recast => { if( !_.has(recast, 'label') || !_.has(recast.label, 'to') ){ return; } if( recast.value !== first.value ){ return; } if( _.has(recast.label, 'from') && recast.label.from !== first.label ){ return; } first.label = recast.label.to; }); } // generate an index to avoid multiple iterations over the response array let idx = {}; response.forEach((res, pos) => idx[res.label] = _.assign({ _pos: pos }, res)); // known bug where the street name is only a directional, in this case we will merge it // with the subsequent element. // note: the bug only affects diagonals, not N,S,E,W // https://github.com/OpenTransitTools/trimet-mod-pelias/issues/20#issuecomment-417732128 if( response.length > 1 ){ let road = _.get(idx, 'road'); if( _.isPlainObject(road) && _.isString(road.value) && road.value.length === 2 ){ if( DIAGONAL_DIRECTIONALS.includes( road.value.toLowerCase() ) ){ let subsequentElement = response[road._pos+1]; if( subsequentElement && _.isString(subsequentElement.value) ){ response[road._pos].value += ' ' + subsequentElement.value; // merge elements response.splice(road._pos+1, 1); // remove merged element } } } } // known bug where Australian unit numbers are incorrectly included in the house_number label // note: in the case where a 'unit' label already exists, do nothing. // https://github.com/pelias/pelias/issues/753 let unit = _.get(idx, 'unit'); let house_number = _.get(idx, 'house_number'); if( _.isPlainObject(house_number) && !_.isPlainObject(unit) && _.isString(house_number.value) ){ let split = _.trim(_.trim(house_number.value),'/').split('/'); if( split.length === 2 ){ response[house_number._pos].value = _.trim(split[1]); // second part (house number) response.push({ label: 'unit', value: _.trim(split[0]) }); // first part (unit number) } } return response; } module.exports = setup;