// controller/intersections.js
const _ = require('lodash');
const iso3166 = require('iso3166-1');
const util = require('../util/arrayHelper');

/**
 * Splits free-form text into the two street names of an intersection.
 *
 * The text is lowercased and tokenised, then run through the arrayHelper
 * normalisers (EWStreetsSanitizer, addOrdinality). The tokens before the
 * delimiter become `street1`, the tokens after it become `street2`.
 * '&' is used as the delimiter when the text contains one; otherwise the
 * word 'and' is used.
 *
 * @param {string} text - raw search text, e.g. "w28 & 5" or "main st and 3".
 * @returns {{street1: string, street2: string}} the two street names.
 * @throws {Error} when the trimmed text is at most one character long.
 */
function parseIntersections(text) {
  if (text.trim().length <= 1) {
    throw new Error('Missing streets in the intersection');
  }

  // Pad every '&' with spaces so it always becomes a token of its own
  // ("5th&main" -> "5th & main"); without this a glued '&' is never found
  // by indexOf() below. Splitting on /\s+/ also copes with tabs and runs
  // of spaces.
  let words = text.toLowerCase().replace(/&/g, ' & ').split(/\s+/);

  // leading/trailing whitespace leaves empty tokens behind
  words = util.removeWhitespaceElements(words);
  words = util.EWStreetsSanitizer(words);
  words = util.addOrdinality(words);

  // only treat input as intersection if it contains '&' or 'and'
  const delimiter = _.includes(text, '&') ? '&' : 'and';
  const delimiterIndex = words.indexOf(delimiter);

  return {
    street1: util.wordsToSentence(words, 0, delimiterIndex),
    street2: util.wordsToSentence(words, delimiterIndex + 1, words.length)
  };
}

/**
 * Creates the middleware that parses intersection queries.
 *
 * @param {function(Object, Object): boolean} should_execute - predicate that
 *   decides, per request, whether this controller runs.
 * @returns {function(Object, Object, function)} middleware that stores the
 *   parse result in `req.clean.parsed_text` and then calls `next()`.
 */
function setup(should_execute) {
  function controller(req, res, next) {
    // bail early if req/res don't pass conditions for execution
    if (!should_execute(req, res)) {
      return next();
    }

    const parsed_text = parseIntersections(req.clean.text);

    // if a known ISO2 country was parsed, convert it to ISO3
    // NOTE(review): parseIntersections() only returns street1/street2, so this
    // branch is not reachable today - confirm whether it should stay.
    if (_.has(parsed_text, 'country') && iso3166.is2(_.toUpper(parsed_text.country))) {
      parsed_text.country = iso3166.to3(_.toUpper(parsed_text.country));
    }

    req.clean.parsed_text = parsed_text;

    return next();
  }

  return controller;
}

module.exports = setup;
b/controller/libpostal.js @@ -8,7 +8,6 @@ function setup(should_execute) { if (!should_execute(req, res)) { return next(); } - // parse text with query parser const parsed_text = text_analyzer.parse(req.clean.text); diff --git a/controller/predicates/is_intersection_layer.js b/controller/predicates/is_intersection_layer.js new file mode 100644 index 00000000..9117f0c5 --- /dev/null +++ b/controller/predicates/is_intersection_layer.js @@ -0,0 +1,5 @@ +const _ = require('lodash'); + +module.exports = (request, response) => { + return _.includes(request.query.text, '&') || _.includes(request.query.text, ' and '); +}; diff --git a/controller/search.js b/controller/search.js index a8bee3e4..ad22f17b 100644 --- a/controller/search.js +++ b/controller/search.js @@ -1,6 +1,7 @@ 'use strict'; const _ = require('lodash'); +const util = require('util'); const searchService = require('../service/search'); const logger = require('pelias-logger').get('api'); @@ -20,6 +21,7 @@ function setup( apiConfig, esclient, query, should_execute ){ } debugLog.beginTimer(req); let cleanOutput = _.cloneDeep(req.clean); + if (logging.isDNT(req)) { cleanOutput = logging.removeFields(cleanOutput); } @@ -27,7 +29,7 @@ function setup( apiConfig, esclient, query, should_execute ){ logger.info('[req]', 'endpoint=' + req.path, cleanOutput); const renderedQuery = query(req.clean); - + // if there's no query to call ES with, skip the service if (_.isUndefined(renderedQuery)) { debugLog.stopTimer(req, 'No query to call ES with. 
Skipping'); diff --git a/query/search_intersections.js b/query/search_intersections.js new file mode 100644 index 00000000..f0c1b687 --- /dev/null +++ b/query/search_intersections.js @@ -0,0 +1,220 @@ +/* eslint-disable */ + +// +// 'use strict'; +// +// const peliasQuery = require('pelias-query'); +// const defaults = require('./search_defaults'); +// const textParser = require('./text_parser'); +// const check = require('check-types'); +// const logger = require('pelias-logger').get('api'); +// +// //------------------------------ +// // general-purpose search query +// //------------------------------ +// var fallbackQuery = new peliasQuery.layout.FallbackQuery(); +// +// // scoring boost +// fallbackQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); +// fallbackQuery.score( peliasQuery.view.popularity_only_function ); +// fallbackQuery.score( peliasQuery.view.population_only_function ); +// // -------------------------------- +// +// // non-scoring hard filters +// fallbackQuery.filter( peliasQuery.view.boundary_country ); +// fallbackQuery.filter( peliasQuery.view.boundary_circle ); +// fallbackQuery.filter( peliasQuery.view.boundary_rect ); +// fallbackQuery.filter( peliasQuery.view.sources ); +// fallbackQuery.filter( peliasQuery.view.layers ); +// fallbackQuery.filter( peliasQuery.view.categories ); +// // -------------------------------- +// +// /** +// map request variables to query variables for all inputs +// provided by this HTTP request. 
+// **/ +// function generateQuery( clean ){ +// +// console.log("I am in intersections query"); +// +// const vs = new peliasQuery.Vars( defaults ); +// +// let logStr = '[query:search] [parser:intersections_parser] '; +// +// // input text +// vs.var( 'input:name', clean.text ); +// +// // sources +// if( check.array(clean.sources) && clean.sources.length ) { +// vs.var( 'sources', clean.sources); +// logStr += '[param:sources] '; +// } +// +// // layers +// if( check.array(clean.layers) && clean.layers.length ) { +// vs.var('layers', clean.layers); +// logStr += '[param:layers] '; +// } +// +// // categories +// if (clean.categories) { +// vs.var('input:categories', clean.categories); +// logStr += '[param:categories] '; +// } +// +// // size +// if( clean.querySize ) { +// vs.var( 'size', clean.querySize ); +// logStr += '[param:querySize] '; +// } +// +// // focus point +// if( check.number(clean['focus.point.lat']) && +// check.number(clean['focus.point.lon']) ){ +// vs.set({ +// 'focus:point:lat': clean['focus.point.lat'], +// 'focus:point:lon': clean['focus.point.lon'] +// }); +// logStr += '[param:focus_point] '; +// } +// +// // boundary rect +// if( check.number(clean['boundary.rect.min_lat']) && +// check.number(clean['boundary.rect.max_lat']) && +// check.number(clean['boundary.rect.min_lon']) && +// check.number(clean['boundary.rect.max_lon']) ){ +// vs.set({ +// 'boundary:rect:top': clean['boundary.rect.max_lat'], +// 'boundary:rect:right': clean['boundary.rect.max_lon'], +// 'boundary:rect:bottom': clean['boundary.rect.min_lat'], +// 'boundary:rect:left': clean['boundary.rect.min_lon'] +// }); +// logStr += '[param:boundary_rect] '; +// } +// +// // boundary circle +// // @todo: change these to the correct request variable names +// if( check.number(clean['boundary.circle.lat']) && +// check.number(clean['boundary.circle.lon']) ){ +// vs.set({ +// 'boundary:circle:lat': clean['boundary.circle.lat'], +// 'boundary:circle:lon': 
clean['boundary.circle.lon'] +// }); +// +// if( check.number(clean['boundary.circle.radius']) ){ +// vs.set({ +// 'boundary:circle:radius': Math.round( clean['boundary.circle.radius'] ) + 'km' +// }); +// } +// logStr += '[param:boundary_circle] '; +// } +// +// // boundary country +// if( check.string(clean['boundary.country']) ){ +// vs.set({ +// 'boundary:country': clean['boundary.country'] +// }); +// logStr += '[param:boundary_country] '; +// } +// +// // run the address parser +// if( clean.parsed_text ){ +// textParser( clean.parsed_text, vs ); +// } +// +// var q = getQuery(vs); +// +// //console.log(JSON.stringify(q, null, 2)); +// +// if (q !== undefined) { +// logger.info(logStr); +// } +// else { +// logger.info('[parser:intersections] query type not supported'); +// } +// +// return q; +// } +// +// function getQuery(vs) { +// +// logger.info(`[query:search] [search_input_type:${determineQueryType(vs)}]`); +// +// if (hasStreet(vs) || isPostalCodeOnly(vs)) { +// return { +// type: 'fallback', +// body: fallbackQuery.render(vs) +// }; +// } +// +// // returning undefined is a signal to a later step that the addressit-parsed +// // query should be queried for +// return undefined; +// +// } +// +// function determineQueryType(vs) { +// if (vs.isset('input:housenumber') && vs.isset('input:street')) { +// return 'address'; +// } +// else if (vs.isset('input:street')) { +// return 'street'; +// } +// else if (vs.isset('input:street1') || vs.isset('input:street2')) { +// return 'intersection'; +// } +// else if (vs.isset('input:query')) { +// return 'venue'; +// } +// else if (['neighbourhood', 'borough', 'postcode', 'county', 'region','country'].some( +// layer => vs.isset(`input:${layer}`) +// )) { +// return 'admin'; +// } +// return 'other'; +// } +// +// function hasStreet(vs) { +// return vs.isset('input:street') || vs.isset('input:street1') || vs.isset('input:street2'); +// } +// +// function isPostalCodeOnly(vs) { +// var isSet = layer => 
vs.isset(`input:${layer}`); +// +// var allowedFields = ['postcode']; +// var disallowedFields = ['query', 'category', 'housenumber', 'street', +// 'neighbourhood', 'borough', 'county', 'region', 'country']; +// +// return allowedFields.every(isSet) && +// !disallowedFields.some(isSet); +// +// } + +function generateQuery(clean) { + return { + type: 'fallback', + body: { + 'size': 2, + 'query': { + 'or': [{ + 'bool' : { + 'must': [ + { 'match': { 'layer' : 'intersection'} }, + { 'match': { 'address_parts.street1' : clean.parsed_text.street1} }, + { 'match': { 'address_parts.street2' : clean.parsed_text.street2} } + ] + } }, { + 'bool' : { + 'must': [ + { 'match': { 'layer' : 'intersection'} }, + { 'match': { 'address_parts.street1' : clean.parsed_text.street2} }, + { 'match': { 'address_parts.street2' : clean.parsed_text.street1} } + ] + } } + ] + } + } + }; +} + +module.exports = generateQuery; diff --git a/query/search_original.js b/query/search_original.js index a6b666d6..aee3307d 100644 --- a/query/search_original.js +++ b/query/search_original.js @@ -58,7 +58,7 @@ query.filter( peliasQuery.view.categories ); function generateQuery( clean ){ var vs = new peliasQuery.Vars( defaults ); - + let logStr = '[query:search] [parser:addressit] '; // input text diff --git a/query/text_parser.js b/query/text_parser.js index fd07cd0d..f8f0412a 100644 --- a/query/text_parser.js +++ b/query/text_parser.js @@ -29,6 +29,16 @@ function addParsedVariablesToQueryVariables( parsed_text, vs ){ vs.var( 'input:street', parsed_text.street ); } + // street1 name + if( ! _.isEmpty(parsed_text.street1) ){ + vs.var( 'input:street1', parsed_text.street1 ); + } + + // street2 name + if( ! _.isEmpty(parsed_text.street2) ){ + vs.var( 'input:street2', parsed_text.street2 ); + } + // neighbourhood if ( ! 
_.isEmpty(parsed_text.neighbourhood) ) { vs.var( 'input:neighbourhood', parsed_text.neighbourhood); diff --git a/routes/v1.js b/routes/v1.js index d300d20e..078024ce 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -33,7 +33,8 @@ var controllers = { placeholder: require('../controller/placeholder'), search: require('../controller/search'), search_with_ids: require('../controller/search_with_ids'), - status: require('../controller/status') + status: require('../controller/status'), + intersection: require('../controller/intersections') }; var queries = { @@ -42,7 +43,8 @@ var queries = { structured_geocoding: require('../query/structured_geocoding'), reverse: require('../query/reverse'), autocomplete: require('../query/autocomplete'), - address_using_ids: require('../query/address_search_using_ids') + address_using_ids: require('../query/address_search_using_ids'), + intersections: require('../query/search_intersections') }; /** ----------------------- controllers ----------------------- **/ @@ -82,6 +84,8 @@ const isRequestSourcesOnlyWhosOnFirst = require('../controller/predicates/is_req const hasRequestParameter = require('../controller/predicates/has_request_parameter'); const hasParsedTextProperties = require('../controller/predicates/has_parsed_text_properties'); +const isIntersectionLayer = require('../controller/predicates/is_intersection_layer'); + // shorthand for standard early-exit conditions const hasResponseDataOrRequestErrors = any(hasResponseData, hasRequestErrors); const hasAdminOnlyResults = not(hasResultsAtLayers(['venue', 'address', 'street'])); @@ -127,7 +131,19 @@ function addRoutes(app, peliasConfig) { isPipServiceEnabled, not(hasRequestErrors), not(hasResponseData) ); + // defines whether to skip libpostal and control should be switched to intersection processing + const IntersectionParserShouldExecute = all ( + isIntersectionLayer, + not(hasRequestErrors) + ); + + const intersectionQueryShouldExecute = all ( + isIntersectionLayer, + 
not(hasRequestErrors) + ); + const libpostalShouldExecute = all( + not(isIntersectionLayer), not(hasRequestErrors), not(isRequestSourcesOnlyWhosOnFirst) ); @@ -197,12 +213,14 @@ function addRoutes(app, peliasConfig) { const shouldDeferToAddressIt = all( not(hasRequestErrors), not(hasResponseData), - not(placeholderShouldHaveExecuted) + not(placeholderShouldHaveExecuted), + not(isIntersectionLayer) ); // call very old prod query if addressit was the parser const oldProdQueryShouldExecute = all( not(hasRequestErrors), + not(isIntersectionLayer), isAddressItParse ); @@ -256,6 +274,7 @@ function addRoutes(app, peliasConfig) { sanitizers.search.middleware(peliasConfig.api), middleware.requestLanguage, middleware.calcSize(), + controllers.intersection(IntersectionParserShouldExecute), controllers.libpostal(libpostalShouldExecute), controllers.placeholder(placeholderService, geometricFiltersApply, placeholderGeodisambiguationShouldExecute), controllers.placeholder(placeholderService, geometricFiltersDontApply, placeholderIdsLookupShouldExecute), @@ -265,6 +284,7 @@ function addRoutes(app, peliasConfig) { controllers.search(peliasConfig.api, esclient, queries.cascading_fallback, fallbackQueryShouldExecute), sanitizers.defer_to_addressit(shouldDeferToAddressIt), controllers.search(peliasConfig.api, esclient, queries.very_old_prod, oldProdQueryShouldExecute), + controllers.search(peliasConfig.api, esclient, queries.intersections, intersectionQueryShouldExecute), postProc.trimByGranularity(), postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig.api), diff --git a/util/arrayHelper.js b/util/arrayHelper.js new file mode 100644 index 00000000..c361d246 --- /dev/null +++ b/util/arrayHelper.js @@ -0,0 +1,75 @@ +module.exports.removeWhitespaceElements = function (arr) { + for(let i = 0; i < arr.length; i++) { + if(arr[i] === '') { + arr.splice(i, 1); + i--; + } + } + return arr; +}; + +// intended for intersections only +// this function turns '77' into 
// '77th', '3' into '3rd', etc

/**
 * Picks the English ordinal suffix for a string of digits.
 * Numbers ending in 11, 12 or 13 take 'th' ('11th', '112th'), not the
 * suffix of their last digit.
 *
 * @param {string} digits - non-empty string of decimal digits.
 * @returns {string} 'st', 'nd', 'rd' or 'th'.
 */
function ordinalSuffix(digits) {
  const lastTwo = digits.slice(-2);
  if (lastTwo === '11' || lastTwo === '12' || lastTwo === '13') {
    return 'th';
  }

  switch (digits[digits.length - 1]) {
    case '1':
      return 'st';
    case '2':
      return 'nd';
    case '3':
      return 'rd';
    default:
      return 'th';
  }
}

/**
 * Appends an ordinal suffix to every element that consists only of digits.
 * Other elements are left untouched. The array is modified in place.
 *
 * @param {Array} arr - tokens to normalise.
 * @returns {Array} the same array instance.
 */
module.exports.addOrdinality = function (arr) {
  arr.forEach(function (word, index) {
    // is it only numbers
    if (/^\d+$/.test(word)) {
      const digits = String(word);
      arr[index] = digits + ordinalSuffix(digits);
    }
  });
  return arr;
};

/**
 * Expands a compass prefix glued to a street number into two tokens:
 *   'w28' -> 'West', '28'
 *   'e17' -> 'East', '17'
 * A token qualifies when its first letter is 'e' or 'w' (either case) and
 * its second character is a digit. The array is modified in place.
 *
 * @param {Array} arr - tokens to normalise.
 * @returns {Array} the same array instance.
 */
module.exports.EWStreetsSanitizer = function (arr) {
  const mapping = {
    e: 'East',
    w: 'West'
  };

  for (let i = 0; i < arr.length; i++) {
    const word = arr[i];
    if (typeof word !== 'string' || word.length < 2) {
      continue;
    }

    // look the direction up with the lowercased letter so 'W28' works too
    const direction = mapping[word[0].toLowerCase()];
    if (direction !== undefined && /^\d$/.test(word[1])) {
      // replace the token by the direction followed by the rest of the token
      arr.splice(i, 1, direction, word.substring(1));
    }
  }

  return arr;
};

/**
 * Joins the elements arr[start] .. arr[end - 1] with single spaces.
 * The range is clamped to the array, so an `end` of -1 (a failed indexOf)
 * gives an empty string and an `end` past the last element stops at the
 * last element.
 *
 * @param {Array} arr - tokens to join.
 * @param {number} start - index of the first element to include.
 * @param {number} end - index after the last element to include.
 * @returns {string} the joined sentence, '' when the range is empty.
 */
module.exports.wordsToSentence = function (arr, start, end) {
  const from = Math.max(start, 0);
  const to = Math.min(end, arr.length);

  if (to <= from) {
    return '';
  }

  return arr.slice(from, to).join(' ');
};