diff --git a/middleware/trimByGranularityComponent.js b/middleware/trimByGranularityComponent.js
new file mode 100644
index 00000000..afef01f0
--- /dev/null
+++ b/middleware/trimByGranularityComponent.js
@@ -0,0 +1,123 @@
+const _ = require('lodash');
+
+// This middleware component trims the results array by granularity when
+// FallbackQuery was used. FallbackQuery is used for inputs like
+// `1090 N Charlotte St, Lancaster, PA` where the address may not exist and
+// we must fall back to trying `Lancaster, PA`. If the address does exist then
+// FallbackQuery will return results for:
+// - address+city+state
+// - city+state
+// - state
+//
+// Because the address matched, we're not interested in city+state or state, so
+// this component removes results that aren't the most granular.
+
+// layers in decreasing order of granularity (most granular first)
+// some places where this list differs in order from trimByGranularity:
+// - because house number and street are in a single field, street hits must be considered
+//   more important than addresses due to how ES matches
+// - country outranks dependency, this was done to ensure that "country=United States" doesn't
+//   bump up US dependencies containing "United States" above the country
+// - retain both borough and locality results if both exist: when city=Manhattan is
+//   supplied we want to retain borough=Manhattan and city=Manhattan results
+const layers = [
+  'venue',
+  'street',
+  'address',
+  'neighbourhood',
+  ['borough', 'locality'],
+  'localadmin',
+  'county',
+  'macrocounty',
+  'region',
+  'macroregion',
+  'country',
+  'dependency'
+];
+
+// these layers are strictly used to drive one special case:
+// - when there was a borough explicitly supplied
+// for example, if the user passed borough=manhattan and city=new york
+// then we want to preserve just boroughs if they're most granular and throw away
+// city results. In the usual case where no borough is passed, the city value
+// is looked up as a borough in the off chance that the user passed
+// city=Manhattan
+const explicit_borough_layers = [
+  'venue',
+  'street',
+  'address',
+  'neighbourhood',
+  'borough',
+  'locality',
+  'localadmin',
+  'county',
+  'macrocounty',
+  'region',
+  'macroregion',
+  'country',
+  'dependency'
+];
+
+// this helper method returns `true` if every result has a matched_query
+// starting with `fallback.`
+function isFallbackQuery(results) {
+  return results.every(function(result) {
+    return result.hasOwnProperty('_matched_queries') &&
+      !_.isEmpty(result._matched_queries) &&
+      _.startsWith(result._matched_queries[0], 'fallback.');
+  });
+}
+
+// returns `true` if the first matched query of `result` is `fallback.<layer>`;
+// `layer` is either a single layer name or an array of equally-ranked layer names
+function matchesLayer(result, layer) {
+  const candidates = _.isArray(layer) ? layer : [layer];
+  return candidates.some( (sublayer) => {
+    return result._matched_queries[0] === 'fallback.' + sublayer;
+  });
+}
+
+// returns `true` if at least one result was matched at `layer`
+function hasRecordsAtLayers(results, layer) {
+  return results.some( (result) => matchesLayer(result, layer) );
+}
+
+// returns only the results that were matched at `layer`
+function retainRecordsAtLayers(results, layer) {
+  return results.filter( (result) => matchesLayer(result, layer) );
+}
+
+// returns the layer ordering to trim by: the borough-specific ordering when
+// the parsed input explicitly contains a `borough`, the default ordering otherwise
+function getLayers(parsed_text) {
+  if (parsed_text && parsed_text.hasOwnProperty('borough')) {
+    return explicit_borough_layers;
+  }
+  return layers;
+}
+
+// returns the `trim(req, res, next)` middleware function
+function setup() {
+  return function trim(req, res, next) {
+    // don't do anything if there are no results or there are non-fallback.* named queries
+    // there should never be a mixture of fallback.* and non-fallback.* named queries
+    if (_.isEmpty(res.data) || !isFallbackQuery(res.data)) {
+      return next();
+    }
+
+    // `req.clean` is read defensively so that a request that was never sanitized
+    // falls back to the default layer ordering instead of throwing
+    const ranked_layers = getLayers(_.get(req, 'clean.parsed_text'));
+
+    // start at the most granular possible layer. if there are results at a layer
+    // then remove everything not at that layer.
+    const most_granular = ranked_layers.find( (layer) => hasRecordsAtLayers(res.data, layer) );
+    if (!_.isUndefined(most_granular)) {
+      res.data = retainRecordsAtLayers(res.data, most_granular);
+    }
+
+    next();
+  };
+}
+
+module.exports = setup;
diff --git a/query/component_geocoding.js b/query/component_geocoding.js
new file mode 100644
index 00000000..c212c848
--- /dev/null
+++ b/query/component_geocoding.js
@@ -0,0 +1,112 @@
+const peliasQuery = require('pelias-query');
+const defaults = require('./search_defaults');
+const textParser = require('./text_parser');
+const check = require('check-types');
+
+//------------------------------
+// component geocoding query
+//------------------------------
+const componentQuery = new peliasQuery.layout.ComponentFallbackQuery();
+
+// scoring boost
+componentQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) );
+componentQuery.score( peliasQuery.view.popularity_only_function );
+componentQuery.score( peliasQuery.view.population_only_function );
+// --------------------------------
+
+// non-scoring hard filters
+componentQuery.filter( peliasQuery.view.boundary_country );
+componentQuery.filter( peliasQuery.view.boundary_circle );
+componentQuery.filter( peliasQuery.view.boundary_rect );
+componentQuery.filter( peliasQuery.view.sources );
+componentQuery.filter( peliasQuery.view.layers );
+componentQuery.filter( peliasQuery.view.categories );
+// --------------------------------
+
+/**
+  map request variables to query variables for all inputs
+  provided by this HTTP request.
+
+  returns `{ type: 'fallback', body: <rendered query> }`
+**/
+function generateQuery( clean ){
+
+  const vs = new peliasQuery.Vars( defaults );
+
+  // input text
+  vs.var( 'input:name', clean.text );
+
+  // sources
+  vs.var( 'sources', clean.sources);
+
+  // layers
+  vs.var( 'layers', clean.layers);
+
+  // size
+  if( clean.querySize ) {
+    vs.var( 'size', clean.querySize );
+  }
+
+  // focus point
+  if( check.number(clean['focus.point.lat']) &&
+      check.number(clean['focus.point.lon']) ){
+    vs.set({
+      'focus:point:lat': clean['focus.point.lat'],
+      'focus:point:lon': clean['focus.point.lon']
+    });
+  }
+
+  // boundary rect
+  if( check.number(clean['boundary.rect.min_lat']) &&
+      check.number(clean['boundary.rect.max_lat']) &&
+      check.number(clean['boundary.rect.min_lon']) &&
+      check.number(clean['boundary.rect.max_lon']) ){
+    vs.set({
+      'boundary:rect:top': clean['boundary.rect.max_lat'],
+      'boundary:rect:right': clean['boundary.rect.max_lon'],
+      'boundary:rect:bottom': clean['boundary.rect.min_lat'],
+      'boundary:rect:left': clean['boundary.rect.min_lon']
+    });
+  }
+
+  // boundary circle
+  // @todo: change these to the correct request variable names
+  if( check.number(clean['boundary.circle.lat']) &&
+      check.number(clean['boundary.circle.lon']) ){
+    vs.set({
+      'boundary:circle:lat': clean['boundary.circle.lat'],
+      'boundary:circle:lon': clean['boundary.circle.lon']
+    });
+
+    // the radius is only applied when a circle centre was supplied
+    if( check.number(clean['boundary.circle.radius']) ){
+      vs.set({
+        'boundary:circle:radius': Math.round( clean['boundary.circle.radius'] ) + 'km'
+      });
+    }
+  }
+
+  // boundary country
+  if( check.string(clean['boundary.country']) ){
+    vs.set({
+      'boundary:country': clean['boundary.country']
+    });
+  }
+
+  // run the address parser
+  if( clean.parsed_text ){
+    textParser( clean.parsed_text, vs );
+  }
+
+  return getQuery(vs);
+}
+
+// wraps the rendered query body together with its query type
+function getQuery(vs) {
+  return {
+    type: 'fallback',
+    body: componentQuery.render(vs)
+  };
+}
+
+module.exports = generateQuery;
diff --git a/query/text_parser.js
b/query/text_parser.js index c5b8da44..4ccc8661 100644 --- a/query/text_parser.js +++ b/query/text_parser.js @@ -14,6 +14,10 @@ function addParsedVariablesToQueryVariables( parsed_text, vs ){ vs.var('input:category', parsed_text.category); } + if (parsed_text.hasOwnProperty('address')) { + vs.var( 'input:address', parsed_text.address ); + } + // house number if( parsed_text.hasOwnProperty('number') ){ vs.var( 'input:housenumber', parsed_text.number ); diff --git a/routes/v1.js b/routes/v1.js index 6c5afd58..a1c71fb1 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -8,6 +8,7 @@ var sanitizers = { place: require('../sanitizer/place'), search: require('../sanitizer/search'), search_fallback: require('../sanitizer/search_fallback'), + component_geocoding: require('../sanitizer/component_geocoding'), reverse: require('../sanitizer/reverse'), nearby: require('../sanitizer/nearby') }; @@ -28,13 +29,15 @@ var controllers = { var queries = { libpostal: require('../query/search'), - fallback_to_old_prod: require('../query/search_original') + fallback_to_old_prod: require('../query/search_original'), + component_geocoding: require('../query/component_geocoding') }; /** ----------------------- controllers ----------------------- **/ var postProc = { trimByGranularity: require('../middleware/trimByGranularity'), + trimByGranularityComponent: require('../middleware/trimByGranularityComponent'), distances: require('../middleware/distance'), confidenceScores: require('../middleware/confidenceScore'), confidenceScoresFallback: require('../middleware/confidenceScoreFallback'), @@ -92,6 +95,24 @@ function addRoutes(app, peliasConfig) { postProc.geocodeJSON(peliasConfig, base), postProc.sendJSON ]), + component: createRouter([ + sanitizers.component_geocoding.middleware, + middleware.calcSize(), + controllers.search(peliasConfig, undefined, queries.component_geocoding), + postProc.trimByGranularityComponent(), + postProc.distances('focus.point.'), + 
postProc.confidenceScores(peliasConfig), + postProc.confidenceScoresFallback(), + postProc.dedupe(), + postProc.accuracy(), + postProc.localNamingConventions(), + postProc.renamePlacenames(), + postProc.parseBoundingBox(), + postProc.normalizeParentIds(), + postProc.assignLabels(), + postProc.geocodeJSON(peliasConfig, base), + postProc.sendJSON + ]), autocomplete: createRouter([ sanitizers.autocomplete.middleware, controllers.search(peliasConfig, null, require('../query/autocomplete')), @@ -172,6 +193,7 @@ function addRoutes(app, peliasConfig) { app.get ( base + 'autocomplete', routers.autocomplete ); app.get ( base + 'search', routers.search ); app.post( base + 'search', routers.search ); + app.get ( base + 'beta/component', routers.component ); app.get ( base + 'reverse', routers.reverse ); app.get ( base + 'nearby', routers.nearby );
diff --git a/sanitizer/_iso2_to_iso3.js b/sanitizer/_iso2_to_iso3.js
new file mode 100644
index 00000000..d8fc1837
--- /dev/null
+++ b/sanitizer/_iso2_to_iso3.js
@@ -0,0 +1,25 @@
+const _ = require('lodash');
+const iso3166 = require('iso3166-1');
+
+// this sanitizer exists solely to convert an ISO2 country value to ISO3
+// eg - 'TH' -> 'THA'
+// this can go away once altnames imports ISO2 country values from WOF
+function sanitize( raw, clean ){
+  // this sanitizer never reports anything, so both message lists stay empty
+  const messages = { errors: [], warnings: [] };
+
+  // nothing to convert when `clean` has no `parsed_text`
+  if (!clean.hasOwnProperty('parsed_text')) {
+    return messages;
+  }
+
+  const country = _.toUpper(clean.parsed_text.country);
+  if (iso3166.is2(country)) {
+    clean.parsed_text.country = iso3166.to3(country);
+  }
+
+  return messages;
+}
+
+// export function
+module.exports = sanitize;
diff --git a/sanitizer/_synthesize_analysis.js b/sanitizer/_synthesize_analysis.js
new file mode 100644
index 00000000..788132ec
--- /dev/null
+++ b/sanitizer/_synthesize_analysis.js
@@ -0,0 +1,45 @@
+const _ = require('lodash');
+
+const fields = ['address', 'neighbourhood', 'borough', 'city', 'county',
+                'state', 'postalcode', 'country'];
+
+// trims `val` and collapses every run of whitespace into a single space
+function normalizeWhitespaceToSingleSpace(val) {
+  const trimmed = _.trim(val);
+  return _.replace(trimmed, /\s+/g, ' ');
+}
+
+// `true` when `postalcode` is the one and only key of `parsed_text`
+function isPostalCodeOnly(parsed_text) {
+  const keys = Object.keys(parsed_text);
+  return keys.length === 1 && parsed_text.hasOwnProperty('postalcode');
+}
+
+// builds `clean.parsed_text` from the supported `raw` fields and reports an
+// error when nothing usable (or only a postalcode) was supplied
+function sanitize( raw, clean ){
+  const messages = { errors: [], warnings: [] };
+  const parsed_text = {};
+
+  // keep only the fields holding a non-blank string, in normalized form
+  fields.forEach( (field) => {
+    const value = raw[field];
+    if (_.isString(value) && !_.isEmpty(_.trim(value))) {
+      parsed_text[field] = normalizeWhitespaceToSingleSpace(value);
+    }
+  });
+  clean.parsed_text = parsed_text;
+
+  if (isPostalCodeOnly(parsed_text)) {
+    messages.errors.push('postalcode-only inputs are not supported');
+  }
+  else if (Object.keys(parsed_text).length === 0) {
+    messages.errors.push(
+      `at least one of the following fields is required: ${fields.join(', ')}`);
+  }
+
+  return messages;
+}
+
+// export function
+module.exports = sanitize;
diff --git a/sanitizer/component_geocoding.js b/sanitizer/component_geocoding.js
new file mode 100644
index 00000000..a5a95de9
--- /dev/null
+++ b/sanitizer/component_geocoding.js
@@ -0,0 +1,31 @@
+var type_mapping = require('../helper/type_mapping');
+
+var sanitizeAll = require('../sanitizer/sanitizeAll'),
+    sanitizers = {
+      quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'),
+      singleScalarParameters: require('../sanitizer/_single_scalar_parameters'),
+      synthesize_analysis: require('../sanitizer/_synthesize_analysis'),
+      iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'),
+      size: require('../sanitizer/_size')(/* use defaults*/),
+      layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping),
+      sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping),
+      // depends on the layers and sources sanitizers, must be run after them
+      sources_and_layers: require('../sanitizer/_sources_and_layers'),
+      private: require('../sanitizer/_flag_bool')('private', false),
+      geo_search: require('../sanitizer/_geo_search'),
+      boundary_country: require('../sanitizer/_boundary_country'),
+      categories: require('../sanitizer/_categories')
+    };
+
+// delegates to sanitizeAll with the sanitizers declared above
+function sanitize(req, cb) {
+  sanitizeAll(req, sanitizers, cb);
+}
+
+// middleware
+module.exports.middleware = function( req, res, next ){
+  // the callback arguments are not inspected here; the chain always continues
+  sanitize( req, function( err, clean ){
+    next();
+  });
+};
diff --git a/test/unit/fixture/component_geocoding/boundary_country.json b/test/unit/fixture/component_geocoding/boundary_country.json new file mode 100644 index 00000000..182da5a9 --- /dev/null +++ b/test/unit/fixture/component_geocoding/boundary_country.json @@ -0,0 +1,62 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "match": { + "parent.country_a": { + "analyzer": "standard", + "query": "ABC" + } + } + }, + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/fixture/component_geocoding/fallback.json b/test/unit/fixture/component_geocoding/fallback.json new file mode 100644 index 00000000..11891881 --- /dev/null +++ b/test/unit/fixture/component_geocoding/fallback.json @@ -0,0 +1,517 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [ + { + "bool": { + "_name": "fallback.neighbourhood", + "must": [ + { + "multi_match": 
{ + "query": "borough value", + "type": "phrase", + "fields": [ + "parent.borough", + "parent.borough_a" + ] + } + }, + { + "multi_match": { + "query": "city value", + "type": "phrase", + "fields": [ + "parent.locality", + "parent.locality_a", + "parent.localadmin", + "parent.localadmin_a" + ] + } + }, + { + "multi_match": { + "query": "county value", + "type": "phrase", + "fields": [ + "parent.county", + "parent.county_a", + "parent.macrocounty", + "parent.macrocounty_a" + ] + } + }, + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.region", + "parent.region_a", + "parent.macroregion", + "parent.macroregion_a" + ] + } + }, + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "neighbourhood" + } + } + } + }, + { + "bool": { + "_name": "fallback.borough", + "must": [ + { + "multi_match": { + "query": "borough value", + "type": "phrase", + "fields": [ + "parent.borough", + "parent.borough_a" + ] + } + }, + { + "multi_match": { + "query": "city value", + "type": "phrase", + "fields": [ + "parent.locality", + "parent.locality_a", + "parent.localadmin", + "parent.localadmin_a" + ] + } + }, + { + "multi_match": { + "query": "county value", + "type": "phrase", + "fields": [ + "parent.county", + "parent.county_a", + "parent.macrocounty", + "parent.macrocounty_a" + ] + } + }, + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.region", + "parent.region_a", + "parent.macroregion", + "parent.macroregion_a" + ] + } + }, + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "borough" + } + } + } + }, + { + "bool": { + "_name": "fallback.locality", + 
"must": [ + { + "multi_match": { + "query": "city value", + "type": "phrase", + "fields": [ + "parent.locality", + "parent.locality_a" + ] + } + }, + { + "multi_match": { + "query": "county value", + "type": "phrase", + "fields": [ + "parent.county", + "parent.county_a", + "parent.macrocounty", + "parent.macrocounty_a" + ] + } + }, + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.region", + "parent.region_a", + "parent.macroregion", + "parent.macroregion_a" + ] + } + }, + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "locality" + } + } + } + }, + { + "bool": { + "_name": "fallback.localadmin", + "must": [ + { + "multi_match": { + "query": "city value", + "type": "phrase", + "fields": [ + "parent.localadmin", + "parent.localadmin_a" + ] + } + }, + { + "multi_match": { + "query": "county value", + "type": "phrase", + "fields": [ + "parent.county", + "parent.county_a", + "parent.macrocounty", + "parent.macrocounty_a" + ] + } + }, + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.region", + "parent.region_a", + "parent.macroregion", + "parent.macroregion_a" + ] + } + }, + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "localadmin" + } + } + } + }, + { + "bool": { + "_name": "fallback.county", + "must": [ + { + "multi_match": { + "query": "county value", + "type": "phrase", + "fields": [ + "parent.county", + "parent.county_a" + ] + } + }, + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.region", + "parent.region_a", + "parent.macroregion", + "parent.macroregion_a" + ] + } + }, + { + 
"multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "county" + } + } + } + }, + { + "bool": { + "_name": "fallback.macrocounty", + "must": [ + { + "multi_match": { + "query": "county value", + "type": "phrase", + "fields": [ + "parent.macrocounty", + "parent.macrocounty_a" + ] + } + }, + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.region", + "parent.region_a", + "parent.macroregion", + "parent.macroregion_a" + ] + } + }, + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "macrocounty" + } + } + } + }, + { + "bool": { + "_name": "fallback.region", + "must": [ + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.region", + "parent.region_a" + ] + } + }, + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "region" + } + } + } + }, + { + "bool": { + "_name": "fallback.macroregion", + "must": [ + { + "multi_match": { + "query": "state value", + "type": "phrase", + "fields": [ + "parent.macroregion", + "parent.macroregion_a" + ] + } + }, + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a", + "parent.dependency", + "parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "macroregion" + } + } + } + }, + { + "bool": { + "_name": "fallback.dependency", + "must": [ + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.dependency", + 
"parent.dependency_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "dependency" + } + } + } + }, + { + "bool": { + "_name": "fallback.country", + "must": [ + { + "multi_match": { + "query": "country value", + "type": "phrase", + "fields": [ + "parent.country", + "parent.country_a" + ] + } + } + ], + "filter": { + "term": { + "layer": "country" + } + } + } + } + ] + } + }, + "filter": { + "bool": { + "must": [] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 20, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/fixture/component_geocoding/linguistic_bbox.json b/test/unit/fixture/component_geocoding/linguistic_bbox.json new file mode 100644 index 00000000..2912a10d --- /dev/null +++ b/test/unit/fixture/component_geocoding/linguistic_bbox.json @@ -0,0 +1,65 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "geo_bounding_box": { + "type": "indexed", + "center_point": { + "top": 11.51, + "right": -61.84, + "bottom": 47.47, + "left": -103.16 + } + } + }, + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} \ No newline at end of file diff --git 
a/test/unit/fixture/component_geocoding/linguistic_focus.json b/test/unit/fixture/component_geocoding/linguistic_focus.json new file mode 100644 index 00000000..cee04e8e --- /dev/null +++ b/test/unit/fixture/component_geocoding/linguistic_focus.json @@ -0,0 +1,68 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "weight": 2, + "linear": { + "center_point": { + "origin": { + "lat": 29.49136, + "lon": -82.50622 + }, + "offset": "0km", + "scale": "50km", + "decay": 0.5 + } + } + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/fixture/component_geocoding/linguistic_focus_bbox.json b/test/unit/fixture/component_geocoding/linguistic_focus_bbox.json new file mode 100644 index 00000000..8c9e8cef --- /dev/null +++ b/test/unit/fixture/component_geocoding/linguistic_focus_bbox.json @@ -0,0 +1,79 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "geo_bounding_box": { + "type": "indexed", + "center_point": { + "top": 11.51, + "right": -61.84, + "bottom": 47.47, + "left": -103.16 + } + } + }, + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "weight": 2, + "linear": { + "center_point": { + "origin": { + "lat": 29.49136, + "lon": -82.50622 + }, + "offset": "0km", + "scale": "50km", + "decay": 0.5 + } + } + }, + { + 
"field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} diff --git a/test/unit/fixture/component_geocoding/linguistic_focus_null_island.json b/test/unit/fixture/component_geocoding/linguistic_focus_null_island.json new file mode 100644 index 00000000..59fd11bb --- /dev/null +++ b/test/unit/fixture/component_geocoding/linguistic_focus_null_island.json @@ -0,0 +1,68 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "weight": 2, + "linear": { + "center_point": { + "origin": { + "lat": 0, + "lon": 0 + }, + "offset": "0km", + "scale": "50km", + "decay": 0.5 + } + } + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/fixture/component_geocoding/linguistic_only.json b/test/unit/fixture/component_geocoding/linguistic_only.json new file mode 100644 index 00000000..17a54486 --- /dev/null +++ b/test/unit/fixture/component_geocoding/linguistic_only.json @@ -0,0 +1,54 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + 
"max_boost": 20, + "functions": [ + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/fixture/component_geocoding/linguistic_viewport.json b/test/unit/fixture/component_geocoding/linguistic_viewport.json new file mode 100644 index 00000000..17a54486 --- /dev/null +++ b/test/unit/fixture/component_geocoding/linguistic_viewport.json @@ -0,0 +1,54 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/fixture/component_geocoding/linguistic_viewport_min_diagonal.json b/test/unit/fixture/component_geocoding/linguistic_viewport_min_diagonal.json new file mode 100644 index 00000000..17a54486 --- /dev/null +++ b/test/unit/fixture/component_geocoding/linguistic_viewport_min_diagonal.json @@ -0,0 +1,54 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "terms": { + "layer": [ + "test" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "field_value_factor": { + 
"modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 10, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/fixture/component_geocoding/with_source_filtering.json b/test/unit/fixture/component_geocoding/with_source_filtering.json new file mode 100644 index 00000000..659d5eb4 --- /dev/null +++ b/test/unit/fixture/component_geocoding/with_source_filtering.json @@ -0,0 +1,54 @@ +{ + "query": { + "function_score": { + "query": { + "filtered": { + "query": { + "bool": { + "should": [] + } + }, + "filter": { + "bool": { + "must": [ + { + "terms": { + "source": [ + "test_source" + ] + } + } + ] + } + } + } + }, + "max_boost": 20, + "functions": [ + { + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }, + { + "field_value_factor": { + "modifier": "log1p", + "field": "population", + "missing": 1 + }, + "weight": 2 + } + ], + "score_mode": "avg", + "boost_mode": "multiply" + } + }, + "sort": [ + "_score" + ], + "size": 20, + "track_scores": true +} \ No newline at end of file diff --git a/test/unit/middleware/trimByGranularityComponent.js b/test/unit/middleware/trimByGranularityComponent.js new file mode 100644 index 00000000..69c80e92 --- /dev/null +++ b/test/unit/middleware/trimByGranularityComponent.js @@ -0,0 +1,518 @@ +var trimByGranularity = require('../../../middleware/trimByGranularityComponent')(); + +module.exports.tests = {}; + +module.exports.tests.trimByGranularity = function(test, common) { + test('empty res and req should not throw exception', function(t) { + function testIt() { + trimByGranularity({}, {}, function() {}); + } + + t.doesNotThrow(testIt, 'an exception should not have been thrown'); + t.end(); + }); + + test('all 
records with fallback.* matched_queries name should retain only venues when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'venue 1', _matched_queries: ['fallback.venue'] }, + { name: 'venue 2', _matched_queries: ['fallback.venue'] }, + { name: 'street 1', _matched_queries: ['fallback.street'] }, + { name: 'address 1', _matched_queries: ['fallback.address'] }, + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'borough 1', _matched_queries: ['fallback.borough'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'venue 1', _matched_queries: ['fallback.venue'] }, + { name: 'venue 2', _matched_queries: ['fallback.venue'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only venue records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only streets when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'street 1', _matched_queries: ['fallback.street'] }, + { name: 'street 2', _matched_queries: ['fallback.street'] }, + { name: 'address 1', _matched_queries: ['fallback.address'] }, + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'borough 1', 
_matched_queries: ['fallback.borough'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'street 1', _matched_queries: ['fallback.street'] }, + { name: 'street 2', _matched_queries: ['fallback.street'] } + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only street records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only address when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'address 1', _matched_queries: ['fallback.address'] }, + { name: 'address 2', _matched_queries: ['fallback.address'] }, + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'borough 1', _matched_queries: ['fallback.borough'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] 
}, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'address 1', _matched_queries: ['fallback.address'] }, + { name: 'address 2', _matched_queries: ['fallback.address'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only address records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only neighbourhoods when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'neighbourhood 2', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'borough 1', _matched_queries: ['fallback.borough'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'neighbourhood 2', _matched_queries: ['fallback.neighbourhood'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only neighbourhood records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only boroughs when they are most granular', function(t) { + var req = { + clean: { + parsed_text: 
{ + borough: 'borough value' + } + } + }; + + var res = { + data: [ + { name: 'borough 1', _matched_queries: ['fallback.borough'] }, + { name: 'borough 2', _matched_queries: ['fallback.borough'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'borough 1', _matched_queries: ['fallback.borough'] }, + { name: 'borough 2', _matched_queries: ['fallback.borough'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only borough records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('if req.parsed_text has city but not borough then borough and city results should be returned', function(t) { + var req = { + clean: { + parsed_text: { + city: 'city value' + } + } + }; + + var res = { + data: [ + { name: 'borough 1', _matched_queries: ['fallback.borough'] }, + { name: 'borough 2', _matched_queries: ['fallback.borough'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'locality 2', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { 
name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'borough 1', _matched_queries: ['fallback.borough'] }, + { name: 'borough 2', _matched_queries: ['fallback.borough'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'locality 2', _matched_queries: ['fallback.locality'] } + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only borough/locality records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only localities when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'locality 2', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'locality 2', _matched_queries: ['fallback.locality'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only locality records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only 
localadmins when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'localadmin 2', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'localadmin 2', _matched_queries: ['fallback.localadmin'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only localadmin records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only counties when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'county 2', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'county 2', _matched_queries: ['fallback.county'] }, + ]; + + function testIt() 
{ + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only county records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only macrocounties when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'macrocounty 2', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'macrocounty 2', _matched_queries: ['fallback.macrocounty'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only macrocounty records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only regions when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'region 2', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'region 2', _matched_queries: ['fallback.region'] }, + ]; + + function testIt() { + 
trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only region records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only macroregions when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'macroregion 2', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'macroregion 2', _matched_queries: ['fallback.macroregion'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only macroregion records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain countries over dependencies', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'country 2', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'country 2', _matched_queries: ['fallback.country'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only country records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only dependencies when they are most granular', function(t) { + 
var req = { clean: {} }; + + var res = { + data: [ + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'dependency 2', _matched_queries: ['fallback.dependency'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'dependency 2', _matched_queries: ['fallback.dependency'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only dependency records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('presence of any non-fallback.* named queries should not trim', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'region', _matched_queries: ['fallback.region'] }, + { name: 'country', _matched_queries: ['fallback.country'] }, + { name: 'result with non-named query' } + ] + }; + + var expected_data = [ + { name: 'region', _matched_queries: ['fallback.region'] }, + { name: 'country', _matched_queries: ['fallback.country'] }, + { name: 'result with non-named query' } + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'all should results should have been retained'); + t.end(); + }); + } + + testIt(); + + }); + +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('[middleware] trimByGranularity: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/query/component_geocoding.js b/test/unit/query/component_geocoding.js new file mode 100644 index 00000000..68bedf6a --- /dev/null +++ b/test/unit/query/component_geocoding.js @@ -0,0 +1,247 @@ +var generate = require('../../../query/component_geocoding'); +var fs = require('fs'); + +module.exports.tests = {}; + +module.exports.tests.interface = function(test, 
common) { + test('valid interface', function(t) { + t.equal(typeof generate, 'function', 'valid function'); + t.end(); + }); +}; + +module.exports.tests.query = function(test, common) { + test('valid search + focus + bbox', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', + querySize: 10, + 'focus.point.lat': 29.49136, 'focus.point.lon': -82.50622, + 'boundary.rect.min_lat': 47.47, + 'boundary.rect.max_lon': -61.84, + 'boundary.rect.max_lat': 11.51, + 'boundary.rect.min_lon': -103.16, + layers: ['test'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/linguistic_focus_bbox'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus_bbox'); + t.end(); + }); + + test('valid search + bbox', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', + querySize: 10, + 'boundary.rect.min_lat': 47.47, + 'boundary.rect.max_lon': -61.84, + 'boundary.rect.max_lat': 11.51, + 'boundary.rect.min_lon': -103.16, + layers: ['test'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/linguistic_bbox'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_bbox'); + t.end(); + }); + + test('valid lingustic-only search', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', querySize: 10, + layers: ['test'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/linguistic_only'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_only'); + t.end(); + }); + + test('search 
search + focus', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', querySize: 10, + 'focus.point.lat': 29.49136, 'focus.point.lon': -82.50622, + layers: ['test'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/linguistic_focus'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus'); + t.end(); + }); + + test('search search + viewport', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', querySize: 10, + 'focus.viewport.min_lat': 28.49136, + 'focus.viewport.max_lat': 30.49136, + 'focus.viewport.min_lon': -87.50622, + 'focus.viewport.max_lon': -77.50622, + layers: ['test'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/linguistic_viewport'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_viewport'); + t.end(); + }); + + // viewport scale sizing currently disabled. 
+ // ref: https://github.com/pelias/api/pull/388 + test('search with viewport diagonal < 1km should set scale to 1km', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', querySize: 10, + 'focus.viewport.min_lat': 28.49135, + 'focus.viewport.max_lat': 28.49137, + 'focus.viewport.min_lon': -87.50622, + 'focus.viewport.max_lon': -87.50624, + layers: ['test'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/linguistic_viewport_min_diagonal'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid search query'); + t.end(); + }); + + test('search search + focus on null island', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', querySize: 10, + 'focus.point.lat': 0, 'focus.point.lon': 0, + layers: ['test'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/linguistic_focus_null_island'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus_null_island'); + t.end(); + }); + + test('parsed_text with all fields should use FallbackQuery', function(t) { + var clean = { + parsed_text: { + query: 'query value', + category: 'category value', + number: 'number value', + street: 'street value', + neighbourhood: 'neighbourhood value', + borough: 'borough value', + postalcode: 'postalcode value', + city: 'city value', + county: 'county value', + state: 'state value', + country: 'country value' + } + }; + + var query = generate(clean); + + var compiled = JSON.parse(JSON.stringify(query)); + var expected = require('../fixture/component_geocoding/fallback'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'fallbackQuery'); + 
t.end(); + + }); + + test('valid boundary.country search', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', querySize: 10, + layers: ['test'], + 'boundary.country': 'ABC' + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/boundary_country'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search: valid boundary.country query'); + t.end(); + }); + + test('valid sources filter', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + 'text': 'test', + 'sources': ['test_source'] + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/component_geocoding/with_source_filtering'); + + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search: valid search query with source filtering'); + t.end(); + }); + +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('component_geocoding query ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/query/text_parser.js b/test/unit/query/text_parser.js index 34830c7f..839ddfbd 100644 --- a/test/unit/query/text_parser.js +++ b/test/unit/query/text_parser.js @@ -21,6 +21,7 @@ module.exports.tests.query = function(test, common) { t.false(vs.isset('input:category')); t.false(vs.isset('input:housenumber')); t.false(vs.isset('input:street')); + t.false(vs.isset('input:address')); t.false(vs.isset('input:neighbourhood')); t.false(vs.isset('input:borough')); t.false(vs.isset('input:postcode')); @@ -38,6 +39,7 @@ module.exports.tests.query = function(test, common) { category: 'category value', number: 'number value', street: 'street value', + 
address: 'address value', neighbourhood: 'neighbourhood value', borough: 'borough value', postalcode: 'postalcode value', @@ -54,6 +56,7 @@ module.exports.tests.query = function(test, common) { t.equals(vs.var('input:category').toString(), 'category value'); t.equals(vs.var('input:housenumber').toString(), 'number value'); t.equals(vs.var('input:street').toString(), 'street value'); + t.equals(vs.var('input:address').toString(), 'address value'); t.equals(vs.var('input:neighbourhood').toString(), 'neighbourhood value'); t.equals(vs.var('input:borough').toString(), 'borough value'); t.equals(vs.var('input:postcode').toString(), 'postalcode value'); diff --git a/test/unit/run.js b/test/unit/run.js index 3a8114f0..6ef7d417 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -30,8 +30,10 @@ var tests = [ require('./middleware/sendJSON'), require('./middleware/normalizeParentIds'), require('./middleware/trimByGranularity'), + require('./middleware/trimByGranularityComponent'), require('./query/autocomplete'), require('./query/autocomplete_defaults'), + require('./query/component_geocoding'), require('./query/search_defaults'), require('./query/reverse_defaults'), require('./query/reverse'), @@ -44,11 +46,13 @@ var tests = [ require('./sanitizer/_geo_reverse'), require('./sanitizer/_groups'), require('./sanitizer/_ids'), + require('./sanitizer/_iso2_to_iso3'), require('./sanitizer/_layers'), require('./sanitizer/_single_scalar_parameters'), require('./sanitizer/_size'), require('./sanitizer/_sources'), require('./sanitizer/_sources_and_layers'), + require('./sanitizer/_synthesize_analysis'), require('./sanitizer/_text'), require('./sanitizer/_text_addressit'), require('./sanitizer/_tokenizer'), @@ -57,6 +61,7 @@ var tests = [ require('./sanitizer/nearby'), require('./src/backend'), require('./sanitizer/autocomplete'), + require('./sanitizer/component_geocoding'), require('./sanitizer/place'), require('./sanitizer/reverse'), require('./sanitizer/sanitizeAll'), diff 
--git a/test/unit/sanitizer/_iso2_to_iso3.js b/test/unit/sanitizer/_iso2_to_iso3.js
new file mode 100644
index 00000000..7d27499b
--- /dev/null
+++ b/test/unit/sanitizer/_iso2_to_iso3.js
@@ -0,0 +1,115 @@
+const sanitizer = require('../../../sanitizer/_iso2_to_iso3');
+
+module.exports.tests = {};
+
+// a known ISO2 country code in parsed_text (even mixed-case, e.g. 'tH') is expected to become ISO3;
+// an unknown or undefined country, or a clean object without parsed_text, must be left untouched
+module.exports.tests.text_parser = function(test, common) {
+  test('clean without parsed_text should not throw exception', function(t) {
+    const raw = {};
+
+    const clean = {
+    };
+
+    const expected_clean = {
+    };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+  test('country with known iso2 should be converted to iso3', function(t) {
+    const raw = {};
+
+    const clean = {
+      parsed_text: {
+        address: 'address value',
+        country: 'tH'
+      }
+    };
+
+    const expected_clean = {
+      parsed_text: {
+        address: 'address value',
+        country: 'THA'
+      }
+    };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+  test('country with unknown iso2 should be unchanged', function(t) {
+    const raw = {};
+
+    const clean = {
+      parsed_text: {
+        address: 'address value',
+        country: 'TB'
+      }
+    };
+
+    const expected_clean = {
+      parsed_text: {
+        address: 'address value',
+        country: 'TB'
+      }
+    };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+  test('undefined country should be unchanged', function(t) {
+    const raw = {};
+
+    const clean = {
+      parsed_text: {
+        address: 'address value',
+        country: undefined
+      }
+    };
+
+    const expected_clean = {
+      parsed_text: {
+        address: 'address value',
+        country: undefined
+      }
+    };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+};
+
+// runs every test group registered above, prefixing each test name with the sanitizer name
+module.exports.all = function (tape, common) {
+  function test(name, testFunction) {
+    return tape('sanitizer _iso2_to_iso3: ' + name, testFunction);
+  }
+
+  for( const testCase in module.exports.tests ){
+    module.exports.tests[testCase](test, common);
+  }
+};
diff --git a/test/unit/sanitizer/_synthesize_analysis.js b/test/unit/sanitizer/_synthesize_analysis.js
new file mode 100644
index 00000000..d137ad49
--- /dev/null
+++ b/test/unit/sanitizer/_synthesize_analysis.js
@@ -0,0 +1,129 @@
+const sanitizer = require('../../../sanitizer/_synthesize_analysis');
+const _ = require('lodash');
+
+module.exports.tests = {};
+
+// the sanitizer is expected to copy trimmed, whitespace-normalised raw fields into clean.parsed_text
+module.exports.tests.text_parser = function(test, common) {
+  test('all variables should be parsed', function(t) {
+    const raw = {
+      query: ' \t query \t value \t ',
+      address: ' \t address \t value \t ',
+      neighbourhood: ' \t neighbourhood \t value \t ',
+      borough: ' \t borough \t value \t ',
+      city: ' \t city \t value \t ',
+      county: ' \t county \t value \t ',
+      state: ' \t state \t value \t ',
+      postalcode: ' \t postalcode \t value \t ',
+      country: ' \t country \t value \t '
+    };
+
+    const clean = {};
+
+    const expected_clean = {
+      parsed_text: {
+        address: 'address value',
+        neighbourhood: 'neighbourhood value',
+        borough: 'borough value',
+        city: 'city value',
+        county: 'county value',
+        state: 'state value',
+        postalcode: 'postalcode value',
+        country: 'country value'
+      }
+    };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, [], 'no errors');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+  test('non-string and blank string values should be treated as not supplied', function(t) {
+    // helper to return a random value that's considered invalid
+    // NOTE: sampled independently per field, so each run only covers one random combination
+    function getInvalidValue() {
+      return _.sample([{}, [], false, '', ' \t ', 17, undefined]);
+    }
+
+    const raw = {
+      address: getInvalidValue(),
+      neighbourhood: getInvalidValue(),
+      borough: getInvalidValue(),
+      city: getInvalidValue(),
+      county: getInvalidValue(),
+      state: getInvalidValue(),
+      postalcode: getInvalidValue(),
+      country: getInvalidValue()
+    };
+
+    const clean = {};
+
+    const expected_clean = {
+      parsed_text: {}
+    };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, ['at least one of the following fields is required: ' +
+      'address, neighbourhood, borough, city, county, state, postalcode, country'], 'error should be returned');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+  test('no supplied fields should return error', function(t) {
+    const raw = {};
+
+    const clean = {};
+
+    const expected_clean = { parsed_text: {} };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, ['at least one of the following fields is required: ' +
+      'address, neighbourhood, borough, city, county, state, postalcode, country'], 'error should be returned');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+  test('postalcode-only parsed_text should return error', function(t) {
+    const raw = {
+      postalcode: 'postalcode value'
+    };
+
+    const clean = {};
+
+    const expected_clean = {
+      parsed_text: {
+        postalcode: 'postalcode value'
+      }
+    };
+
+    const messages = sanitizer(raw, clean);
+
+    t.deepEquals(clean, expected_clean);
+    t.deepEquals(messages.errors, ['postalcode-only inputs are not supported'], 'error should be returned');
+    t.deepEquals(messages.warnings, [], 'no warnings');
+    t.end();
+
+  });
+
+};
+
+// runs every test group registered above, prefixing each test name with the sanitizer name
+module.exports.all = function (tape, common) {
+  function test(name, testFunction) {
+    return tape('sanitizer _synthesize_analysis: ' + name, testFunction);
+  }
+
+  for( const testCase in module.exports.tests ){
+    module.exports.tests[testCase](test, common);
+  }
+};
diff --git a/test/unit/sanitizer/component_geocoding.js b/test/unit/sanitizer/component_geocoding.js
new file mode 100644
index 00000000..6477b738
--- /dev/null
+++ b/test/unit/sanitizer/component_geocoding.js
@@ -0,0 +1,122 @@
+var proxyquire = require('proxyquire').noCallThru();
+
+module.exports.tests = {};
+
+module.exports.tests.sanitize = function(test, common) {
+  test('verify that all sanitizers were called as expected', function(t) {
+    var called_sanitizers = [];
+
+    // rather than re-verify the functionality of all the sanitizers, this test just verifies that they
+    // were all called correctly
+    var search = proxyquire('../../../sanitizer/component_geocoding', {
+      '../sanitizer/_deprecate_quattroshapes': function() {
+        called_sanitizers.push('_deprecate_quattroshapes');
+        return { errors: [], warnings: [] };
+      },
+      '../sanitizer/_single_scalar_parameters': function() {
+        called_sanitizers.push('_single_scalar_parameters');
+        return { errors: [], warnings: [] };
+      },
+      '../sanitizer/_synthesize_analysis': function() {
+        called_sanitizers.push('_synthesize_analysis');
+        return { errors: [], warnings: [] };
+      },
+      '../sanitizer/_iso2_to_iso3': function() {
+        called_sanitizers.push('_iso2_to_iso3');
+        return { errors: [], warnings: [] };
+      },
+      // factory stub: only a zero-argument call yields the recording sanitizer, anything else throws
+      '../sanitizer/_size': function() {
+        if (arguments.length === 0) {
+          return function() {
+            called_sanitizers.push('_size');
+            return { errors: [], warnings: [] };
+          };
+
+        } else {
+          throw new Error('should not have passed any parameters to _size');
+        }
+
+      },
+      // factory stub: accepts only 'layers' or 'sources' and records which one the sanitizer was built for
+      '../sanitizer/_targets': function(type) {
+        if (['layers', 'sources'].indexOf(type) !== -1) {
+          return function() {
+            called_sanitizers.push('_targets/' + type);
+            return { errors: [], warnings: [] };
+          };
+
+        }
+        else {
+          throw new Error('incorrect parameters passed to _targets');
+        }
+
+      },
+      '../sanitizer/_sources_and_layers': function() {
+        called_sanitizers.push('_sources_and_layers');
+        return { errors: [], warnings: [] };
+      },
+      // factory stub: must be configured with exactly ('private', false), anything else throws
+      '../sanitizer/_flag_bool': function() {
+        if (arguments[0] === 'private' && arguments[1] === false) {
+          return function() {
+            called_sanitizers.push('_flag_bool');
+            return { errors: [], warnings: [] };
+          };
+
+        }
+        else {
+          throw new Error('incorrect parameters passed to _flag_bool');
+        }
+
+      },
+      '../sanitizer/_geo_search': function() {
+        called_sanitizers.push('_geo_search');
+        return { errors: [], warnings: [] };
+      },
+      '../sanitizer/_boundary_country': function() {
+        called_sanitizers.push('_boundary_country');
+        return { errors: [], warnings: [] };
+      },
+      '../sanitizer/_categories': function() {
+        called_sanitizers.push('_categories');
+        return { errors: [], warnings: [] };
+      },
+    });
+
+    var expected_sanitizers = [
+      '_deprecate_quattroshapes',
+      '_single_scalar_parameters',
+      '_synthesize_analysis',
+      '_iso2_to_iso3',
+      '_size',
+      '_targets/layers',
+      '_targets/sources',
+      '_sources_and_layers',
+      '_flag_bool',
+      '_geo_search',
+      '_boundary_country',
+      '_categories'
+    ];
+
+    var req = {};
+    var res = {};
+
+    search.middleware(req, res, function(){
+      t.deepEquals(called_sanitizers, expected_sanitizers);
+      t.end();
+    });
+  });
+};
+
+// runs every test group registered above, prefixing each test name with the endpoint label
+module.exports.all = function (tape, common) {
+
+  function test(name, testFunction) {
+    return tape('SANITIZE /component ' + name, testFunction);
+  }
+
+  for( var testCase in module.exports.tests ){
+    module.exports.tests[testCase](test, common);
+  }
+};