diff --git a/controller/search.js b/controller/search.js index 271d2899..39183fdf 100644 --- a/controller/search.js +++ b/controller/search.js @@ -17,6 +17,13 @@ function setup( config, backend, query ){ return next(); } + // do not run controller if there are already results + // this was added during libpostal integration. if the libpostal parse/query + // doesn't return anything then fallback to old search-engine-y behavior + if (res && res.hasOwnProperty('data') && res.data.length > 0) { + return next(); + } + var cleanOutput = _.cloneDeep(req.clean); if (logging.isDNT(req)) { cleanOutput = logging.removeFields(cleanOutput); @@ -24,11 +31,18 @@ function setup( config, backend, query ){ // log clean parameters for stats logger.info('[req]', 'endpoint=' + req.path, cleanOutput); + var renderedQuery = query(req.clean); + + // if there's no query to call ES with, skip the service + if (_.isUndefined(renderedQuery)) { + return next(); + } + // backend command var cmd = { index: config.indexName, searchType: 'dfs_query_then_fetch', - body: query( req.clean ) + body: renderedQuery.body }; logger.debug( '[ES req]', cmd ); @@ -47,7 +61,9 @@ function setup( config, backend, query ){ // set response data else { res.data = docs; - res.meta = meta; + res.meta = meta || {}; + // store the query_type for subsequent middleware + res.meta.query_type = renderedQuery.type; } logger.debug('[ES response]', docs); next(); diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js new file mode 100644 index 00000000..712c7959 --- /dev/null +++ b/helper/diffPlaces.js @@ -0,0 +1,177 @@ +var _ = require('lodash'); +var placeTypes = require('./placeTypes'); + +/** + * Compare the layer properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. 
+ * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function assertLayerMatch(item1, item2) { + if (item1.layer === item2.layer) { + return false; + } + + throw new Error('different'); +} + +/** + * Compare the parent.*_id properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function assertParentHierarchyMatch(item1, item2) { + // if neither object has parent, assume same + if (!item1.hasOwnProperty('parent') && !item2.hasOwnProperty('parent')) { + return false; + } + + // if both have parent, do the rest of the checking + if (item1.hasOwnProperty('parent') && item2.hasOwnProperty('parent')) { + placeTypes.forEach(function (placeType) { + // don't consider its own id + if (placeType === item1.layer) { + return; + } + propMatch(item1.parent, item2.parent, placeType + '_id'); + }); + return false; + } + + // if one has parent and the other doesn't consider different + throw new Error('different'); +} + +/** + * Compare the name.* properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function assertNameMatch(item1, item2) { + if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { + for (var lang in item1.name) { + if(item2.name.hasOwnProperty(lang) || lang === 'default') { + // do not consider absence of an additional name as a difference + propMatch(item1.name, item2.name, lang); + } + } + } + else { + propMatch(item1, item2, 'name'); + } +} + +/** + * Compare the address_parts properties if they exist. + * Returns false if the objects are the same, and throws + * an exception with the message 'different' if not. 
+ * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function assertAddressMatch(item1, item2) { + // if neither record has address, assume same + if (!item1.hasOwnProperty('address_parts') && !item2.hasOwnProperty('address_parts')) { + return false; + } + + // if both have address, check parts + if (item1.hasOwnProperty('address_parts') && item2.hasOwnProperty('address_parts')) { + propMatch(item1.address_parts, item2.address_parts, 'number'); + propMatch(item1.address_parts, item2.address_parts, 'street'); + + // only compare zip if both records have it, otherwise just ignore and assume it's the same + // since by this time we've already compared parent hierarchies + if (item1.address_parts.hasOwnProperty('zip') && item2.address_parts.hasOwnProperty('zip')) { + propMatch(item1.address_parts, item2.address_parts, 'zip'); + } + + return false; + } + + // one has address and the other doesn't, different! + throw new Error('different'); +} + +/** + * Compare the two records and return true if they differ and false if same. + * + * @param {object} item1 + * @param {object} item2 + * @returns {boolean} + * @throws {Error} + */ +function isDifferent(item1, item2) { + try { + assertLayerMatch(item1, item2); + assertParentHierarchyMatch(item1, item2); + assertNameMatch(item1, item2); + assertAddressMatch(item1, item2); + } + catch (err) { + if (err.message === 'different') { + return true; + } + throw err; + } + + return false; +} + +/** + * Throw exception if properties are different + * + * @param {object} item1 + * @param {object} item2 + * @param {string} prop + * @throws {Error} + */ +function propMatch(item1, item2, prop) { + var prop1 = item1[prop]; + var prop2 = item2[prop]; + + // in the case the property is an array (currently only in parent schema) + // simply take the 1st item. 
this will change in the near future to support multiple hierarchies + if (_.isArray(prop1)) { prop1 = prop1[0]; } + if (_.isArray(prop2)) { prop2 = prop2[0]; } + + if (normalizeString(prop1) !== normalizeString(prop2)) { + throw new Error('different'); + } +} + +/** + * Remove punctuation and lowercase + * + * @param {string} str + * @returns {string} + */ +function normalizeString(str) { + if (!_.isString(str)) { + return str; + } + + if (_.isEmpty(str)) { + return ''; + } + + return str.toLowerCase().split(/[ ,-]+/).join(' '); +} + +module.exports.isDifferent = isDifferent; \ No newline at end of file diff --git a/helper/geojsonify_place_details.js b/helper/geojsonify_place_details.js index 297ca3bd..b936cd41 100644 --- a/helper/geojsonify_place_details.js +++ b/helper/geojsonify_place_details.js @@ -8,7 +8,9 @@ var DETAILS_PROPS = [ { name: 'street', type: 'string' }, { name: 'postalcode', type: 'string' }, { name: 'confidence', type: 'default' }, + { name: 'match_type', type: 'string' }, { name: 'distance', type: 'default' }, + { name: 'accuracy', type: 'string' }, { name: 'country', type: 'string' }, { name: 'country_gid', type: 'string' }, { name: 'country_a', type: 'string' }, diff --git a/helper/type_mapping.js b/helper/type_mapping.js index b46751c3..c43a96bb 100644 --- a/helper/type_mapping.js +++ b/helper/type_mapping.js @@ -51,7 +51,7 @@ var LAYERS_BY_SOURCE = { 'locality','borough', 'neighbourhood', 'venue' ], whosonfirst: [ 'continent', 'country', 'dependency', 'macroregion', 'region', 'locality', 'localadmin', 'macrocounty', 'county', 'macrohood', 'borough', - 'neighbourhood', 'microhood', 'disputed'] + 'neighbourhood', 'microhood', 'disputed', 'venue'] }; /* @@ -60,7 +60,9 @@ var LAYERS_BY_SOURCE = { * may have layers that mean the same thing but have a different name */ var LAYER_ALIASES = { - 'coarse': LAYERS_BY_SOURCE.whosonfirst + 'coarse': [ 'continent', 'country', 'dependency', 'macroregion', 'region', + 'locality', 'localadmin', 
'macrocounty', 'county', 'macrohood', 'borough', + 'neighbourhood', 'microhood', 'disputed'] }; // create a list of all layers by combining each entry from LAYERS_BY_SOURCE diff --git a/index.js b/index.js index 8df79add..42116f53 100644 --- a/index.js +++ b/index.js @@ -2,7 +2,8 @@ var cluster = require('cluster'), app = require('./app'), port = ( process.env.PORT || 3100 ), - multicore = true; + // when pelias/api#601 is done this can be changed to `true` + multicore = false; /** cluster webserver across all cores **/ if( multicore ){ diff --git a/middleware/accuracy.js b/middleware/accuracy.js new file mode 100644 index 00000000..519973c3 --- /dev/null +++ b/middleware/accuracy.js @@ -0,0 +1,57 @@ +/** + * + * Accuracy level should be set for each item in the results. + * The level can be any of the following: + * - point + * - interpolated + * - centroid + */ + +var check = require('check-types'); + +var accuracyLevelPoint = 'point'; +var accuracyLevelInterpolated = 'interpolated'; +var accuracyLevelCentroid = 'centroid'; + + +function setup() { + return computeAccuracy; +} + +function computeAccuracy(req, res, next) { + // do nothing if no result data set + if (check.undefined(res) || check.undefined(res.data)) { + return next(); + } + + // loop through data items and determine accuracy levels + res.data = res.data.map(computeAccuracyLevelForResult); + + next(); +} + +/** + * Determine accuracy level based on the type of result being returned. 
+ * + * @param {object} hit + * @returns {object} + */ +function computeAccuracyLevelForResult(hit) { + + // TODO: add a check for interpolated addresses when that feature lands + + switch (hit.layer) { + case 'venue': + case 'address': + hit.accuracy = accuracyLevelPoint; + break; + // this means it's a street or admin area + default: + hit.accuracy = accuracyLevelCentroid; + break; + } + + return hit; +} + +module.exports = setup; diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js index 2e9eb1c6..8f5b61fb 100644 --- a/middleware/confidenceScore.js +++ b/middleware/confidenceScore.js @@ -25,9 +25,10 @@ function setup(peliasConfig) { } function computeScores(req, res, next) { - // do nothing if no result data set + // do nothing if no result data set or if query is not of the original variety if (check.undefined(req.clean) || check.undefined(res) || - check.undefined(res.data) || check.undefined(res.meta)) { + check.undefined(res.data) || check.undefined(res.meta) || + res.meta.query_type !== 'original') { return next(); } diff --git a/middleware/confidenceScoreFallback.js b/middleware/confidenceScoreFallback.js new file mode 100644 index 00000000..8593ed60 --- /dev/null +++ b/middleware/confidenceScoreFallback.js @@ -0,0 +1,118 @@ +/** + * + * Basic confidence score should be computed and returned for each item in the results. + * The score should range between 0-1, and take into consideration as many factors as possible. 
+ * + * Some factors to consider: + * + * - number of results from ES + * - fallback status (aka layer match between expected and actual) + */ + +var check = require('check-types'); +var logger = require('pelias-logger').get('api-confidence'); + +function setup() { + return computeScores; +} + +function computeScores(req, res, next) { + // do nothing if no result data set or if the query is not of the fallback variety + // later add disambiguation to this list + if (check.undefined(req.clean) || check.undefined(res) || + check.undefined(res.data) || check.undefined(res.meta) || + res.meta.query_type !== 'fallback') { + return next(); + } + + // loop through data items and determine confidence scores + res.data = res.data.map(computeConfidenceScore.bind(null, req)); + + next(); +} + +/** + * Check all types of things to determine how confident we are that this result + * is correct. + * + * @param {object} req + * @param {object} hit + * @returns {object} + */ +function computeConfidenceScore(req, hit) { + + // if parsed text doesn't exist, which it never should, just assign a low confidence and move on + if (!req.clean.hasOwnProperty('parsed_text')) { + hit.confidence = 0.1; + hit.match_type = 'unknown'; + return hit; + } + + // start with a confidence level of 1 because we trust ES queries to be accurate + hit.confidence = 1.0; + + // in the case of fallback there might be deductions + hit.confidence *= checkFallbackLevel(req, hit); + + // truncate the precision + hit.confidence = Number((hit.confidence).toFixed(3)); + + return hit; +} + +function checkFallbackLevel(req, hit) { + if (checkFallbackOccurred(req, hit)) { + hit.match_type = 'fallback'; + + // if we know a fallback occurred, deduct points based on layer granularity + switch (hit.layer) { + case 'venue': + case 'address': + logger.warn('Fallback scenarios should not result in address or venue records!', req.clean.parsed_text); + return 0.8; + case 'street': + return 0.8; + case 'locality': + case 
'borough': + case 'neighbourhood': + return 0.6; + case 'macrocounty': + case 'county': + case 'localadmin': + return 0.4; + case 'region': + return 0.3; + case 'country': + case 'dependency': + case 'macroregion': + return 0.1; + default: + return 0.1; + } + } + + hit.match_type = 'exact'; + return 1.0; +} + +function checkFallbackOccurred(req, hit) { + // at this time we only do this for address queries, so keep this simple + // TODO: add other layer checks once we start handling disambiguation + + return (requestedAddress(req) && hit.layer !== 'address') || + (requestedStreet(req) && hit.layer !== 'street'); +} + +function requestedAddress(req) { + // house number and street name were specified + return req.clean.parsed_text.hasOwnProperty('number') && + req.clean.parsed_text.hasOwnProperty('street'); +} + +function requestedStreet(req) { + // only street name was specified + return !req.clean.parsed_text.hasOwnProperty('number') && + req.clean.parsed_text.hasOwnProperty('street'); +} + +module.exports = setup; diff --git a/middleware/dedupe.js b/middleware/dedupe.js index 786675c4..b40f1806 100644 --- a/middleware/dedupe.js +++ b/middleware/dedupe.js @@ -1,5 +1,6 @@ var logger = require('pelias-logger').get('api'); var _ = require('lodash'); +var isDifferent = require('../helper/diffPlaces').isDifferent; function setup() { return dedupeResults; @@ -19,7 +20,7 @@ function dedupeResults(req, res, next) { uniqueResults.push(hit); } else { - logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default }); + logger.info('[dupe]', { query: req.clean.text, hit: hit.name.default + ' ' + hit.source + ':' + hit._id }); } // stop looping when requested size has been reached in uniqueResults @@ -31,89 +32,4 @@ function dedupeResults(req, res, next) { next(); } -/** - * @param {object} item1 - * @param {object} item2 - * @returns {boolean} - * @throws {Error} - */ -function isDifferent(item1, item2) { - try { - if (item1.hasOwnProperty('parent') && 
item2.hasOwnProperty('parent')) { - propMatch(item1.parent, item2.parent, 'region_a'); - propMatch(item1.parent, item2.parent, 'country'); - propMatch(item1.parent, item2.parent, 'locality'); - propMatch(item1.parent, item2.parent, 'neighbourhood'); - } - else if (item1.parent !== item2.parent) { - throw new Error('different'); - } - - if (item1.hasOwnProperty('name') && item2.hasOwnProperty('name')) { - for (var lang in item1.name) { - if(item2.name[lang] || lang === 'default') { - // do not consider absence of an additional name as a difference - propMatch(item1.name, item2.name, lang); - } - } - } - else { - propMatch(item1, item2, 'name'); - } - - if (item1.hasOwnProperty('address_parts') && item2.hasOwnProperty('address_parts')) { - propMatch(item1.address_parts, item2.address_parts, 'number'); - propMatch(item1.address_parts, item2.address_parts, 'street'); - propMatch(item1.address_parts, item2.address_parts, 'zip'); - } - else if (item1.address_parts !== item2.address_parts) { - throw new Error('different'); - } - } - catch (err) { - if (err.message === 'different') { - return true; - } - throw err; - } - - return false; -} - -/** - * Throw exception if properties are different - * - * @param {object} item1 - * @param {object} item2 - * @param {string} prop - * @throws {Error} - */ -function propMatch(item1, item2, prop) { - var prop1 = item1[prop]; - var prop2 = item2[prop]; - - // in the case the property is an array (currently only in parent schema) - // simply take the 1st item. 
this will change in the near future to support multiple hierarchies - if (_.isArray(prop1)) { prop1 = prop1[0]; } - if (_.isArray(prop2)) { prop2 = prop2[0]; } - - if (normalizeString(prop1) !== normalizeString(prop2)) { - throw new Error('different'); - } -} - -/** - * Remove punctuation and lowercase - * - * @param {string} str - * @returns {string} - */ -function normalizeString(str) { - if (!str) { - return ''; - } - return str.toLowerCase().split(/[ ,-]+/).join(' '); -} - - module.exports = setup; diff --git a/middleware/geocodeJSON.js b/middleware/geocodeJSON.js index f4ee8c20..3b5170dd 100644 --- a/middleware/geocodeJSON.js +++ b/middleware/geocodeJSON.js @@ -1,6 +1,7 @@ var url = require('url'); var extend = require('extend'); var geojsonify = require('../helper/geojsonify'); +var _ = require('lodash'); /** * Returns a middleware function that converts elasticsearch @@ -79,7 +80,8 @@ function convertToGeocodeJSON(req, res, next, opts) { function addMessages(req, msgType, geocoding) { if (req.hasOwnProperty(msgType) && req[msgType].length) { - geocoding[msgType] = req[msgType]; + // cleanup arrays to make sure there are no duplicates + geocoding[msgType] = _.uniq(req[msgType]); } } diff --git a/middleware/trimByGranularity.js b/middleware/trimByGranularity.js new file mode 100644 index 00000000..da4ef4f9 --- /dev/null +++ b/middleware/trimByGranularity.js @@ -0,0 +1,74 @@ +var _ = require('lodash'); + +// This middleware component trims the results array by granularity when +// FallbackQuery was used. FallbackQuery is used for inputs like +// `1090 N Charlotte St, Lancaster, PA` where the address may not exist and +// we must fall back to trying `Lancaster, PA`. If the address does exist then +// FallbackQuery will return results for: +// - address+city+state +// - city+state +// - state +// +// Because the address matched, we're not interested in city+state or state, so +// this component removes results that aren't the most granular. 
+ +// layers in increasing order of granularity +var layers = [ + 'venue', + 'address', + 'street', + 'neighbourhood', + 'borough', + 'locality', + 'localadmin', + 'county', + 'macrocounty', + 'region', + 'macroregion', + 'dependency', + 'country' +]; + +// this helper method returns `true` if every result has a matched_query +// starting with `fallback.` +function isFallbackQuery(results) { + return results.every(function(result) { + return result.hasOwnProperty('_matched_queries') && + !_.isEmpty(result._matched_queries) && + _.startsWith(result._matched_queries[0], 'fallback.'); + }); +} + +function hasRecordsAtLayers(results, layer) { + return results.some(function(result) { + return result._matched_queries[0] === 'fallback.' + layer; + }); +} + +function retainRecordsAtLayers(results, layer) { + return results.filter(function(result) { + return result._matched_queries[0] === 'fallback.' + layer; + }); +} + +function setup() { + return function trim(req, res, next) { + // don't do anything if there are no results or there are non-fallback.* named queries + // there should never be a mixture of fallback.* and non-fallback.* named queries + if (_.isUndefined(res.data) || !isFallbackQuery(res.data)) { + return next(); + } + + // start at the most granular possible layer. if there are results at a layer + // then remove everything not at that layer. 
+ layers.forEach(function(layer) { + if (hasRecordsAtLayers(res.data, layer )) { + res.data = retainRecordsAtLayers(res.data, layer); + } + }); + + next(); + }; +} + +module.exports = setup; diff --git a/package.json b/package.json index 31c8d36f..8d1025c9 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "node": ">=0.10.26" }, "dependencies": { + "addressit": "1.4.0", "async": "^2.0.0", "check-types": "^7.0.0", "elasticsearch": "^11.0.0", @@ -42,7 +43,7 @@ "express": "^4.8.8", "express-http-proxy": "^0.7.0", "extend": "3.0.0", - "geojson": "^0.3.0", + "geojson": "^0.4.0", "geojson-extent": "^0.3.1", "geolib": "^2.0.18", "geopipes-elasticsearch-backend": "^0.2.0", @@ -54,7 +55,7 @@ "pelias-config": "2.1.0", "pelias-logger": "0.0.8", "pelias-model": "4.2.0", - "pelias-query": "8.5.0", + "pelias-query": "8.6.0", "pelias-text-analyzer": "1.3.0", "stats-lite": "2.0.3", "through2": "2.0.1" @@ -66,7 +67,7 @@ "jshint": "^2.5.6", "nsp": "^2.2.0", "precommit-hook": "^3.0.0", - "proxyquire": "^1.7.7", + "proxyquire": "^1.7.10", "source-map": "^0.5.6", "tap-dot": "1.0.5", "tape": "^4.5.1", diff --git a/query/autocomplete.js b/query/autocomplete.js index 33f394f5..d0e766f3 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -1,7 +1,7 @@ var peliasQuery = require('pelias-query'), defaults = require('./autocomplete_defaults'), - textParser = require('./text_parser'), + textParser = require('./text_parser_addressit'), check = require('check-types'); // additional views (these may be merged in to pelias/query at a later date) @@ -114,7 +114,10 @@ function generateQuery( clean ){ textParser( clean.parsed_text, vs ); } - return query.render( vs ); + return { + type: 'autocomplete', + body: query.render(vs) + }; } -module.exports = generateQuery; +module.exports = generateQuery; \ No newline at end of file diff --git a/query/reverse.js b/query/reverse.js index 930cba49..8cb2fa44 100644 --- a/query/reverse.js +++ b/query/reverse.js @@ -71,7 +71,10 @@ 
function generateQuery( clean ){ vs.var('input:categories', clean.categories); } - return query.render( vs ); + return { + type: 'reverse', + body: query.render(vs) + }; } module.exports = generateQuery; diff --git a/query/search.js b/query/search.js index 5ab96248..5f7f3e34 100644 --- a/query/search.js +++ b/query/search.js @@ -3,49 +3,36 @@ var peliasQuery = require('pelias-query'), textParser = require('./text_parser'), check = require('check-types'); -var placeTypes = require('../helper/placeTypes'); - -// region_a is also an admin field. addressit tries to detect -// region_a, in which case we use a match query specifically for it. -// but address it doesn't know about all of them so it helps to search -// against this with the other admin parts as a fallback -var adminFields = placeTypes.concat(['region_a']); - //------------------------------ // general-purpose search query //------------------------------ -var query = new peliasQuery.layout.FilteredBooleanQuery(); - -// mandatory matches -query.score( peliasQuery.view.boundary_country, 'must' ); -query.score( peliasQuery.view.ngrams, 'must' ); +var fallbackQuery = new peliasQuery.layout.FallbackQuery(); +var geodisambiguationQuery = new peliasQuery.layout.GeodisambiguationQuery(); // scoring boost -query.score( peliasQuery.view.phrase ); -query.score( peliasQuery.view.focus( peliasQuery.view.phrase ) ); -query.score( peliasQuery.view.popularity( peliasQuery.view.phrase ) ); -query.score( peliasQuery.view.population( peliasQuery.view.phrase ) ); - -// address components -query.score( peliasQuery.view.address('housenumber') ); -query.score( peliasQuery.view.address('street') ); -query.score( peliasQuery.view.address('postcode') ); - -// admin components -// country_a and region_a are left as matches here because the text-analyzer -// can sometimes detect them, in which case a query more specific than a -// multi_match is appropriate. 
-query.score( peliasQuery.view.admin('country_a') ); -query.score( peliasQuery.view.admin('region_a') ); -query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') ); +fallbackQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); +fallbackQuery.score( peliasQuery.view.popularity_only_function ); +fallbackQuery.score( peliasQuery.view.population_only_function ); -// non-scoring hard filters -query.filter( peliasQuery.view.boundary_circle ); -query.filter( peliasQuery.view.boundary_rect ); -query.filter( peliasQuery.view.sources ); -query.filter( peliasQuery.view.layers ); -query.filter( peliasQuery.view.categories ); +geodisambiguationQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) ); +geodisambiguationQuery.score( peliasQuery.view.popularity_only_function ); +geodisambiguationQuery.score( peliasQuery.view.population_only_function ); +// -------------------------------- +// non-scoring hard filters +fallbackQuery.filter( peliasQuery.view.boundary_country ); +fallbackQuery.filter( peliasQuery.view.boundary_circle ); +fallbackQuery.filter( peliasQuery.view.boundary_rect ); +fallbackQuery.filter( peliasQuery.view.sources ); +fallbackQuery.filter( peliasQuery.view.layers ); +fallbackQuery.filter( peliasQuery.view.categories ); + +geodisambiguationQuery.filter( peliasQuery.view.boundary_country ); +geodisambiguationQuery.filter( peliasQuery.view.boundary_circle ); +geodisambiguationQuery.filter( peliasQuery.view.boundary_rect ); +geodisambiguationQuery.filter( peliasQuery.view.sources ); +geodisambiguationQuery.filter( peliasQuery.view.layers ); +geodisambiguationQuery.filter( peliasQuery.view.categories ); // -------------------------------- /** @@ -125,7 +112,29 @@ function generateQuery( clean ){ textParser( clean.parsed_text, vs ); } - return query.render( vs ); + var q = getQuery(vs); + + //console.log(JSON.stringify(q, null, 2)); + + return q; +} + +function getQuery(vs) { + if (hasStreet(vs)) 
{ + return { + type: 'fallback', + body: fallbackQuery.render(vs) + }; + } + + // returning undefined is a signal to a later step that the addressit-parsed + // query should be queried for + return undefined; + +} + +function hasStreet(vs) { + return vs.isset('input:street'); } -module.exports = generateQuery; +module.exports = generateQuery; \ No newline at end of file diff --git a/query/search_original.js b/query/search_original.js new file mode 100644 index 00000000..e4a0dc66 --- /dev/null +++ b/query/search_original.js @@ -0,0 +1,135 @@ +var peliasQuery = require('pelias-query'), + defaults = require('./search_defaults'), + textParser = require('./text_parser_addressit'), + check = require('check-types'); + +var placeTypes = require('../helper/placeTypes'); + +// region_a is also an admin field. addressit tries to detect +// region_a, in which case we use a match query specifically for it. +// but address it doesn't know about all of them so it helps to search +// against this with the other admin parts as a fallback +var adminFields = placeTypes.concat(['region_a']); + +//------------------------------ +// general-purpose search query +//------------------------------ +var query = new peliasQuery.layout.FilteredBooleanQuery(); + +// mandatory matches +query.score( peliasQuery.view.boundary_country, 'must' ); +query.score( peliasQuery.view.ngrams, 'must' ); + +// scoring boost +query.score( peliasQuery.view.phrase ); +query.score( peliasQuery.view.focus( peliasQuery.view.phrase ) ); +query.score( peliasQuery.view.popularity( peliasQuery.view.phrase ) ); +query.score( peliasQuery.view.population( peliasQuery.view.phrase ) ); + +// address components +query.score( peliasQuery.view.address('housenumber') ); +query.score( peliasQuery.view.address('street') ); +query.score( peliasQuery.view.address('postcode') ); + +// admin components +// country_a and region_a are left as matches here because the text-analyzer +// can sometimes detect them, in which case a query 
more specific than a +// multi_match is appropriate. +query.score( peliasQuery.view.admin('country_a') ); +query.score( peliasQuery.view.admin('region_a') ); +query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') ); + +// non-scoring hard filters +query.filter( peliasQuery.view.boundary_circle ); +query.filter( peliasQuery.view.boundary_rect ); +query.filter( peliasQuery.view.sources ); +query.filter( peliasQuery.view.layers ); +query.filter( peliasQuery.view.categories ); + +// -------------------------------- + +/** + map request variables to query variables for all inputs + provided by this HTTP request. +**/ +function generateQuery( clean ){ + + var vs = new peliasQuery.Vars( defaults ); + + // input text + vs.var( 'input:name', clean.text ); + + // sources + vs.var( 'sources', clean.sources); + + // layers + vs.var( 'layers', clean.layers); + + // categories + if (clean.categories) { + vs.var('input:categories', clean.categories); + } + + // size + if( clean.querySize ) { + vs.var( 'size', clean.querySize ); + } + + // focus point + if( check.number(clean['focus.point.lat']) && + check.number(clean['focus.point.lon']) ){ + vs.set({ + 'focus:point:lat': clean['focus.point.lat'], + 'focus:point:lon': clean['focus.point.lon'] + }); + } + + // boundary rect + if( check.number(clean['boundary.rect.min_lat']) && + check.number(clean['boundary.rect.max_lat']) && + check.number(clean['boundary.rect.min_lon']) && + check.number(clean['boundary.rect.max_lon']) ){ + vs.set({ + 'boundary:rect:top': clean['boundary.rect.max_lat'], + 'boundary:rect:right': clean['boundary.rect.max_lon'], + 'boundary:rect:bottom': clean['boundary.rect.min_lat'], + 'boundary:rect:left': clean['boundary.rect.min_lon'] + }); + } + + // boundary circle + // @todo: change these to the correct request variable names + if( check.number(clean['boundary.circle.lat']) && + check.number(clean['boundary.circle.lon']) ){ + vs.set({ + 'boundary:circle:lat': 
clean['boundary.circle.lat'], + 'boundary:circle:lon': clean['boundary.circle.lon'] + }); + + if( check.number(clean['boundary.circle.radius']) ){ + vs.set({ + 'boundary:circle:radius': Math.round( clean['boundary.circle.radius'] ) + 'km' + }); + } + } + + // boundary country + if( check.string(clean['boundary.country']) ){ + vs.set({ + 'boundary:country': clean['boundary.country'] + }); + } + + // run the address parser + if( clean.parsed_text ){ + textParser( clean.parsed_text, vs ); + } + + return { + type: 'original', + body: query.render(vs) + }; +} + + +module.exports = generateQuery; diff --git a/query/text_parser.js b/query/text_parser.js index 00e60724..c5b8da44 100644 --- a/query/text_parser.js +++ b/query/text_parser.js @@ -1,43 +1,19 @@ - var logger = require('pelias-logger').get('api'); -var placeTypes = require('../helper/placeTypes'); - -/* -This list should only contain admin fields we are comfortable matching in the case -when we can't identify parts of an address. This shouldn't contain fields like country_a -or postalcode because we should only try to match those when we're sure that's what they are. - */ -var adminFields = placeTypes.concat([ - 'region_a' -]); - -/** - @todo: refactor me -**/ // all the address parsing logic function addParsedVariablesToQueryVariables( parsed_text, vs ){ + // ==== add parsed matches [address components] ==== - // is it a street address? - var isStreetAddress = parsed_text.hasOwnProperty('number') && parsed_text.hasOwnProperty('street'); - if( isStreetAddress ){ - vs.var( 'input:name', parsed_text.number + ' ' + parsed_text.street ); - } - - // ? - else if( parsed_text.admin_parts ) { - vs.var( 'input:name', parsed_text.name ); + // query - Mexitaly, Sunoco, Lowes + if (parsed_text.hasOwnProperty('query')) { + vs.var('input:query', parsed_text.query); } - // ? - else { - logger.warn( 'chaos monkey asks: what happens now?' 
); - logger.warn( parsed_text ); - try{ throw new Error(); } catch(e){ logger.warn( e.stack ); } // print a stack trace + // categories - restaurants, hotels, bars + if (parsed_text.hasOwnProperty('category')) { + vs.var('input:category', parsed_text.category); } - // ==== add parsed matches [address components] ==== - // house number if( parsed_text.hasOwnProperty('number') ){ vs.var( 'input:housenumber', parsed_text.number ); @@ -48,6 +24,16 @@ function addParsedVariablesToQueryVariables( parsed_text, vs ){ vs.var( 'input:street', parsed_text.street ); } + // neighbourhood + if (parsed_text.hasOwnProperty('neighbourhood')) { + vs.var( 'input:neighbourhood', parsed_text.neighbourhood); + } + + // borough + if (parsed_text.hasOwnProperty('borough')) { + vs.var( 'input:borough', parsed_text.borough); + } + // postal code if( parsed_text.hasOwnProperty('postalcode') ){ vs.var( 'input:postcode', parsed_text.postalcode ); @@ -57,43 +43,52 @@ function addParsedVariablesToQueryVariables( parsed_text, vs ){ // city if( parsed_text.hasOwnProperty('city') ){ - vs.var( 'input:county', parsed_text.city ); + vs.var( 'input:locality', parsed_text.city ); + } + + // county + if( parsed_text.hasOwnProperty('county') ){ + vs.var( 'input:county', parsed_text.county ); } // state if( parsed_text.hasOwnProperty('state') ){ - vs.var( 'input:region_a', parsed_text.state ); + vs.var( 'input:region', parsed_text.state ); } // country if( parsed_text.hasOwnProperty('country') ){ - vs.var( 'input:country_a', parsed_text.country ); + vs.var( 'input:country', parsed_text.country ); } - // ==== deal with the 'leftover' components ==== - // @todo: clean up this code - - // a concept called 'leftovers' which is just 'admin_parts' /or 'regions'. 
- var leftoversString = ''; - if( parsed_text.hasOwnProperty('admin_parts') ){ - leftoversString = parsed_text.admin_parts; - } - else if( parsed_text.hasOwnProperty('regions') ){ - leftoversString = parsed_text.regions.join(' '); + // libpostal sometimes parses addresses with prefix house numbers in places where + // the house number is normally postfix incorrectly, for instance: + // ```> 1 Grolmanstraße, Berlin, Germany + // + // Result: + // + // { + // "house": "1", + // "road": "grolmanstrasse", + // "state": "berlin", + // "country": "germany" + // }``` + // + // In libpostal parlance, `house` is just a query term, not the house number. + // This special case moves the query term to the house number field if there's a street, + // there's no house number, and the query is parseable as an integer, then use the + // query as the house number and blank out the query. + if (shouldSetQueryIntoHouseNumber(vs)) { + vs.var( 'input:housenumber', vs.var('input:query').toString()); + vs.unset( 'input:query' ); } - // if we have 'leftovers' then assign them to any fields which - // currently don't have a value assigned. 
- if( leftoversString.length ){ - - // cycle through fields and set fields which - // are still currently unset - adminFields.forEach( function( key ){ - if( !vs.isset( 'input:' + key ) ){ - vs.var( 'input:' + key, leftoversString ); - } - }); - } +} + +function shouldSetQueryIntoHouseNumber(vs) { + return !vs.isset('input:housenumber') && + vs.isset('input:street') && + /^[0-9]+$/.test(vs.var('input:query').toString()); } module.exports = addParsedVariablesToQueryVariables; diff --git a/query/text_parser_addressit.js b/query/text_parser_addressit.js new file mode 100644 index 00000000..00e60724 --- /dev/null +++ b/query/text_parser_addressit.js @@ -0,0 +1,99 @@ + +var logger = require('pelias-logger').get('api'); +var placeTypes = require('../helper/placeTypes'); + +/* +This list should only contain admin fields we are comfortable matching in the case +when we can't identify parts of an address. This shouldn't contain fields like country_a +or postalcode because we should only try to match those when we're sure that's what they are. + */ +var adminFields = placeTypes.concat([ + 'region_a' +]); + +/** + @todo: refactor me +**/ + +// all the address parsing logic +function addParsedVariablesToQueryVariables( parsed_text, vs ){ + + // is it a street address? + var isStreetAddress = parsed_text.hasOwnProperty('number') && parsed_text.hasOwnProperty('street'); + if( isStreetAddress ){ + vs.var( 'input:name', parsed_text.number + ' ' + parsed_text.street ); + } + + // ? + else if( parsed_text.admin_parts ) { + vs.var( 'input:name', parsed_text.name ); + } + + // ? + else { + logger.warn( 'chaos monkey asks: what happens now?' 
); + logger.warn( parsed_text ); + try{ throw new Error(); } catch(e){ logger.warn( e.stack ); } // print a stack trace + } + + // ==== add parsed matches [address components] ==== + + // house number + if( parsed_text.hasOwnProperty('number') ){ + vs.var( 'input:housenumber', parsed_text.number ); + } + + // street name + if( parsed_text.hasOwnProperty('street') ){ + vs.var( 'input:street', parsed_text.street ); + } + + // postal code + if( parsed_text.hasOwnProperty('postalcode') ){ + vs.var( 'input:postcode', parsed_text.postalcode ); + } + + // ==== add parsed matches [admin components] ==== + + // city + if( parsed_text.hasOwnProperty('city') ){ + vs.var( 'input:county', parsed_text.city ); + } + + // state + if( parsed_text.hasOwnProperty('state') ){ + vs.var( 'input:region_a', parsed_text.state ); + } + + // country + if( parsed_text.hasOwnProperty('country') ){ + vs.var( 'input:country_a', parsed_text.country ); + } + + // ==== deal with the 'leftover' components ==== + // @todo: clean up this code + + // a concept called 'leftovers' which is just 'admin_parts' /or 'regions'. + var leftoversString = ''; + if( parsed_text.hasOwnProperty('admin_parts') ){ + leftoversString = parsed_text.admin_parts; + } + else if( parsed_text.hasOwnProperty('regions') ){ + leftoversString = parsed_text.regions.join(' '); + } + + // if we have 'leftovers' then assign them to any fields which + // currently don't have a value assigned. 
+ if( leftoversString.length ){ + + // cycle through fields and set fields which + // are still currently unset + adminFields.forEach( function( key ){ + if( !vs.isset( 'input:' + key ) ){ + vs.var( 'input:' + key, leftoversString ); + } + }); + } +} + +module.exports = addParsedVariablesToQueryVariables; diff --git a/routes/v1.js b/routes/v1.js index 674c1f5c..362ed2cd 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -7,6 +7,7 @@ var sanitisers = { autocomplete: require('../sanitiser/autocomplete'), place: require('../sanitiser/place'), search: require('../sanitiser/search'), + search_fallback: require('../sanitiser/search_fallback'), reverse: require('../sanitiser/reverse'), nearby: require('../sanitiser/nearby') }; @@ -25,12 +26,20 @@ var controllers = { status: require('../controller/status') }; +var queries = { + libpostal: require('../query/search'), + fallback_to_old_prod: require('../query/search_original') +}; + /** ----------------------- controllers ----------------------- **/ var postProc = { + trimByGranularity: require('../middleware/trimByGranularity'), distances: require('../middleware/distance'), confidenceScores: require('../middleware/confidenceScore'), + confidenceScoresFallback: require('../middleware/confidenceScoreFallback'), confidenceScoresReverse: require('../middleware/confidenceScoreReverse'), + accuracy: require('../middleware/accuracy'), dedupe: require('../middleware/dedupe'), localNamingConventions: require('../middleware/localNamingConventions'), renamePlacenames: require('../middleware/renamePlacenames'), @@ -62,10 +71,18 @@ function addRoutes(app, peliasConfig) { search: createRouter([ sanitisers.search.middleware, middleware.calcSize(), - controllers.search(peliasConfig), + // 2nd parameter is `backend` which gets initialized internally + // 3rd parameter is which query module to use, use fallback/geodisambiguation + // first, then use original search strategy if first query didn't return anything + 
controllers.search(peliasConfig, undefined, queries.libpostal), + sanitisers.search_fallback.middleware, + controllers.search(peliasConfig, undefined, queries.fallback_to_old_prod), + postProc.trimByGranularity(), postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig), + postProc.confidenceScoresFallback(), postProc.dedupe(), + postProc.accuracy(), postProc.localNamingConventions(), postProc.renamePlacenames(), postProc.parseBoundingBox(), @@ -79,6 +96,7 @@ function addRoutes(app, peliasConfig) { postProc.distances('focus.point.'), postProc.confidenceScores(peliasConfig), postProc.dedupe(), + postProc.accuracy(), postProc.localNamingConventions(), postProc.renamePlacenames(), postProc.parseBoundingBox(), @@ -95,6 +113,7 @@ function addRoutes(app, peliasConfig) { // so it must be calculated first postProc.confidenceScoresReverse(), postProc.dedupe(), + postProc.accuracy(), postProc.localNamingConventions(), postProc.renamePlacenames(), postProc.parseBoundingBox(), @@ -111,6 +130,7 @@ function addRoutes(app, peliasConfig) { // so it must be calculated first postProc.confidenceScoresReverse(), postProc.dedupe(), + postProc.accuracy(), postProc.localNamingConventions(), postProc.renamePlacenames(), postProc.parseBoundingBox(), @@ -121,6 +141,7 @@ function addRoutes(app, peliasConfig) { place: createRouter([ sanitisers.place.middleware, controllers.place(peliasConfig), + postProc.accuracy(), postProc.localNamingConventions(), postProc.renamePlacenames(), postProc.parseBoundingBox(), diff --git a/sanitiser/_text.js b/sanitiser/_text.js index 4709eeee..874a9b17 100644 --- a/sanitiser/_text.js +++ b/sanitiser/_text.js @@ -1,5 +1,5 @@ var check = require('check-types'), - text_analyzer = require('pelias-text-analyzer'); + text_analyzer = require('pelias-text-analyzer'); // validate texts, convert types and apply defaults function sanitize( raw, clean ){ @@ -8,13 +8,14 @@ function sanitize( raw, clean ){ var messages = { errors: [], warnings: [] }; // 
invalid input 'text' + // must call `!check.nonEmptyString` since `check.emptyString` returns + // `false` for `undefined` and `null` if( !check.nonEmptyString( raw.text ) ){ messages.errors.push('invalid param \'text\': text length, must be >0'); } // valid input 'text' else { - // valid text clean.text = raw.text; diff --git a/sanitiser/_text_addressit.js b/sanitiser/_text_addressit.js new file mode 100644 index 00000000..04fca21a --- /dev/null +++ b/sanitiser/_text_addressit.js @@ -0,0 +1,110 @@ +var check = require('check-types'); +var parser = require('addressit'); +var extend = require('extend'); +var _ = require('lodash'); +var logger = require('pelias-logger').get('api'); + +// validate texts, convert types and apply defaults +function sanitize( raw, clean ){ + + // error & warning messages + var messages = { errors: [], warnings: [] }; + + // invalid input 'text' + if( !check.nonEmptyString( raw.text ) ){ + messages.errors.push('invalid param \'text\': text length, must be >0'); + } + + // valid input 'text' + else { + + // valid text + clean.text = raw.text; + + // remove anything that may have been parsed before + delete clean.parsed_text; + + // parse text with query parser + var parsed_text = parse(clean.text); + if (check.assigned(parsed_text)) { + clean.parsed_text = parsed_text; + } + } + + return messages; +} + +// export function +module.exports = sanitize; + + + +// this is the addressit functionality from https://github.com/pelias/text-analyzer/blob/master/src/addressItParser.js +var DELIM = ','; + +function parse(query) { + var getAdminPartsBySplittingOnDelim = function(queryParts) { + // naive approach - for admin matching during query time + // split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny' + + var address = {}; + + if (queryParts.length > 1) { + address.name = queryParts[0].trim(); + + // 1. slice away all parts after the first one + // 2. trim spaces from each part just in case + // 3. 
join the parts back together with appropriate delimiter and spacing + address.admin_parts = queryParts.slice(1) + .map(function (part) { return part.trim(); }) + .join(DELIM + ' '); + } + + return address; + }; + + var getAddressParts = function(query) { + // perform full address parsing + // except on queries so short they obviously can't contain an address + if (query.length > 3) { + return parser( query ); + } + }; + + var queryParts = query.split(DELIM); + + var addressWithAdminParts = getAdminPartsBySplittingOnDelim(queryParts); + var addressWithAddressParts= getAddressParts(queryParts.join(DELIM + ' ')); + + var parsedAddress = extend(addressWithAdminParts, + addressWithAddressParts); + + var address_parts = [ 'name', + 'number', + 'street', + 'city', + 'state', + 'country', + 'postalcode', + 'regions', + 'admin_parts' + ]; + + var parsed_text = {}; + + address_parts.forEach(function(part){ + if (parsedAddress[part]) { + parsed_text[part] = parsedAddress[part]; + } + }); + + // if all we found was regions, ignore it as it is not enough information to make smarter decisions + if (Object.keys(parsed_text).length === 1 && !_.isUndefined(parsed_text.regions)) + { + logger.info('Ignoring address parser output, regions only'); + return null; + } + + return parsed_text; + +} diff --git a/sanitiser/autocomplete.js b/sanitiser/autocomplete.js index a7ee68f6..900edba2 100644 --- a/sanitiser/autocomplete.js +++ b/sanitiser/autocomplete.js @@ -3,7 +3,7 @@ var type_mapping = require('../helper/type_mapping'); var sanitizeAll = require('../sanitiser/sanitizeAll'), sanitizers = { singleScalarParameters: require('../sanitiser/_single_scalar_parameters'), - text: require('../sanitiser/_text'), + text: require('../sanitiser/_text_addressit'), tokenizer: require('../sanitiser/_tokenizer'), size: require('../sanitiser/_size')(10, 10, 10), layers: require('../sanitiser/_targets')('layers', type_mapping.layer_mapping), diff --git a/sanitiser/sanitizeAll.js 
b/sanitiser/sanitizeAll.js index ac31ddfe..f6af363e 100644 --- a/sanitiser/sanitizeAll.js +++ b/sanitiser/sanitizeAll.js @@ -1,15 +1,10 @@ - -var check = require('check-types'); - function sanitize( req, sanitizers, cb ){ + // init an object to store clean (sanitized) input parameters if not initialized + req.clean = req.clean || {}; - // init an object to store clean - // (sanitized) input parameters - req.clean = {}; - - // init erros and warnings arrays - req.errors = []; - req.warnings = []; + // init errors and warnings arrays if not initialized + req.errors = req.errors || []; + req.warnings = req.warnings || []; // source of input parameters // (in this case from the GET querystring params) diff --git a/sanitiser/search.js b/sanitiser/search.js index 7fcc6ab6..130de40f 100644 --- a/sanitiser/search.js +++ b/sanitiser/search.js @@ -18,10 +18,6 @@ var sanitizeAll = require('../sanitiser/sanitizeAll'), var sanitize = function(req, cb) { sanitizeAll(req, sanitizers, cb); }; -// export sanitize for testing -module.exports.sanitize = sanitize; -module.exports.sanitiser_list = sanitizers; - // middleware module.exports.middleware = function( req, res, next ){ sanitize( req, function( err, clean ){ diff --git a/sanitiser/search_fallback.js b/sanitiser/search_fallback.js new file mode 100644 index 00000000..1782dbdb --- /dev/null +++ b/sanitiser/search_fallback.js @@ -0,0 +1,29 @@ +var sanitizeAll = require('../sanitiser/sanitizeAll'), + sanitizers = { + text: require('../sanitiser/_text_addressit') + }; + +var sanitize = function(req, cb) { sanitizeAll(req, sanitizers, cb); }; +var logger = require('pelias-logger').get('api:controller:search_fallback'); +var logging = require( '../helper/logging' ); + +// middleware +module.exports.middleware = function( req, res, next ){ + // if res.data already has results then don't call the _text_autocomplete sanitiser + // this has been put into place for when the libpostal integration way of querying + // ES doesn't return 
anything and we want to fallback to the old logic + if (res && res.hasOwnProperty('data') && res.data.length > 0) { + return next(); + } + + // log the query that caused a fallback since libpostal+new-queries didn't return anything + if (req.path === '/v1/search') { + var queryText = logging.isDNT(req) ? '[text removed]' : req.clean.text; + logger.info(queryText); + } + + sanitize( req, function( err, clean ){ + next(); + }); + +}; diff --git a/service/search.js b/service/search.js index c5aad5a9..780da2ae 100644 --- a/service/search.js +++ b/service/search.js @@ -35,6 +35,7 @@ function service( backend, cmd, cb ){ hit._source._id = hit._id; hit._source._type = hit._type; hit._source._score = hit._score; + hit._source._matched_queries = hit.matched_queries; return hit._source; }); diff --git a/test/ciao/search/address_parsing.coffee b/test/ciao/search/address_parsing.coffee index e39f484e..5cfdbcce 100644 --- a/test/ciao/search/address_parsing.coffee +++ b/test/ciao/search/address_parsing.coffee @@ -38,4 +38,8 @@ json.geocoding.query.parsed_text['number'].should.eql '30' json.geocoding.query.parsed_text['street'].should.eql 'w 26th st' json.geocoding.query.parsed_text['state'].should.eql 'NY' json.geocoding.query.parsed_text['regions'].should.eql [] -json.geocoding.query.parsed_text['admin_parts'].should.eql "ny" \ No newline at end of file +json.geocoding.query.parsed_text['admin_parts'].should.eql "ny" + +json.features[0].properties.confidence.should.eql 1 +json.features[0].properties.match_type.should.eql "exact" +json.features[0].properties.accuracy.should.eql "point" \ No newline at end of file diff --git a/test/ciao_test_data.js b/test/ciao_test_data.js index da1e9821..55872f67 100644 --- a/test/ciao_test_data.js +++ b/test/ciao_test_data.js @@ -48,6 +48,86 @@ types.forEach( function( type, i1 ){ }); }); +client.index( + { + index: config.indexName, + type: 'address', + id: 'way:265038872', + body: { + 'center_point': { + 'lon': -73.990425, + 'lat': 40.744131 
+ }, + 'parent': { + 'country': [ + 'United States' + ], + 'neighbourhood_id': [ + '85869245' + ], + 'country_a': [ + 'USA' + ], + 'locality_a': [ + null + ], + 'region_id': [ + '85688543' + ], + 'county': [ + 'New York County' + ], + 'borough_a': [ + null + ], + 'borough_id': [ + '421205771' + ], + 'locality': [ + 'New York' + ], + 'borough': [ + 'Manhattan' + ], + 'region_a': [ + 'NY' + ], + 'county_id': [ + '102081863' + ], + 'locality_id': [ + '85977539' + ], + 'neighbourhood_a': [ + null + ], + 'neighbourhood': [ + 'Flatiron District' + ], + 'region': [ + 'New York' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] + }, + 'name': {'default': '30 West 26th Street'}, + 'address_parts': { + 'zip': '10010', + 'number': '30', + 'street': 'West 26th Street' + }, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'way:265038872', + 'layer': 'address' + } + } +); + // call refresh so the index merges the changes actions.push( function( done ){ client.indices.refresh( { index: config.indexName }, done); diff --git a/test/unit/controller/search.js b/test/unit/controller/search.js index 3e7494ac..86f07b97 100644 --- a/test/unit/controller/search.js +++ b/test/unit/controller/search.js @@ -1,6 +1,7 @@ var setup = require('../../../controller/search'), mockBackend = require('../mock/backend'), mockQuery = require('../mock/query'); +var proxyquire = require('proxyquire').noCallThru(); module.exports.tests = {}; @@ -46,7 +47,8 @@ module.exports.tests.functional_success = function(test, common) { }]; var expectedMeta = { - scores: [10, 20] + scores: [10, 20], + query_type: 'mock' }; var expectedData = [ @@ -54,6 +56,7 @@ module.exports.tests.functional_success = function(test, common) { _id: 'myid1', _score: 10, _type: 'mytype1', + _matched_queries: ['query 1', 'query 2'], parent: { country: ['country1'], region: ['state1'], @@ -67,6 +70,7 @@ module.exports.tests.functional_success = function(test, common) { _id: 'myid2', _score: 20, _type: 
'mytype2', + _matched_queries: ['query 3'], parent: { country: ['country2'], region: ['state2'], @@ -169,6 +173,52 @@ module.exports.tests.timeout = function(test, common) { }); }; +module.exports.tests.existing_results = function(test, common) { + test('res with existing data should not call backend', function(t) { + var backend = function() { + throw new Error('backend should not have been called'); + }; + var controller = setup( fakeDefaultConfig, backend, mockQuery() ); + + var req = { }; + // the existence of `data` means that there are already results so + // don't call the backend/query + var res = { data: [{}] }; + + var next = function() { + t.deepEqual(res, {data: [{}]}); + t.end(); + }; + controller(req, res, next); + + }); + +}; + +module.exports.tests.undefined_query = function(test, common) { + test('query returning undefined should not call service', function(t) { + // a function that returns undefined + var query = function () { return; }; + + var search_service_was_called = false; + + var controller = proxyquire('../../../controller/search', { + '../service/search': function() { + search_service_was_called = true; + throw new Error('search service should not have been called'); + } + })(undefined, undefined, query); + + var next = function() { + t.notOk(search_service_was_called, 'should have returned before search service was called'); + t.end(); + }; + + controller({}, {}, next); + + }); +}; + module.exports.all = function (tape, common) { function test(name, testFunction) { diff --git a/test/unit/fixture/dedupe_elasticsearch_results.js b/test/unit/fixture/dedupe_elasticsearch_results.js index e4780097..39c8e71f 100644 --- a/test/unit/fixture/dedupe_elasticsearch_results.js +++ b/test/unit/fixture/dedupe_elasticsearch_results.js @@ -1,377 +1,1036 @@ module.exports = [ { 'center_point': { - 'lon': -76.207456, - 'lat': 40.039265 + 'lon': -76.293127, + 'lat': 40.032787 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 
'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487183' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Hand Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:357289197', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': 'node:357289197', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, - { // same as above, but change the neighbourhood + { 'center_point': { - 'lon': -77.207456, - 'lat': 41.039265 + 'lon': -76.32746, + 'lat': 40.02343 }, - 'address': {}, 'parent': { - 'localadmin': 'East Lampeter', - 'region_a': 'PA', - 'region': 'Pennsylvania', - 'locality': 'Smoketown', - 'country_a': 'USA', - 'county': 'Lancaster County', - 'country': 'United States', - 'neighbourhood': 'Blueland' // ### + 'country': [ + 'United States' + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487185' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null 
+ ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Wheatland Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5219083', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': '5219083', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, - { // same as #1, but change the locality + { 'center_point': { - 'lon': -73.207456, - 'lat': 42.039265 + 'lon': -76.30107, + 'lat': 40.05926 }, - 'address': {}, 'parent': { - 'localadmin': 'East Lampeter', - 'region_a': 'PA', - 'region': 'Pennsylvania', - 'locality': 'Firetown', // ### - 'country_a': 'USA', - 'county': 'Lancaster County', - 'country': 'United States', - 'neighbourhood': 'Greenland' + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Rossmere' + ], + 'localadmin_id': [ + '404487183' + ], + 'neighbourhood_id': [ + '85846173' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Catholic High School Stadium' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5183465', 'category': [ - 'education' + 'entertainment', + 'recreation' ], - '_id': '357321757', + 'layer': 'venue', + '_id': '5183465', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, - { // same as #1, but with an 
additional name + { 'center_point': { - 'lon': -76.207456, - 'lat': 40.039265 + 'lon': -76.285474, + 'lat': 40.048535 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School', - 'alt': 'High School of East Lampeter', + 'default': 'McCaskey East High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:368338500', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': 'node:368338500', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.207456, - 'lat': 40.039265 + 'lon': -76.327063, + 'lat': 40.031869 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'county': [ + 'Lancaster County' + ], + 
'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487185' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'bounding_box': '{\'min_lat\':40.031513,\'max_lat\':40.032233,\'min_lon\':-76.328429,\'max_lon\':-76.326216}', 'name': { - 'default': 'East Lampeter, High-School' + 'default': 'Wheatland Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'way:84969670', 'category': [ 'education' ], - '_id': '357321757', + 'layer': 'venue', + '_id': 'way:84969670', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.23246, - 'lat': 39.99288 + 'lon': -76.29274, + 'lat': 40.03288 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['West Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Lampeter'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Wheatland Mills'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487183' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'Lampeter-Strasburg High School' + 'default': 'Hand Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5192545', 
'category': [ 'education' ], - '_id': '4559068', - '_type': 'geoname', - '_score': 1.2367082, - 'confidence': 0.879 + 'layer': 'venue', + '_id': '5192545', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.20746, - 'lat': 40.03927 + 'lon': -76.28496, + 'lat': 40.04732 }, - 'address_parts': {}, 'parent': { - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'], - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter High School' + 'default': 'Lincoln Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5198085', 'category': [ 'education' ], - '_id': '5187980', - '_type': 'geoname', - '_score': 1.2367082, - 'confidence': 0.879 + 'layer': 'venue', + '_id': '5198085', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.232457, - 'lat': 39.992877 + 'lon': -76.31857, + 'lat': 40.04204 }, - 'address_parts': {}, 'parent': { - 'region': ['Pennsylvania'], - 'locality': ['Lampeter'], - 'country_a': ['USA'], - 'county': ['Lancaster 
County'], - 'country': ['United States'], - 'neighbourhood': ['Wheatland Mills'], - 'localadmin': ['West Lampeter'], - 'region_a': ['PA'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487183' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'Lampeter-Strasburg High School' + 'default': 'Reynolds Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5208101', 'category': [ 'education' ], - '_id': '357294404', + 'layer': 'venue', + '_id': '5208101', '_type': 'venue', - '_score': 1.2367082, - 'confidence': 0.879 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.207456, - 'lat': 40.038987 + 'lon': -76.290392, + 'lat': 40.048281 }, - 'address_parts': {}, 'parent': { - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'], - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 
'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'bounding_box': '{\'min_lat\':40.047288,\'max_lat\':40.049171,\'min_lon\':-76.291609,\'max_lon\':-76.289314}', 'name': { - 'default': 'East Lampeter School' + 'default': 'McCaskey High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'way:161088588', 'category': [ 'education' ], - '_id': '357283977', + 'layer': 'venue', + '_id': 'way:161088588', '_type': 'venue', - '_score': 1.1036991, - 'confidence': 0.664 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -76.20746, - 'lat': 40.03899 + 'lon': -76.29051, + 'lat': 40.04788 }, - 'address_parts': {}, 'parent': { - 'region': ['Pennsylvania'], - 'locality': ['Smoketown'], - 'country_a': ['USA'], - 'county': ['Lancaster County'], - 'country': ['United States'], - 'neighbourhood': ['Greenland'], - 'localadmin': ['East Lampeter'], - 'region_a': ['PA'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, 'name': { - 'default': 'East Lampeter School' + 'default': 'McCaskey High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5200263', 'category': [ 'education' ], - '_id': '5187966', - '_type': 'geoname', - '_score': 1.1036991, - 'confidence': 
0.664 + 'layer': 'venue', + '_id': '5200263', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -94.167445, - 'lat': 38.762788 + 'lon': -76.318983, + 'lat': 40.042051 }, - 'address_parts': {}, 'parent': { - 'region': ['Missouri'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Cass County'], - 'country': ['United States'], - 'localadmin': ['Polk'], - 'region_a': ['MO'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487183' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'bounding_box': '{\'min_lat\':40.041542,\'max_lat\':40.042777,\'min_lon\':-76.31963,\'max_lon\':-76.318094}', 'name': { - 'default': 'Strasburg School' + 'default': 'Reynolds Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'way:34212977', 'category': [ 'education' ], - '_id': '358058986', + 'layer': 'venue', + '_id': 'way:34212977', '_type': 'venue', - '_score': 1.0492544, - 'confidence': 0.658 + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -78.36317, - 'lat': 38.98445 - }, - 'address_parts': {}, - 'name': { - 'default': 'Strasburg High School' + 'lon': -76.284958, + 'lat': 40.04732 }, 'parent': { - 'region_a': ['VA'], - 'region': ['Virginia'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Shenandoah County'], - 'country': ['United States'], - 'neighbourhood': ['Strasburg Junction'] + 'country': [ + 'United States' + ], 
+ 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'neighbourhood': [ + 'Grandview Heights' + ], + 'localadmin_id': [ + '404487183' + ], + 'neighbourhood_id': [ + '85822505' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'neighbourhood_a': [ + null + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] + }, + 'name': { + 'default': 'Lincoln Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:357330916', 'category': [ 'education' ], - '_id': '4787978', - '_type': 'geoname', - '_score': 0.9724125, - 'confidence': 0.649 + 'layer': 'venue', + '_id': 'node:357330916', + '_type': 'venue', + '_score': 0.47265986, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -100.16516, - 'lat': 46.13427 - }, - 'address_parts': {}, - 'name': { - 'default': 'Strasburg High School' + 'lon': -76.280791, + 'lat': 40.045098 }, 'parent': { - 'localadmin': ['Strasburg'], - 'region_a': ['ND'], - 'region': ['North Dakota'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Emmons County'], - 'country': ['United States'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487183' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] + }, + 'name': { + 'default': 'Lancaster 
Christian Junior High School' }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'openstreetmap', + 'source_id': 'node:357330919', 'category': [ 'education' ], - '_id': '9683163', - '_type': 'geoname', - '_score': 0.9724125, - 'confidence': 0.649 + 'layer': 'venue', + '_id': 'node:357330919', + '_type': 'venue', + '_score': 0.4432487, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 }, { 'center_point': { - 'lon': -81.532392, - 'lat': 40.597578 - }, - 'address_parts': {}, - 'name': { - 'default': 'Strasburg High School' + 'lon': -76.28079, + 'lat': 40.0451 }, 'parent': { - 'localadmin': ['Franklin'], - 'region_a': ['OH'], - 'region': ['Ohio'], - 'locality': ['Strasburg'], - 'country_a': ['USA'], - 'county': ['Tuscarawas County'], - 'country': ['United States'] + 'country': [ + 'United States' + ], + 'locality_a': [ + null + ], + 'county': [ + 'Lancaster County' + ], + 'region_a': [ + 'PA' + ], + 'localadmin': [ + 'Lancaster' + ], + 'county_id': [ + '102081377' + ], + 'localadmin_id': [ + '404487183' + ], + 'country_a': [ + 'USA' + ], + 'localadmin_a': [ + null + ], + 'region_id': [ + '85688481' + ], + 'locality': [ + 'Lancaster' + ], + 'locality_id': [ + '101718643' + ], + 'region': [ + 'Pennsylvania' + ], + 'country_id': [ + '85633793' + ], + 'county_a': [ + null + ] }, + 'name': { + 'default': 'Lancaster Christian Junior High School' + }, + 'address_parts': {}, + 'alpha3': 'USA', + 'source': 'geonames', + 'source_id': '5197082', 'category': [ 'education' ], - '_id': '356646971', + 'layer': 'venue', + '_id': '5197082', '_type': 'venue', - '_score': 0.9724125, - 'confidence': 0.649 + '_score': 0.4432487, + '_matched_queries': [ + 'fallback.venue' + ], + 'confidence': 0.5 } -]; +]; \ No newline at end of file diff --git a/test/unit/fixture/search_boundary_country.js b/test/unit/fixture/search_boundary_country.js index 94f867b2..96fb4170 100644 --- a/test/unit/fixture/search_boundary_country.js +++ 
b/test/unit/fixture/search_boundary_country.js @@ -1,99 +1,91 @@ - module.exports = { 'query': { - 'bool': { - 'must': [ - { - 'match': { - 'parent.country_a': { - 'analyzer': 'standard', - 'query': 'ABC' - } - } - }, - { - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - } - ], - 'should': [{ - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - },{ - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] - } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'filter': { + 'bool': { + 'must': [ + { + 'match': { + 'parent.country_a': { + 'analyzer': 'standard', + 'query': 'ABC' + } + } + }, + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - }] + } } - }], - 'filter': [ + }, + 'max_boost': 20, + 'functions': [ { - 'terms': { - 'layer': [ - 'test' - ] - } + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 
'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } - ] + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, - 'sort': [ '_score' ], 'size': 10, - 'track_scores': true + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] }; diff --git a/test/unit/fixture/search_boundary_country_original.js b/test/unit/fixture/search_boundary_country_original.js new file mode 100644 index 00000000..94f867b2 --- /dev/null +++ b/test/unit/fixture/search_boundary_country_original.js @@ -0,0 +1,99 @@ + +module.exports = { + 'query': { + 'bool': { + 'must': [ + { + 'match': { + 'parent.country_a': { + 'analyzer': 'standard', + 'query': 'ABC' + } + } + }, + { + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + } + ], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter': [ + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': 
true +}; diff --git a/test/unit/fixture/search_fallback.js b/test/unit/fixture/search_fallback.js new file mode 100644 index 00000000..f7983da9 --- /dev/null +++ b/test/unit/fixture/search_fallback.js @@ -0,0 +1,805 @@ +module.exports = { + 'query': { + 'function_score': { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.venue', + 'must': [ + { + 'multi_match': { + 'query': 'query value', + 'type': 'phrase', + 'fields': [ + 'phrase.default' + ] + } + }, + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.neighbourhood', + 'parent.neighbourhood_a' + ] + } + }, + { + 'multi_match': { + 'query': 'borough value', + 'type': 'phrase', + 'fields': [ + 'parent.borough', + 'parent.borough_a' + ] + } + }, + { + 'multi_match': { + 'query': 'city value', + 'type': 'phrase', + 'fields': [ + 'parent.locality', + 'parent.locality_a', + 'parent.localadmin', + 'parent.localadmin_a' + ] + } + }, + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a', + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'venue' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.address', + 'must': [ + { + 'match_phrase': { + 'address_parts.number': 'number value' + } + }, + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + }, + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.neighbourhood', + 'parent.neighbourhood_a' + ] + } + }, + { + 
'multi_match': { + 'query': 'borough value', + 'type': 'phrase', + 'fields': [ + 'parent.borough', + 'parent.borough_a' + ] + } + }, + { + 'multi_match': { + 'query': 'city value', + 'type': 'phrase', + 'fields': [ + 'parent.locality', + 'parent.locality_a', + 'parent.localadmin', + 'parent.localadmin_a' + ] + } + }, + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a', + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'should': [ + { + 'match_phrase': { + 'address_parts.zip': 'postalcode value' + } + } + ], + 'filter': { + 'term': { + 'layer': 'address' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + }, + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.neighbourhood', + 'parent.neighbourhood_a' + ] + } + }, + { + 'multi_match': { + 'query': 'borough value', + 'type': 'phrase', + 'fields': [ + 'parent.borough', + 'parent.borough_a' + ] + } + }, + { + 'multi_match': { + 'query': 'city value', + 'type': 'phrase', + 'fields': [ + 'parent.locality', + 'parent.locality_a', + 'parent.localadmin', + 'parent.localadmin_a' + ] + } + }, + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a', + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 
'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'should': [ + { + 'match_phrase': { + 'address_parts.zip': 'postalcode value' + } + } + ], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.neighbourhood', + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.neighbourhood', + 'parent.neighbourhood_a' + ] + } + }, + { + 'multi_match': { + 'query': 'borough value', + 'type': 'phrase', + 'fields': [ + 'parent.borough', + 'parent.borough_a' + ] + } + }, + { + 'multi_match': { + 'query': 'city value', + 'type': 'phrase', + 'fields': [ + 'parent.locality', + 'parent.locality_a', + 'parent.localadmin', + 'parent.localadmin_a' + ] + } + }, + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a', + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'neighbourhood' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.borough', + 'must': [ + { + 'multi_match': { + 'query': 'borough value', + 'type': 'phrase', + 'fields': [ + 'parent.borough', + 'parent.borough_a' + ] + } + }, + { + 'multi_match': { + 'query': 'city value', + 'type': 'phrase', + 'fields': [ + 'parent.locality', + 'parent.locality_a', + 'parent.localadmin', + 'parent.localadmin_a' + ] + } + }, + { + 'multi_match': { + 'query': 'county value', + 
'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a', + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'borough' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.locality', + 'must': [ + { + 'multi_match': { + 'query': 'city value', + 'type': 'phrase', + 'fields': [ + 'parent.locality', + 'parent.locality_a' + ] + } + }, + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a', + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'locality' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.localadmin', + 'must': [ + { + 'multi_match': { + 'query': 'city value', + 'type': 'phrase', + 'fields': [ + 'parent.localadmin', + 'parent.localadmin_a' + ] + } + }, + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a', + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 
'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'localadmin' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.county', + 'must': [ + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'county' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.macrocounty', + 'must': [ + { + 'multi_match': { + 'query': 'county value', + 'type': 'phrase', + 'fields': [ + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + }, + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a', + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'macrocounty' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.region', + 'must': [ + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'region' + } + } + } + }, + { + 'bool': { + '_name': 
'fallback.macroregion', + 'must': [ + { + 'multi_match': { + 'query': 'state value', + 'type': 'phrase', + 'fields': [ + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + }, + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a', + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'macroregion' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.dependency', + 'must': [ + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'dependency' + } + } + } + }, + { + 'bool': { + '_name': 'fallback.country', + 'must': [ + { + 'multi_match': { + 'query': 'country value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'country' + } + } + } + } + ] + } + }, + 'filter': { + 'bool': { + 'must': [] + } + } + } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + } + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' + } + }, + 'size': 20, + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] +}; diff --git a/test/unit/fixture/search_full_address.js b/test/unit/fixture/search_full_address_original.js similarity index 100% rename from test/unit/fixture/search_full_address.js rename to test/unit/fixture/search_full_address_original.js diff --git a/test/unit/fixture/search_geodisambiguation.js b/test/unit/fixture/search_geodisambiguation.js new file mode 100644 index 00000000..5db0af98 --- /dev/null +++ 
b/test/unit/fixture/search_geodisambiguation.js @@ -0,0 +1,267 @@ +module.exports = { + 'query': { + 'function_score': { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'should': [ + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.neighbourhood', + 'parent.neighbourhood_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'neighbourhood' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.borough', + 'parent.borough_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'borough' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.locality', + 'parent.locality_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'locality' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.localadmin', + 'parent.localadmin_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'localadmin' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.county', + 'parent.county_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'county' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.macrocounty', + 'parent.macrocounty_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'macrocounty' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.region', + 'parent.region_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'region' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 
'type': 'phrase', + 'fields': [ + 'parent.macroregion', + 'parent.macroregion_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'macroregion' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.dependency', + 'parent.dependency_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'dependency' + } + } + } + }, + { + 'bool': { + 'must': [ + { + 'multi_match': { + 'query': 'neighbourhood value', + 'type': 'phrase', + 'fields': [ + 'parent.country', + 'parent.country_a' + ] + } + } + ], + 'filter': { + 'term': { + 'layer': 'country' + } + } + } + } + ] + } + }, + 'filter': { + 'bool': { + 'must': [] + } + } + } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + } + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' + } + }, + 'size': 20, + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] +}; diff --git a/test/unit/fixture/search_linguistic_bbox.js b/test/unit/fixture/search_linguistic_bbox.js index b8dbf3a1..46e2fccd 100644 --- a/test/unit/fixture/search_linguistic_bbox.js +++ b/test/unit/fixture/search_linguistic_bbox.js @@ -1,98 +1,94 @@ - module.exports = { 'query': { - 'bool': { - 'must': [{ - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - },{ - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', 
- 'slop': 2, - 'boost': 1 - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] - } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } - } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - }] - } - }], - 'filter': [{ - 'geo_bounding_box': { - 'type': 'indexed', - 'center_point': { - 'top': 11.51, - 'right': -61.84, - 'bottom': 47.47, - 'left': -103.16 + 'filter': { + 'bool': { + 'must': [ + { + 'geo_bounding_box': { + 'type': 'indexed', + 'center_point': { + 'top': 11.51, + 'right': -61.84, + 'bottom': 47.47, + 'left': -103.16 + } + } + }, + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] } } + } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 }, { - 'terms': { - 'layer': [ - 'test' - ] - } - }] + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + } + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, - 'sort': [ '_score' ], 'size': 10, - 'track_scores': true + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] }; diff --git a/test/unit/fixture/search_linguistic_bbox_original.js b/test/unit/fixture/search_linguistic_bbox_original.js 
new file mode 100644 index 00000000..b8dbf3a1 --- /dev/null +++ b/test/unit/fixture/search_linguistic_bbox_original.js @@ -0,0 +1,98 @@ + +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter': [{ + 'geo_bounding_box': { + 'type': 'indexed', + 'center_point': { + 'top': 11.51, + 'right': -61.84, + 'bottom': 47.47, + 'left': -103.16 + } + } + }, + { + 'terms': { + 'layer': [ + 'test' + ] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; diff --git a/test/unit/fixture/search_linguistic_focus.js b/test/unit/fixture/search_linguistic_focus.js index 38273273..b2e577b1 100644 --- a/test/unit/fixture/search_linguistic_focus.js +++ b/test/unit/fixture/search_linguistic_focus.js @@ -1,119 +1,97 @@ - module.exports = { 'query': { - 'bool': { - 'must': [{ - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - }], - 'should': [{ - 
'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - }, { - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'functions': [{ - 'linear': { - 'center_point': { - 'origin': { - 'lat': 29.49136, - 'lon': -82.50622 - }, - 'offset': '0km', - 'scale': '50km', - 'decay': 0.5 - } - }, - 'weight': 2 - }], - 'score_mode': 'avg', - 'boost_mode': 'replace' - } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'filter': { + 'bool': { + 'must': [ + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] + } } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + }, + 'max_boost': 20, + 'functions': [ + { + 'weight': 2, + 'linear': { + 'center_point': { + 'origin': { + 'lat': 29.49136, + 'lon': -82.50622 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 } + } + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 
1 - }, - 'weight': 2 - }] - } - }], - 'filter': [ + 'weight': 1 + }, { - 'terms': { - 'layer': [ - 'test' - ] - } + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } - ] + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, - 'sort': [ '_score' ], 'size': 10, - 'track_scores': true + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] }; diff --git a/test/unit/fixture/search_linguistic_focus_bbox.js b/test/unit/fixture/search_linguistic_focus_bbox.js index ebc5f701..1f61dc6d 100644 --- a/test/unit/fixture/search_linguistic_focus_bbox.js +++ b/test/unit/fixture/search_linguistic_focus_bbox.js @@ -1,128 +1,108 @@ - module.exports = { 'query': { - 'bool': { - 'must': [{ - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - }, { - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'functions': [{ - 'linear': { - 'center_point': { - 'origin': { - 'lat': 29.49136, - 'lon': -82.50622 + 'filter': { + 'bool': { + 'must': [ + { + 'geo_bounding_box': { + 'type': 'indexed', + 'center_point': { + 'top': 11.51, + 'right': -61.84, + 'bottom': 47.47, + 'left': -103.16 + } + } }, - 'offset': '0km', - 'scale': '50km', - 'decay': 0.5 - } - }, - 'weight': 2 - }], - 'score_mode': 'avg', - 'boost_mode': 
'replace' - } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } - } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] - } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - }] - } - }], - 'filter': [{ - 'geo_bounding_box': { - 'type': 'indexed', - 'center_point': { - 'top': 11.51, - 'right': -61.84, - 'bottom': 47.47, - 'left': -103.16 } } }, - { - 'terms': { - 'layer': [ - 'test' - ] + 'max_boost': 20, + 'functions': [ + { + 'weight': 2, + 'linear': { + 'center_point': { + 'origin': { + 'lat': 29.49136, + 'lon': -82.50622 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 + } + } + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } - }] + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, - 'sort': [ '_score' ], 'size': 10, - 'track_scores': true + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] }; diff --git a/test/unit/fixture/search_linguistic_focus_bbox_original.js b/test/unit/fixture/search_linguistic_focus_bbox_original.js new file mode 100644 index 00000000..ebc5f701 --- /dev/null +++ 
b/test/unit/fixture/search_linguistic_focus_bbox_original.js @@ -0,0 +1,128 @@ + +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + }, { + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2, + 'query': 'test' + } + } + }, + 'functions': [{ + 'linear': { + 'center_point': { + 'origin': { + 'lat': 29.49136, + 'lon': -82.50622 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 + } + }, + 'weight': 2 + }], + 'score_mode': 'avg', + 'boost_mode': 'replace' + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter': [{ + 'geo_bounding_box': { + 'type': 'indexed', + 'center_point': { + 'top': 11.51, + 'right': -61.84, + 'bottom': 47.47, + 'left': -103.16 + } + } + }, + { + 'terms': { + 'layer': [ + 'test' + ] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; diff --git a/test/unit/fixture/search_linguistic_focus_null_island.js 
b/test/unit/fixture/search_linguistic_focus_null_island.js index 8f6fe381..fc47bc4e 100644 --- a/test/unit/fixture/search_linguistic_focus_null_island.js +++ b/test/unit/fixture/search_linguistic_focus_null_island.js @@ -1,117 +1,97 @@ - module.exports = { 'query': { - 'bool': { - 'must': [{ - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - }, { - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'functions': [{ - 'linear': { - 'center_point': { - 'origin': { - 'lat': 0, - 'lon': 0 - }, - 'offset': '0km', - 'scale': '50km', - 'decay': 0.5 - } - }, - 'weight': 2 - }], - 'score_mode': 'avg', - 'boost_mode': 'replace' - } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'filter': { + 'bool': { + 'must': [ + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] + } } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + }, + 'max_boost': 20, + 'functions': [ + { + 'weight': 2, + 'linear': { + 
'center_point': { + 'origin': { + 'lat': 0, + 'lon': 0 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 } + } + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - }] - } - }], - 'filter':[{ - 'terms': { - 'layer': [ - 'test' - ] + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } - }] + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, - 'sort': [ '_score' ], 'size': 10, - 'track_scores': true + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] }; diff --git a/test/unit/fixture/search_linguistic_focus_null_island_original.js b/test/unit/fixture/search_linguistic_focus_null_island_original.js new file mode 100644 index 00000000..8f6fe381 --- /dev/null +++ b/test/unit/fixture/search_linguistic_focus_null_island_original.js @@ -0,0 +1,117 @@ + +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + }, { + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2, + 'query': 'test' + } + } + }, + 'functions': [{ + 'linear': { + 'center_point': { + 'origin': { + 'lat': 0, + 'lon': 0 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 + } + }, + 'weight': 2 + }], + 'score_mode': 'avg', + 'boost_mode': 'replace' + } + },{ + 'function_score': { + 'query': { + 'match': { + 
'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter':[{ + 'terms': { + 'layer': [ + 'test' + ] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; diff --git a/test/unit/fixture/search_linguistic_focus_original.js b/test/unit/fixture/search_linguistic_focus_original.js new file mode 100644 index 00000000..38273273 --- /dev/null +++ b/test/unit/fixture/search_linguistic_focus_original.js @@ -0,0 +1,119 @@ + +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + }, { + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2, + 'query': 'test' + } + } + }, + 'functions': [{ + 'linear': { + 'center_point': { + 'origin': { + 'lat': 29.49136, + 'lon': -82.50622 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 + } + }, + 'weight': 2 + }], + 'score_mode': 'avg', + 'boost_mode': 'replace' + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 
'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter': [ + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; diff --git a/test/unit/fixture/search_linguistic_only.js b/test/unit/fixture/search_linguistic_only.js index 490eb0c9..caa4aefa 100644 --- a/test/unit/fixture/search_linguistic_only.js +++ b/test/unit/fixture/search_linguistic_only.js @@ -1,89 +1,83 @@ - module.exports = { 'query': { - 'bool': { - 'must': [{ - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - },{ - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 
'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] - } - },{ - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'filter': { + 'bool': { + 'must': [ + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - }] + } } - }], - 'filter': [ + }, + 'max_boost': 20, + 'functions': [ { - 'terms': { - 'layer': [ - 'test' - ] - } + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } - ] + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, - 'sort': [ '_score' ], 'size': 10, - 'track_scores': true + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] }; diff --git a/test/unit/fixture/search_linguistic_only_original.js b/test/unit/fixture/search_linguistic_only_original.js new file mode 100644 index 00000000..490eb0c9 --- /dev/null +++ b/test/unit/fixture/search_linguistic_only_original.js @@ -0,0 +1,89 @@ + +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 
'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter': [ + { + 'terms': { + 'layer': [ + 'test' + ] + } + } + ] + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; diff --git a/test/unit/fixture/search_linguistic_viewport.js b/test/unit/fixture/search_linguistic_viewport.js index ca6414a7..caa4aefa 100644 --- a/test/unit/fixture/search_linguistic_viewport.js +++ b/test/unit/fixture/search_linguistic_viewport.js @@ -1,133 +1,83 @@ module.exports = { 'query': { - 'bool': { - 'must': [ - { - 'match': { - 'name.default': { - 'analyzer': 'peliasQueryFullToken', - 'boost': 1, - 'query': 'test' - } - } - } - ], - 'should': [ - { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } - } - }, - { - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' + 'function_score': { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } } - } - }, - 'functions': [ - { - 'weight': 2, - 'linear': { - 'center_point': { - 'origin': { - 'lat': 29.49136, - 'lon': -82.50622 - }, - 'offset': '0km', - 'scale': '50km', - 'decay': 0.5 + ] + } + }, + 'filter': { + 
'bool': { + 'must': [ + { + 'terms': { + 'layer': [ + 'test' + ] } } - } - ], - 'score_mode': 'avg', - 'boost_mode': 'replace' + ] + } } - }, + } + }, + 'max_boost': 20, + 'functions': [ { - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } - } - }, - 'max_boost': 20, - 'functions': [ - { - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - } - ], - 'score_mode': 'first', - 'boost_mode': 'replace' - } + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 }, { - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } - } - }, - 'max_boost': 20, - 'functions': [ - { - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - } - ], - 'score_mode': 'first', - 'boost_mode': 'replace' - } + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } ], - 'filter': [ - { - 'terms': { - 'layer': [ - 'test' - ] - } - } - ] + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, 'size': 10, 'track_scores': true, 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, '_score' ] }; diff --git a/test/unit/fixture/search_linguistic_viewport_min_diagonal.js b/test/unit/fixture/search_linguistic_viewport_min_diagonal.js index e5dbb862..caa4aefa 100644 --- a/test/unit/fixture/search_linguistic_viewport_min_diagonal.js +++ b/test/unit/fixture/search_linguistic_viewport_min_diagonal.js @@ -1,128 +1,83 @@ module.exports = { 'query': { - 'filtered': { + 'function_score': { 'query': { - 'bool': { - 'must': [ - { - 'match': { - 'name.default': { - 'analyzer': 'peliasQueryFullToken', - 'boost': 1, - 'query': 
'test' - } - } - } - ], - 'should': [ - { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } - } - }, - { - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } - } - }, - 'functions': [ - { - 'weight': 2, - 'linear': { - 'center_point': { - 'origin': { - 'lat': 28.49136, - 'lon': -87.50623 - }, - 'offset': '0km', - 'scale': '1km', - 'decay': 0.5 + 'filtered': { + 'query': { + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' } } } - ], - 'score_mode': 'avg', - 'boost_mode': 'replace' - } - }, - { - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } - } - }, - 'max_boost': 20, - 'functions': [ - { - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - } - ], - 'score_mode': 'first', - 'boost_mode': 'replace' - } - }, - { - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2, - 'query': 'test' - } - } - }, - 'max_boost': 20, - 'functions': [ - { - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 + } + ] + } + }, + 'filter': { + 'bool': { + 'must': [ + { + 'terms': { + 'layer': [ + 'test' + ] } - ], - 'score_mode': 'first', - 'boost_mode': 'replace' - } + } + ] } - ] + } } - } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 
'field': 'population', + 'missing': 1 + }, + 'weight': 2 + } + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, 'size': 10, 'track_scores': true, 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, '_score' ] }; diff --git a/test/unit/fixture/search_partial_address.js b/test/unit/fixture/search_partial_address_original.js similarity index 100% rename from test/unit/fixture/search_partial_address.js rename to test/unit/fixture/search_partial_address_original.js diff --git a/test/unit/fixture/search_regions_address.js b/test/unit/fixture/search_regions_address_original.js similarity index 100% rename from test/unit/fixture/search_regions_address.js rename to test/unit/fixture/search_regions_address_original.js diff --git a/test/unit/fixture/search_with_category_filtering.js b/test/unit/fixture/search_with_category_filtering.js index ca1f26bb..9913b19c 100644 --- a/test/unit/fixture/search_with_category_filtering.js +++ b/test/unit/fixture/search_with_category_filtering.js @@ -1,86 +1,84 @@ module.exports = { 'query': { - 'bool': { - 'must': [{ - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - }, { - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 
'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] - } - }, { - 'function_score': { - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'filter': { + 'bool': { + 'must': [ + { + 'terms': { + 'category': [ + 'retail', + 'food' + ] + } + } + ] } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - }] + } } - }], - 'filter': [{ - 'terms': { - 'category': ['retail', 'food'] + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } - }] + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, 'size': 20, 'track_scores': true, 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, '_score' ] }; diff --git a/test/unit/fixture/search_with_category_filtering_original.js b/test/unit/fixture/search_with_category_filtering_original.js new file mode 100644 index 00000000..ca1f26bb --- /dev/null +++ b/test/unit/fixture/search_with_category_filtering_original.js @@ -0,0 +1,86 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + }, { + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 
'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + }, { + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter': [{ + 'terms': { + 'category': ['retail', 'food'] + } + }] + } + }, + 'size': 20, + 'track_scores': true, + 'sort': [ + '_score' + ] +}; diff --git a/test/unit/fixture/search_with_source_filtering.js b/test/unit/fixture/search_with_source_filtering.js index 24da9468..78889325 100644 --- a/test/unit/fixture/search_with_source_filtering.js +++ b/test/unit/fixture/search_with_source_filtering.js @@ -1,85 +1,83 @@ - module.exports = { 'query': { - 'bool': { - 'must': [{ - 'match': { - 'name.default': { - 'query': 'test', - 'boost': 1, - 'analyzer': 'peliasQueryFullToken' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'boost': 1, - 'slop': 2 - } - } - },{ - 'function_score': { + 'function_score': { + 'query': { + 'filtered': { 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'bool': { + 'should': [ + { + 'bool': { + '_name': 'fallback.street', + 'must': [ + { + 'match_phrase': { + 'address_parts.street': 'street value' + } + } + ], + 'should': [], + 'filter': { + 'term': { + 'layer': 'street' + } + } + } + } + ] } }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'popularity', - 'missing': 1 - }, - 'weight': 1 - }] - } - },{ - 'function_score': 
{ - 'query': { - 'match': { - 'phrase.default': { - 'query': 'test', - 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 - } + 'filter': { + 'bool': { + 'must': [ + { + 'terms': { + 'source': [ + 'test_source' + ] + } + } + ] } - }, - 'max_boost': 20, - 'score_mode': 'first', - 'boost_mode': 'replace', - 'functions': [{ - 'field_value_factor': { - 'modifier': 'log1p', - 'field': 'population', - 'missing': 1 - }, - 'weight': 2 - }] + } } - }], - 'filter': [{ - 'terms': { - 'source': ['test_source'] + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }, + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 } - }] + ], + 'score_mode': 'avg', + 'boost_mode': 'multiply' } }, - 'sort': [ '_score' ], 'size': 20, - 'track_scores': true + 'track_scores': true, + 'sort': [ + { + 'population': { + 'order': 'desc' + } + }, + { + 'popularity': { + 'order': 'desc' + } + }, + '_score' + ] }; diff --git a/test/unit/fixture/search_with_source_filtering_original.js b/test/unit/fixture/search_with_source_filtering_original.js new file mode 100644 index 00000000..24da9468 --- /dev/null +++ b/test/unit/fixture/search_with_source_filtering_original.js @@ -0,0 +1,85 @@ + +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'test', + 'boost': 1, + 'analyzer': 'peliasQueryFullToken' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 
'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'phrase.default': { + 'query': 'test', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'slop': 2, + 'boost': 1 + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 2 + }] + } + }], + 'filter': [{ + 'terms': { + 'source': ['test_source'] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/helper/diffPlaces.js b/test/unit/helper/diffPlaces.js new file mode 100644 index 00000000..a7dd692d --- /dev/null +++ b/test/unit/helper/diffPlaces.js @@ -0,0 +1,180 @@ +var isDifferent= require('../../../helper/diffPlaces').isDifferent; + +module.exports.tests = {}; + +module.exports.tests.dedupe = function(test, common) { + + test('match same object', function(t) { + var item1 = { + 'parent': { + 'country': [ 'United States' ], + 'county': [ 'Otsego County' ], + 'region_a': [ 'NY' ], + 'localadmin': [ 'Cherry Valley' ], + 'county_id': [ '102082399' ], + 'localadmin_id': [ '404522887' ], + 'country_a': [ 'USA' ], + 'region_id': [ '85688543' ], + 'locality': [ 'Cherry Valley' ], + 'locality_id': [ '85978799' ], + 'region': [ 'New York' ], + 'country_id': [ '85633793' ] + }, + 'name': { + 'default': '1 Main Street' + }, + 'address_parts': { + 'number': '1', + 'street': 'Main Street' + }, + 'layer': 'address' + }; + + t.false(isDifferent(item1, item1), 'should be the same'); + t.end(); + }); + + test('catch diff layers', function(t) { + var item1 = { 'layer': 'address' }; + var item2 = { 'layer': 'venue' }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('catch diff parent', function(t) { + var item1 = { + 'layer': 'same', + 'parent': { + 'country_id': '12345' + } + }; + var item2 = { + 'layer': 'same', + 
'parent': { + 'country_id': '54321' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('catch diff name', function(t) { + var item1 = { + 'name': { + 'default': '1 Main St' + } + }; + var item2 = { + 'name': { + 'default': '1 Broad St' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('match diff capitalization in name', function(t) { + var item1 = { + 'name': { + 'default': '1 MAIN ST' + } + }; + var item2 = { + 'name': { + 'default': '1 Main St' + } + }; + + t.false(isDifferent(item1, item2), 'should be the same'); + t.end(); + }); + + test('do not handle expansions', function(t) { + // we currently don't handle expansions and abbreviations and + // this is a test waiting to be updated as soon as we fix it + + var item1 = { + 'name': { + 'default': '1 Main Street' + } + }; + var item2 = { + 'name': { + 'default': '1 Main St' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('missing names in other langs should not be a diff', function(t) { + var item1 = { + 'name': { + 'default': 'Moscow', + 'rus': 'Москва' + } + }; + var item2 = { + 'name': { + 'default': 'Moscow' + } + }; + + t.false(isDifferent(item1, item2), 'should be the same'); + t.end(); + }); + + test('catch diff address', function(t) { + var item1 = { + 'address_parts': { + 'number': '1', + 'street': 'Main Street', + 'zip': '90210' + } + }; + var item2 = { + 'address_parts': { + 'number': '2', + 'street': 'Main Street', + 'zip': '90210' + } + }; + + t.true(isDifferent(item1, item2), 'should be different'); + t.end(); + }); + + test('catch diff address', function(t) { + var item1 = { + 'address_parts': { + 'number': '1', + 'street': 'Main Street', + 'zip': '90210' + } + }; + var item2 = { + 'address_parts': { + 'number': '1', + 'street': 'Main Street' + } + }; + + t.false(isDifferent(item1, item2), 'should be the same'); + t.end(); + }); +}; + +module.exports.all = 
function (tape, common) { + + function test(name, testFunction) { + return tape('[helper] diffPlaces: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/middleware/accuracy.js b/test/unit/middleware/accuracy.js new file mode 100644 index 00000000..13c14431 --- /dev/null +++ b/test/unit/middleware/accuracy.js @@ -0,0 +1,90 @@ +var accuracy = require('../../../middleware/accuracy')(); + +module.exports.tests = {}; + +module.exports.tests.accuracy = function(test, common) { + + test('empty res and req should not throw exception', function(t) { + function testIt() { + accuracy({}, {}, function() {}); + } + + t.doesNotThrow(testIt, 'an exception should not have been thrown'); + t.end(); + }); + + test('res.results without parsed_text should not throw exception', function(t) { + var res = { + data: [{ + layer: 'venue' + }] + }; + + accuracy({}, res, function() { + t.equal(res.data[0].accuracy, 'point', 'accuracy was set'); + t.end(); + }); + }); + + test('venue should have accuracy set to point', function(t) { + var res = { + data: [{ + layer: 'venue' + }] + }; + + accuracy({}, res, function() { + t.equal(res.data[0].accuracy, 'point', 'accuracy was set'); + t.end(); + }); + }); + + test('address should have accuracy set to point', function(t) { + var res = { + data: [{ + layer: 'address' + }] + }; + + accuracy({}, res, function() { + t.equal(res.data[0].accuracy, 'point', 'accuracy was set'); + t.end(); + }); + }); + + test('region should have accuracy set to centroid', function(t) { + var res = { + data: [{ + layer: 'region' + }] + }; + + accuracy({}, res, function() { + t.equal(res.data[0].accuracy, 'centroid', 'accuracy was set'); + t.end(); + }); + }); + + test('street should have accuracy set to centroid', function(t) { + var res = { + data: [{ + layer: 'street' + }] + }; + + accuracy({}, res, function() { + t.equal(res.data[0].accuracy, 'centroid', 'accuracy 
was set'); + t.end(); + }); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('[middleware] confidenceScore: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/middleware/confidenceScore.js b/test/unit/middleware/confidenceScore.js index 7d6ba87d..a0b4de6f 100644 --- a/test/unit/middleware/confidenceScore.js +++ b/test/unit/middleware/confidenceScore.js @@ -46,7 +46,8 @@ module.exports.tests.confidenceScore = function(test, common) { } }], meta: { - scores: [10] + scores: [10], + query_type: 'original' } }; @@ -86,7 +87,10 @@ module.exports.tests.confidenceScore = function(test, common) { county: ['city2'] } }], - meta: {scores: [10]} + meta: { + scores: [10], + query_type: 'original' + } }; confidenceScore(req, res, function() {}); @@ -119,13 +123,52 @@ module.exports.tests.confidenceScore = function(test, common) { county: ['city1'] } }], - meta: {scores: [10]} + meta: { + scores: [10], + query_type: 'original' + } }; confidenceScore(req, res, function() {}); t.equal(res.data[0].confidence, 0.28, 'score was set'); t.end(); }); + + test('should only work for original query_type', function(t) { + var req = { + clean: { + text: '123 Main St, City, NM', + parsed_text: { + number: 123, + street: 'Main St', + state: 'NM' + } + } + }; + var res = { + data: [{ + _score: 10, + found: true, + value: 1, + center_point: { lat: 100.1, lon: -50.5 }, + name: { default: 'test name1' }, + parent: { + country: ['country1'], + region: undefined, + region_a: undefined, + county: ['city1'] + } + }], + meta: { + scores: [10], + query_type: 'fallback' + } + }; + + confidenceScore(req, res, function() {}); + t.false(res.data[0].hasOwnProperty('confidence'), 'score was not set'); + t.end(); + }); }; module.exports.all = function (tape, common) { diff --git a/test/unit/middleware/confidenceScoreFallback.js 
b/test/unit/middleware/confidenceScoreFallback.js new file mode 100644 index 00000000..5fcd03de --- /dev/null +++ b/test/unit/middleware/confidenceScoreFallback.js @@ -0,0 +1,250 @@ +var confidenceScore = require('../../../middleware/confidenceScoreFallback')(); + +module.exports.tests = {}; + +module.exports.tests.confidenceScore = function(test, common) { + + test('empty res and req should not throw exception', function(t) { + function testIt() { + confidenceScore({}, {}, function() {}); + } + + t.doesNotThrow(testIt, 'an exception should not have been thrown'); + t.end(); + }); + + test('res.results without parsed_text should not throw exception', function(t) { + var req = {}; + var res = { + data: [{ + name: 'foo' + }], + meta: [10] + }; + + function testIt() { + confidenceScore(req, res, function() {}); + } + + t.doesNotThrow(testIt, 'an exception should not have been thrown'); + t.end(); + }); + + test('hit without address should not error', function(t) { + var req = { + clean: { + text: 'test name3', + parsed_text: { + postalcode: 12345 + } + } + }; + var res = { + data: [{ + name: { + default: 'foo' + } + }], + meta: { + scores: [10], + query_type: 'original' + } + }; + + function testIt() { + confidenceScore(req, res, function() {}); + } + + t.doesNotThrow(testIt, 'an exception should not have been thrown with no address'); + t.end(); + }); + + + test('res.results without parsed_text should not throw exception', function(t) { + var req = { + clean: { text: 'test name1' } + }; + var res = { + data: [{ + _score: 10, + found: true, + value: 1, + center_point: { lat: 100.1, lon: -50.5 }, + name: { default: 'test name1' }, + parent: { + country: ['country1'], + region: ['state1'], + county: ['city1'] + } + }, { + _score: 20, + value: 2, + center_point: { lat: 100.2, lon: -51.5 }, + name: { default: 'test name2' }, + parent: { + country: ['country2'], + region: ['state2'], + county: ['city2'] + } + }], + meta: { + scores: [10], + query_type: 'fallback' + } + }; 
+ + confidenceScore(req, res, function() {}); + t.equal(res.data[0].confidence, 0.1, 'score was set'); + t.end(); + }); + + test('no fallback addresses should have max score', function(t) { + var req = { + clean: { + text: '123 Main St, City, NM', + parsed_text: { + number: 123, + street: 'Main St', + state: 'NM' + } + } + }; + var res = { + data: [{ + _score: 10, + found: true, + value: 1, + layer: 'address', + center_point: { lat: 100.1, lon: -50.5 }, + name: { default: 'test name1' }, + parent: { + country: ['country1'], + region: ['region1'], + county: ['city1'] + } + }], + meta: { + scores: [10], + query_type: 'fallback' + } + }; + + confidenceScore(req, res, function() {}); + t.equal(res.data[0].confidence, 1.0, 'max score was set'); + t.end(); + }); + + test('no fallback street query should have max score', function(t) { + var req = { + clean: { + text: 'Main St, City, NM', + parsed_text: { + street: 'Main St', + state: 'NM' + } + } + }; + var res = { + data: [{ + _score: 10, + found: true, + value: 1, + layer: 'street', + center_point: { lat: 100.1, lon: -50.5 }, + name: { default: 'test name1' }, + parent: { + country: ['country1'], + region: ['region1'], + county: ['city1'] + } + }], + meta: { + scores: [10], + query_type: 'fallback' + } + }; + + confidenceScore(req, res, function() {}); + t.equal(res.data[0].confidence, 1.0, 'max score was set'); + t.end(); + }); + + test('fallback to locality should have score deduction', function(t) { + var req = { + clean: { + text: '123 Main St, City, NM', + parsed_text: { + number: 123, + street: 'Main St', + state: 'NM' + } + } + }; + var res = { + data: [{ + _score: 10, + found: true, + value: 1, + layer: 'locality', + center_point: { lat: 100.1, lon: -50.5 }, + name: { default: 'test name1' }, + parent: { + country: ['country1'] + } + }], + meta: { + scores: [10], + query_type: 'fallback' + } + }; + + confidenceScore(req, res, function() {}); + t.equal(res.data[0].confidence, 0.6, 'score was set'); + t.end(); + 
}); + + test('fallback to country should have score deduction', function(t) { + var req = { + clean: { + text: '123 Main St, City, NM, USA', + parsed_text: { + number: 123, + street: 'Main St', + state: 'NM', + country: 'USA' + } + } + }; + var res = { + data: [{ + _score: 10, + found: true, + value: 1, + layer: 'country', + center_point: { lat: 100.1, lon: -50.5 }, + name: { default: 'test name1' }, + parent: { + country: ['country1'] + } + }], + meta: { + scores: [10], + query_type: 'fallback' + } + }; + + confidenceScore(req, res, function() {}); + t.equal(res.data[0].confidence, 0.1, 'score was set'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('[middleware] confidenceScore: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/middleware/dedupe.js b/test/unit/middleware/dedupe.js index ad553f9c..b8100955 100644 --- a/test/unit/middleware/dedupe.js +++ b/test/unit/middleware/dedupe.js @@ -16,7 +16,7 @@ module.exports.tests.dedupe = function(test, common) { data: data }; - var expectedCount = 9; + var expectedCount = 8; dedupe(req, res, function () { t.equal(res.data.length, expectedCount, 'results have fewer items than before'); t.end(); diff --git a/test/unit/middleware/trimByGranularity.js b/test/unit/middleware/trimByGranularity.js new file mode 100644 index 00000000..1eb65206 --- /dev/null +++ b/test/unit/middleware/trimByGranularity.js @@ -0,0 +1,431 @@ +var trimByGranularity = require('../../../middleware/trimByGranularity')(); + +module.exports.tests = {}; + +module.exports.tests.trimByGranularity = function(test, common) { + test('empty res and req should not throw exception', function(t) { + function testIt() { + trimByGranularity({}, {}, function() {}); + } + + t.doesNotThrow(testIt, 'an exception should not have been thrown'); + t.end(); + }); + + test('all records with 
fallback.* matched_queries name should retain only venues when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'venue 1', _matched_queries: ['fallback.venue'] }, + { name: 'venue 2', _matched_queries: ['fallback.venue'] }, + { name: 'address 1', _matched_queries: ['fallback.address'] }, + { name: 'street 1', _matched_queries: ['fallback.street'] }, + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'venue 1', _matched_queries: ['fallback.venue'] }, + { name: 'venue 2', _matched_queries: ['fallback.venue'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only venue records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only addresses when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'address 1', _matched_queries: ['fallback.address'] }, + { name: 'address 2', _matched_queries: ['fallback.address'] }, + { name: 'street 1', _matched_queries: ['fallback.street'] }, + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', 
_matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'address 1', _matched_queries: ['fallback.address'] }, + { name: 'address 2', _matched_queries: ['fallback.address'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only address records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only streets when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'street 1', _matched_queries: ['fallback.street'] }, + { name: 'street 2', _matched_queries: ['fallback.street'] }, + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'street 1', _matched_queries: ['fallback.street'] }, + { name: 'street 2', 
_matched_queries: ['fallback.street'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only street records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only neighbourhoods when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'neighbourhood 2', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'neighbourhood 1', _matched_queries: ['fallback.neighbourhood'] }, + { name: 'neighbourhood 2', _matched_queries: ['fallback.neighbourhood'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only neighbourhood records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only localities when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'locality 2', _matched_queries: ['fallback.locality'] }, + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', 
_matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'locality 1', _matched_queries: ['fallback.locality'] }, + { name: 'locality 2', _matched_queries: ['fallback.locality'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only locality records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only localadmins when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'localadmin 2', _matched_queries: ['fallback.localadmin'] }, + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'localadmin 1', _matched_queries: ['fallback.localadmin'] }, + { name: 'localadmin 2', _matched_queries: ['fallback.localadmin'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only localadmin records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all 
records with fallback.* matched_queries name should retain only counties when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'county 2', _matched_queries: ['fallback.county'] }, + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'county 1', _matched_queries: ['fallback.county'] }, + { name: 'county 2', _matched_queries: ['fallback.county'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only county records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only macrocounties when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'macrocounty 2', _matched_queries: ['fallback.macrocounty'] }, + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'macrocounty 1', _matched_queries: ['fallback.macrocounty'] }, + { name: 'macrocounty 2', _matched_queries: ['fallback.macrocounty'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + 
t.deepEquals(res.data, expected_data, 'only macrocounty records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only regions when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'region 2', _matched_queries: ['fallback.region'] }, + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'region 1', _matched_queries: ['fallback.region'] }, + { name: 'region 2', _matched_queries: ['fallback.region'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only region records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only macroregions when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'macroregion 2', _matched_queries: ['fallback.macroregion'] }, + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'macroregion 1', _matched_queries: ['fallback.macroregion'] }, + { name: 'macroregion 2', _matched_queries: ['fallback.macroregion'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only macroregion records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with 
fallback.* matched_queries name should retain only dependencies when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'dependency 2', _matched_queries: ['fallback.dependency'] }, + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'dependency 1', _matched_queries: ['fallback.dependency'] }, + { name: 'dependency 2', _matched_queries: ['fallback.dependency'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only dependency records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('all records with fallback.* matched_queries name should retain only countries when they are most granular', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'country 2', _matched_queries: ['fallback.country'] }, + { name: 'unknown', _matched_queries: ['fallback.unknown'] } + ] + }; + + var expected_data = [ + { name: 'country 1', _matched_queries: ['fallback.country'] }, + { name: 'country 2', _matched_queries: ['fallback.country'] }, + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'only country records should be here'); + t.end(); + }); + } + + testIt(); + }); + + test('presence of any non-fallback.* named queries should not trim', function(t) { + var req = { clean: {} }; + + var res = { + data: [ + { name: 'region', _matched_queries: ['fallback.region'] }, + { name: 'country', _matched_queries: ['fallback.country'] }, + { name: 'result with non-named query' } + ] + }; + + var expected_data = [ + { name: 'region', _matched_queries: ['fallback.region'] }, + { name: 'country', _matched_queries: ['fallback.country'] 
}, + { name: 'result with non-named query' } + ]; + + function testIt() { + trimByGranularity(req, res, function() { + t.deepEquals(res.data, expected_data, 'all should results should have been retained'); + t.end(); + }); + } + + testIt(); + + }); + +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('[middleware] trimByGranularity: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/mock/backend.js b/test/unit/mock/backend.js index 7d347a30..739ed2cb 100644 --- a/test/unit/mock/backend.js +++ b/test/unit/mock/backend.js @@ -11,6 +11,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){ _id: 'myid1', _type: 'mytype1', _score: 10, + matched_queries: ['query 1', 'query 2'], _source: { value: 1, center_point: { lat: 100.1, lon: -50.5 }, @@ -21,6 +22,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){ _id: 'myid2', _type: 'mytype2', _score: 20, + matched_queries: ['query 3'], _source: { value: 2, center_point: { lat: 100.2, lon: -51.5 }, diff --git a/test/unit/mock/query.js b/test/unit/mock/query.js index 2a5f21fd..a3209a4d 100644 --- a/test/unit/mock/query.js +++ b/test/unit/mock/query.js @@ -4,7 +4,10 @@ function setup(){ } function query( clean ){ - return clean; + return { + type: 'mock', + body: clean + }; } module.exports = setup; \ No newline at end of file diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js index 437142f3..64b6c447 100644 --- a/test/unit/query/autocomplete.js +++ b/test/unit/query/autocomplete.js @@ -1,4 +1,3 @@ - var generate = require('../../../query/autocomplete'); module.exports.tests = {}; @@ -22,7 +21,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_linguistic_only'); - t.deepEqual(compiled, expected, 
'autocomplete_linguistic_only'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_only'); t.end(); }); @@ -37,7 +37,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_linguistic_multiple_tokens'); - t.deepEqual(compiled, expected, 'autocomplete_linguistic_multiple_tokens'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_multiple_tokens'); t.end(); }); @@ -57,7 +58,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_linguistic_with_admin'); - t.deepEqual(compiled, expected, 'autocomplete_linguistic_with_admin'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_with_admin'); t.end(); }); @@ -75,7 +77,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_linguistic_final_token'); - t.deepEqual(compiled, expected, 'autocomplete_linguistic_final_token'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_final_token'); t.end(); }); @@ -92,7 +95,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_linguistic_focus'); - t.deepEqual(compiled, expected, 'autocomplete_linguistic_focus'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_focus'); t.end(); }); @@ -109,7 +113,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( 
JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_linguistic_focus_null_island'); - t.deepEqual(compiled, expected, 'autocomplete_linguistic_focus_null_island'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_focus_null_island'); t.end(); }); @@ -125,7 +130,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_with_source_filtering'); - t.deepEqual(compiled, expected, 'valid autocomplete query with source filtering'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid autocomplete query with source filtering'); t.end(); }); @@ -141,7 +147,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_with_layer_filtering'); - t.deepEqual(compiled, expected, 'valid autocomplete query with layer filtering'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid autocomplete query with layer filtering'); t.end(); }); @@ -161,7 +168,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_single_character_street'); - t.deepEqual(compiled, expected, 'autocomplete_single_character_street'); + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_single_character_street'); t.end(); }); @@ -177,7 +185,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/autocomplete_boundary_country'); - t.deepEqual(compiled, expected, 'autocomplete: valid boundary.country query'); + t.deepEqual(compiled.type, 
'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete: valid boundary.country query'); t.end(); }); }; diff --git a/test/unit/query/reverse.js b/test/unit/query/reverse.js index 03985fbd..24ada46a 100644 --- a/test/unit/query/reverse.js +++ b/test/unit/query/reverse.js @@ -22,7 +22,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/reverse_standard'); - t.deepEqual(compiled, expected, 'reverse_standard'); + t.deepEqual(compiled.type, 'reverse', 'query type set'); + t.deepEqual(compiled.body, expected, 'reverse_standard'); t.end(); }); @@ -38,7 +39,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/reverse_null_island'); - t.deepEqual(compiled, expected, 'reverse_null_island'); + t.deepEqual(compiled.type, 'reverse', 'query type set'); + t.deepEqual(compiled.body, expected, 'reverse_null_island'); t.end(); }); @@ -54,7 +56,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = '123km'; - t.deepEqual(compiled.query.bool.filter[0].geo_distance.distance, expected, 'distance set to boundary circle radius'); + t.deepEqual(compiled.type, 'reverse', 'query type set'); + t.deepEqual(compiled.body.query.bool.filter[0].geo_distance.distance, expected, 'distance set to boundary circle radius'); t.end(); }); @@ -71,8 +74,9 @@ module.exports.tests.query = function(test, common) { // this should not equal `point.lat` and `point.lon` as it was explitely specified var expected = { lat: clean['boundary.circle.lat'], lon: clean['boundary.circle.lon'] }; - var centroid = compiled.query.bool.filter[0].geo_distance.center_point; + var centroid = compiled.body.query.bool.filter[0].geo_distance.center_point; + t.deepEqual(compiled.type, 'reverse', 'query type set'); t.deepEqual(centroid, 
expected, 'reverse: boundary.circle/lon overrides point.lat/lon'); t.end(); }); @@ -87,7 +91,7 @@ module.exports.tests.query = function(test, common) { }); var compiled = JSON.parse( JSON.stringify( query ) ); - t.equal( compiled.size, expected[index], 'valid reverse query for size: '+ size); + t.equal( compiled.body.size, expected[index], 'valid reverse query for size: '+ size); }); t.end(); }); @@ -105,7 +109,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/reverse_with_boundary_country'); - t.deepEqual(compiled, expected, 'valid reverse query with boundary.country'); + t.deepEqual(compiled.type, 'reverse', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid reverse query with boundary.country'); t.end(); }); @@ -122,7 +127,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/reverse_with_source_filtering'); - t.deepEqual(compiled, expected, 'valid reverse query with source filtering'); + t.deepEqual(compiled.type, 'reverse', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid reverse query with source filtering'); t.end(); }); @@ -139,7 +145,8 @@ module.exports.tests.query = function(test, common) { var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/reverse_with_layer_filtering'); - t.deepEqual(compiled, expected, 'valid reverse query with source filtering'); + t.deepEqual(compiled.type, 'reverse', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid reverse query with source filtering'); t.end(); }); }; diff --git a/test/unit/query/search.js b/test/unit/query/search.js index 5abb35f7..36fbd17d 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -1,4 +1,5 @@ var generate = require('../../../query/search'); +var fs = require('fs'); module.exports.tests = {}; @@ -11,175 +12,262 
@@ module.exports.tests.interface = function(test, common) { module.exports.tests.query = function(test, common) { test('valid search + focus + bbox', function(t) { - var query = generate({ - text: 'test', querySize: 10, + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', + querySize: 10, 'focus.point.lat': 29.49136, 'focus.point.lon': -82.50622, 'boundary.rect.min_lat': 47.47, 'boundary.rect.max_lon': -61.84, 'boundary.rect.max_lat': 11.51, 'boundary.rect.min_lon': -103.16, layers: ['test'] - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/search_linguistic_focus_bbox'); - t.deepEqual(compiled, expected, 'search_linguistic_focus_bbox'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus_bbox'); t.end(); }); test('valid search + bbox', function(t) { - var query = generate({ - text: 'test', querySize: 10, + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', + querySize: 10, 'boundary.rect.min_lat': 47.47, 'boundary.rect.max_lon': -61.84, 'boundary.rect.max_lat': 11.51, 'boundary.rect.min_lon': -103.16, layers: ['test'] - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/search_linguistic_bbox'); - t.deepEqual(compiled, expected, 'search_linguistic_bbox'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_bbox'); t.end(); }); test('valid lingustic-only search', function(t) { - var query = generate({ + var clean = { + parsed_text: { + street: 'street value' + }, text: 'test', querySize: 10, layers: ['test'] - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/search_linguistic_only'); - t.deepEqual(compiled, expected, 
'search_linguistic_only'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_only'); t.end(); }); test('search search + focus', function(t) { - var query = generate({ + var clean = { + parsed_text: { + street: 'street value' + }, text: 'test', querySize: 10, 'focus.point.lat': 29.49136, 'focus.point.lon': -82.50622, layers: ['test'] - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/search_linguistic_focus'); - t.deepEqual(compiled, expected, 'search_linguistic_focus'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus'); t.end(); }); - test('search search + focus on null island', function(t) { - var query = generate({ + test('search search + viewport', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, text: 'test', querySize: 10, - 'focus.point.lat': 0, 'focus.point.lon': 0, + 'focus.viewport.min_lat': 28.49136, + 'focus.viewport.max_lat': 30.49136, + 'focus.viewport.min_lon': -87.50622, + 'focus.viewport.max_lon': -77.50622, layers: ['test'] - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); - var expected = require('../fixture/search_linguistic_focus_null_island'); + var expected = require('../fixture/search_linguistic_viewport'); - t.deepEqual(compiled, expected, 'search_linguistic_focus_null_island'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_viewport'); t.end(); }); - test('valid query with a full valid address', function(t) { - var query = generate({ text: '123 main st new york ny 10010 US', - layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], - querySize: 10, + // viewport scale sizing currently disabled. 
+ // ref: https://github.com/pelias/api/pull/388 + test('search with viewport diagonal < 1km should set scale to 1km', function(t) { + var clean = { parsed_text: { - number: '123', - street: 'main st', - state: 'NY', - country: 'USA', - postalcode: '10010', - regions: [ 'new york' ] - } - }); + street: 'street value' + }, + text: 'test', querySize: 10, + 'focus.viewport.min_lat': 28.49135, + 'focus.viewport.max_lat': 28.49137, + 'focus.viewport.min_lon': -87.50622, + 'focus.viewport.max_lon': -87.50624, + layers: ['test'] + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); - var expected = require('../fixture/search_full_address'); + var expected = require('../fixture/search_linguistic_viewport_min_diagonal'); - t.deepEqual(compiled, expected, 'search_full_address'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid search query'); t.end(); }); - test('valid query with partial address', function(t) { - var query = generate({ text: 'soho grand, new york', - layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], - querySize: 10, - parsed_text: { name: 'soho grand', - state: 'NY', - regions: [ 'soho grand' ], - admin_parts: 'new york' - } - }); + test('search search + focus on null island', function(t) { + var clean = { + parsed_text: { + street: 'street value' + }, + text: 'test', querySize: 10, + 'focus.point.lat': 0, 'focus.point.lon': 0, + layers: ['test'] + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); - var expected = require('../fixture/search_partial_address'); + var expected = require('../fixture/search_linguistic_focus_null_island'); - t.deepEqual(compiled, expected, 'search_partial_address'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus_null_island'); t.end(); }); - test('valid query with 
regions in address', function(t) { - var query = generate({ text: '1 water st manhattan ny', - layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], - querySize: 10, - parsed_text: { number: '1', - street: 'water st', - state: 'NY', - regions: [ 'manhattan' ] - }, - }); + test('parsed_text with all fields should use FallbackQuery', function(t) { + var clean = { + parsed_text: { + query: 'query value', + category: 'category value', + number: 'number value', + street: 'street value', + neighbourhood: 'neighbourhood value', + borough: 'borough value', + postalcode: 'postalcode value', + city: 'city value', + county: 'county value', + state: 'state value', + country: 'country value' + } + }; - var compiled = JSON.parse( JSON.stringify( query ) ); - var expected = require('../fixture/search_regions_address'); + var query = generate(clean); + + var compiled = JSON.parse(JSON.stringify(query)); + var expected = require('../fixture/search_fallback'); - t.deepEqual(compiled, expected, 'search_regions_address'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'fallbackQuery'); t.end(); + + }); + + test('parsed_text with single admin field should return undefined', function(t) { + ['neighbourhood', 'borough', 'city', 'county', 'state', 'country'].forEach(function(placeType) { + var clean = { + parsed_text: {} + }; + + clean.parsed_text[placeType] = placeType + ' value'; + + var query = generate(clean); + + t.equals(query, undefined, 'geodisambiguationQuery'); + + }); + + t.end(); + }); test('valid boundary.country search', function(t) { - var query = generate({ + var clean = { + parsed_text: { + street: 'street value' + }, text: 'test', querySize: 10, layers: ['test'], 'boundary.country': 'ABC' - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/search_boundary_country'); - t.deepEqual(compiled, 
expected, 'search: valid boundary.country query'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search: valid boundary.country query'); t.end(); }); test('valid sources filter', function(t) { - var query = generate({ + var clean = { + parsed_text: { + street: 'street value' + }, 'text': 'test', 'sources': ['test_source'] - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/search_with_source_filtering'); - t.deepEqual(compiled, expected, 'search: valid search query with source filtering'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'search: valid search query with source filtering'); t.end(); }); test('categories filter', function(t) { - var query = generate({ + var clean = { + parsed_text: { + street: 'street value' + }, 'text': 'test', 'categories': ['retail','food'] - }); + }; + + var query = generate(clean); var compiled = JSON.parse( JSON.stringify( query ) ); var expected = require('../fixture/search_with_category_filtering'); - t.deepEqual(compiled, expected, 'valid search query with category filtering'); + t.deepEqual(compiled.type, 'fallback', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid search query with category filtering'); t.end(); }); }; diff --git a/test/unit/query/search_original.js b/test/unit/query/search_original.js new file mode 100644 index 00000000..5e035679 --- /dev/null +++ b/test/unit/query/search_original.js @@ -0,0 +1,206 @@ +var generate = require('../../../query/search_original'); + +module.exports.tests = {}; + +module.exports.tests.interface = function(test, common) { + test('valid interface', function(t) { + t.equal(typeof generate, 'function', 'valid function'); + t.end(); + }); +}; + +module.exports.tests.query = function(test, common) { + test('valid search + focus + bbox', function(t) { + var query = generate({ + text: 
'test', querySize: 10, + 'focus.point.lat': 29.49136, 'focus.point.lon': -82.50622, + 'boundary.rect.min_lat': 47.47, + 'boundary.rect.max_lon': -61.84, + 'boundary.rect.max_lat': 11.51, + 'boundary.rect.min_lon': -103.16, + layers: ['test'] + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_linguistic_focus_bbox_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus_bbox'); + t.end(); + }); + + test('valid search + bbox', function(t) { + var query = generate({ + text: 'test', querySize: 10, + 'boundary.rect.min_lat': 47.47, + 'boundary.rect.max_lon': -61.84, + 'boundary.rect.max_lat': 11.51, + 'boundary.rect.min_lon': -103.16, + layers: ['test'] + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_linguistic_bbox_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_bbox'); + t.end(); + }); + + test('valid lingustic-only search', function(t) { + var query = generate({ + text: 'test', querySize: 10, + layers: ['test'] + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_linguistic_only_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_only'); + t.end(); + }); + + test('search search + focus', function(t) { + var query = generate({ + text: 'test', querySize: 10, + 'focus.point.lat': 29.49136, 'focus.point.lon': -82.50622, + layers: ['test'] + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_linguistic_focus_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus'); + t.end(); + }); + + test('search search + focus on null 
island', function(t) { + var query = generate({ + text: 'test', querySize: 10, + 'focus.point.lat': 0, 'focus.point.lon': 0, + layers: ['test'] + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_linguistic_focus_null_island_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_linguistic_focus_null_island'); + t.end(); + }); + + test('valid query with a full valid address', function(t) { + var query = generate({ text: '123 main st new york ny 10010 US', + layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], + querySize: 10, + parsed_text: { + number: '123', + street: 'main st', + state: 'NY', + country: 'USA', + postalcode: '10010', + regions: [ 'new york' ] + } + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_full_address_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_full_address'); + t.end(); + }); + + test('valid query with partial address', function(t) { + var query = generate({ text: 'soho grand, new york', + layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], + querySize: 10, + parsed_text: { name: 'soho grand', + state: 'NY', + regions: [ 'soho grand' ], + admin_parts: 'new york' + } + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_partial_address_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_partial_address'); + t.end(); + }); + + test('valid query with regions in address', function(t) { + var query = generate({ text: '1 water st manhattan ny', + layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], + querySize: 10, 
+ parsed_text: { number: '1', + street: 'water st', + state: 'NY', + regions: [ 'manhattan' ] + } + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_regions_address_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search_regions_address'); + t.end(); + }); + + test('valid boundary.country search', function(t) { + var query = generate({ + text: 'test', querySize: 10, + layers: ['test'], + 'boundary.country': 'ABC' + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_boundary_country_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search: valid boundary.country query'); + t.end(); + }); + + test('valid sources filter', function(t) { + var query = generate({ + 'text': 'test', + 'sources': ['test_source'] + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_with_source_filtering_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'search: valid search query with source filtering'); + t.end(); + }); + + test('categories filter', function(t) { + var query = generate({ + 'text': 'test', + 'categories': ['retail','food'] + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/search_with_category_filtering_original'); + + t.deepEqual(compiled.type, 'original', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid search query with category filtering'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('search query ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/query/text_parser.js 
b/test/unit/query/text_parser.js new file mode 100644 index 00000000..34830c7f --- /dev/null +++ b/test/unit/query/text_parser.js @@ -0,0 +1,165 @@ +var VariableStore = require('pelias-query').Vars; +var text_parser = require('../../../query/text_parser'); + +module.exports.tests = {}; + +module.exports.tests.interface = function(test, common) { + test('valid interface', function(t) { + t.equal(typeof text_parser, 'function', 'valid function'); + t.end(); + }); +}; + +module.exports.tests.query = function(test, common) { + test('parsed_text without properties should leave vs properties unset', function(t) { + var parsed_text = {}; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.false(vs.isset('input:query')); + t.false(vs.isset('input:category')); + t.false(vs.isset('input:housenumber')); + t.false(vs.isset('input:street')); + t.false(vs.isset('input:neighbourhood')); + t.false(vs.isset('input:borough')); + t.false(vs.isset('input:postcode')); + t.false(vs.isset('input:locality')); + t.false(vs.isset('input:county')); + t.false(vs.isset('input:region')); + t.false(vs.isset('input:country')); + t.end(); + + }); + + test('parsed_text without properties should leave vs properties unset', function(t) { + var parsed_text = { + query: 'query value', + category: 'category value', + number: 'number value', + street: 'street value', + neighbourhood: 'neighbourhood value', + borough: 'borough value', + postalcode: 'postalcode value', + city: 'city value', + county: 'county value', + state: 'state value', + country: 'country value' + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), 'query value'); + t.equals(vs.var('input:category').toString(), 'category value'); + t.equals(vs.var('input:housenumber').toString(), 'number value'); + t.equals(vs.var('input:street').toString(), 'street value'); + t.equals(vs.var('input:neighbourhood').toString(), 'neighbourhood value'); + 
t.equals(vs.var('input:borough').toString(), 'borough value'); + t.equals(vs.var('input:postcode').toString(), 'postalcode value'); + t.equals(vs.var('input:locality').toString(), 'city value'); + t.equals(vs.var('input:county').toString(), 'county value'); + t.equals(vs.var('input:region').toString(), 'state value'); + t.equals(vs.var('input:country').toString(), 'country value'); + t.end(); + + }); + +}; + +module.exports.tests.housenumber_special_cases = function(test, common) { + test('numeric query with street but no number should reassign query to housenumber', function(t) { + var parsed_text = { + query: '17', + // no house number set + street: 'street value' + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.false(vs.isset('input:query')); + t.equals(vs.var('input:housenumber').toString(), '17'); + t.equals(vs.var('input:street').toString(), 'street value'); + t.end(); + + }); + + test('numeric query with street but without number should not change anything', function(t) { + var parsed_text = { + query: '17', + number: 'housenumber value', + street: 'street value' + // no number or street + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), '17'); + t.equals(vs.var('input:housenumber').toString(), 'housenumber value'); + t.equals(vs.var('input:street').toString(), 'street value'); + t.end(); + + }); + + test('numeric query with number but without street should not change anything', function(t) { + var parsed_text = { + query: '17', + number: 'number value' + // no number or street + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), '17'); + t.equals(vs.var('input:housenumber').toString(), 'number value'); + t.false(vs.isset('input:street')); + t.end(); + + }); + + test('numeric query without street or number should not change anything', function(t) { + var parsed_text = { + query: '17' + // no number or 
street + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), '17'); + t.false(vs.isset('input:housenumber')); + t.false(vs.isset('input:street')); + t.end(); + + }); + + test('non-numeric query with street but no number should not change anything', function(t) { + var parsed_text = { + query: '13 this is 15 not a number 17', + street: 'street value' + }; + var vs = new VariableStore(); + + text_parser(parsed_text, vs); + + t.equals(vs.var('input:query').toString(), '13 this is 15 not a number 17'); + t.false(vs.isset('input:housenumber')); + t.equals(vs.var('input:street').toString(), 'street value'); + t.end(); + + }); + +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('text_parser ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index de8d770a..be804d97 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -12,6 +12,7 @@ var tests = [ require('./controller/index'), require('./controller/place'), require('./controller/search'), + require('./helper/diffPlaces'), require('./helper/geojsonify'), require('./helper/labelGenerator_examples'), require('./helper/labelGenerator_default'), @@ -24,7 +25,9 @@ var tests = [ require('./helper/type_mapping'), require('./helper/sizeCalculator'), require('./middleware/access_log'), + require('./middleware/accuracy'), require('./middleware/confidenceScore'), + require('./middleware/confidenceScoreFallback'), require('./middleware/confidenceScoreReverse'), require('./middleware/distance'), require('./middleware/localNamingConventions'), @@ -32,12 +35,15 @@ var tests = [ require('./middleware/parseBBox'), require('./middleware/sendJSON'), require('./middleware/normalizeParentIds'), + require('./middleware/trimByGranularity'), require('./query/autocomplete'), 
require('./query/autocomplete_defaults'), require('./query/search_defaults'), require('./query/reverse_defaults'), require('./query/reverse'), require('./query/search'), + require('./query/search_original'), + require('./query/text_parser'), require('./sanitiser/_boundary_country'), require('./sanitiser/_flag_bool'), require('./sanitiser/_geo_common'), @@ -50,6 +56,7 @@ var tests = [ require('./sanitiser/_sources'), require('./sanitiser/_sources_and_layers'), require('./sanitiser/_text'), + require('./sanitiser/_text_addressit'), require('./sanitiser/_tokenizer'), require('./sanitiser/_deprecate_quattroshapes'), require('./sanitiser/_categories'), @@ -58,10 +65,12 @@ var tests = [ require('./sanitiser/autocomplete'), require('./sanitiser/place'), require('./sanitiser/reverse'), + require('./sanitiser/sanitizeAll'), require('./sanitiser/search'), + require('./sanitiser/search_fallback'), require('./sanitiser/wrap'), require('./service/mget'), - require('./service/search'), + require('./service/search') ]; tests.map(function(t) { diff --git a/test/unit/sanitiser/_sources_and_layers.js b/test/unit/sanitiser/_sources_and_layers.js index b943770c..5b8bf3a2 100644 --- a/test/unit/sanitiser/_sources_and_layers.js +++ b/test/unit/sanitiser/_sources_and_layers.js @@ -73,6 +73,17 @@ test('valid combination', function(t) { t.end(); }); + test('valid combination: wof venues', function(t) { + var raw = {}; + var clean = { sources: ['whosonfirst'], layers: ['venue'] }; + + var messages = sanitize(raw, clean); + + t.equal(messages.errors.length, 0, 'should return no errors'); + t.equal(messages.warnings.length, 0, 'should return no warnings'); + t.end(); + }); + test('valid combination because of multiple sources', function(t) { var raw = {}; var clean = { sources: ['openstreetmap', 'openaddresses'], layers: ['venue'] }; diff --git a/test/unit/sanitiser/_text.js b/test/unit/sanitiser/_text.js index 3868c86c..82dfb5e0 100644 --- a/test/unit/sanitiser/_text.js +++ 
b/test/unit/sanitiser/_text.js @@ -1,23 +1,147 @@ -var sanitiser = require('../../../sanitiser/_text'); var type_mapping = require('../../../helper/type_mapping'); +var proxyquire = require('proxyquire').noCallThru(); module.exports.tests = {}; module.exports.tests.text_parser = function(test, common) { - test('short input text has admin layers set ', function(t) { + test('non-empty raw.text should call analyzer and set clean.text and clean.parsed_text', function(t) { + var mock_analyzer_response = { + key1: 'value 1', + key2: 'value 2' + }; + + var sanitiser = proxyquire('../../../sanitiser/_text', { + 'pelias-text-analyzer': { parse: function(query) { + return mock_analyzer_response; + } + }}); + + var raw = { + text: 'raw input' + }; + var clean = { + }; + + var expected_clean = { + text: raw.text, + parsed_text: mock_analyzer_response + }; + + var messages = sanitiser(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('empty raw.text should add error message', function(t) { + var sanitiser = proxyquire('../../../sanitiser/_text', { + 'pelias-text-analyzer': { parse: function(query) { + throw new Error('analyzer should not have been called'); + } + }}); + + var raw = { + text: '' + }; + var clean = { + }; + + var expected_clean = { + }; + + var messages = sanitiser(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0'], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('undefined raw.text should add error message', function(t) { + var sanitiser = proxyquire('../../../sanitiser/_text', { + 'pelias-text-analyzer': { parse: function(query) { + throw new Error('analyzer should not have been called'); + } + }}); + var raw = { - text: 'emp' //start of empire state building + text: undefined }; var clean = { 
}; + var expected_clean = { + }; + + var messages = sanitiser(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0'], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('text_analyzer.parse returning undefined should not overwrite clean.parsed_text', function(t) { + var sanitiser = proxyquire('../../../sanitiser/_text', { + 'pelias-text-analyzer': { parse: function(query) { + return undefined; + } + }}); + + var raw = { + text: 'raw input' + }; + var clean = { + parsed_text: 'original clean.parsed_text' + }; + + var expected_clean = { + text: raw.text, + parsed_text: 'original clean.parsed_text' + }; + var messages = sanitiser(raw, clean); + t.deepEquals(clean, expected_clean); t.deepEquals(messages.errors, [], 'no errors'); t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('text_analyzer.parse returning null should not overwrite clean.parsed_text', function(t) { + var sanitiser = proxyquire('../../../sanitiser/_text', { + 'pelias-text-analyzer': { parse: function(query) { + return null; + } + }}); + + var raw = { + text: 'raw input' + }; + var clean = { + parsed_text: 'original clean.parsed_text' + }; + var expected_clean = { + text: raw.text, + parsed_text: 'original clean.parsed_text' + }; + + var messages = sanitiser(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); t.end(); + }); + }; module.exports.all = function (tape, common) { diff --git a/test/unit/sanitiser/_text_addressit.js b/test/unit/sanitiser/_text_addressit.js new file mode 100644 index 00000000..91e1665a --- /dev/null +++ b/test/unit/sanitiser/_text_addressit.js @@ -0,0 +1,339 @@ +var sanitiser = require('../../../sanitiser/_text_addressit'); +var type_mapping = require('../../../helper/type_mapping'); + +module.exports.tests = {}; + 
+module.exports.tests.text_parser = function(test, common) { + test('short input text has admin layers set ', function(t) { + var raw = { + text: 'emp' //start of empire state building + }; + var clean = { + }; + + var messages = sanitiser(raw, clean); + + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + t.end(); + }); + + var usQueries = [ + { name: 'soho', admin_parts: 'new york', state: 'NY' }, + { name: '123 main', admin_parts: 'new york', state: 'NY' } + ]; + + usQueries.forEach(function (query) { + test('naive parsing ' + query, function(t) { + var raw = { + text: query.name + ', ' + query.admin_parts + }; + var clean = {}; + + var expected_clean = { + text: query.name + ', ' + query.admin_parts, + parsed_text: { + name: query.name, + regions: [ query.name ], + admin_parts: query.admin_parts, + state: query.state + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('naive parsing ' + query + ' without spaces', function(t) { + var raw = { + text: query.name + ',' + query.admin_parts + }; + var clean = {}; + + var expected_clean = { + text: query.name + ',' + query.admin_parts, + parsed_text: { + name: query.name, + regions: [ query.name ], + admin_parts: query.admin_parts, + state: query.state + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + }); + + var nonUSQueries = [ + { name: 'chelsea', admin_parts: 'london' }, + ]; + + nonUSQueries.forEach(function (query) { + test('naive parsing ' + query, function(t) { + var raw = { + text: query.name + ', ' + query.admin_parts + }; + var clean = {}; + + var expected_clean = { + text: query.name + ', ' + query.admin_parts, + parsed_text: { + name: query.name, + regions: [ query.name, query.admin_parts ], + admin_parts: 
query.admin_parts + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('naive parsing ' + query + ' without spaces', function(t) { + var raw = { + text: query.name + ',' + query.admin_parts + }; + var clean = {}; + + var expected_clean = { + text: query.name + ',' + query.admin_parts, + parsed_text: { + name: query.name, + regions: [ query.name, query.admin_parts ], + admin_parts: query.admin_parts + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + }); + + test('query with one token', function (t) { + var raw = { + text: 'yugolsavia' + }; + var clean = {}; + clean.parsed_text = 'this should be removed'; + + var expected_clean = { + text: 'yugolsavia' + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('query with two tokens, no numbers', function (t) { + var raw = { + text: 'small town' + }; + var clean = {}; + clean.parsed_text = 'this should be removed'; + + var expected_clean = { + text: 'small town' + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('query with two tokens, number first', function (t) { + var raw = { + text: '123 main' + }; + var clean = {}; + clean.parsed_text = 'this should be removed'; + + var expected_clean = { + text: '123 main' + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('query with two tokens, number second', function (t) { + var raw = { + text: 'main 123' + }; + var clean = {}; + clean.parsed_text = 'this should be removed'; + + var 
expected_clean = { + text: 'main 123' + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('query with many tokens', function(t) { + var raw = { + text: 'main particle new york' + }; + var clean = {}; + clean.parsed_text = 'this should be removed'; + + var expected_clean = { + text: 'main particle new york', + parsed_text: { + regions: [ 'main particle' ], + state: 'NY' + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('valid address, house number', function(t) { + var raw = { + text: '123 main st new york ny' + }; + var clean = {}; + + var expected_clean = { + text: '123 main st new york ny', + parsed_text: { + number: '123', + street: 'main st', + state: 'NY', + regions: [ 'new york' ] + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + + test('valid address, zipcode', function(t) { + var raw = { + text: '123 main st new york ny 10010' + }; + var clean = {}; + + var expected_clean = { + text: '123 main st new york ny 10010', + parsed_text: { + number: '123', + street: 'main st', + state: 'NY', + postalcode: '10010', + regions: [ 'new york' ] + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + }); + + test('valid address with leading 0s in zipcode', function(t) { + var raw = { + text: '339 W Main St, Cheshire, 06410' + }; + var clean = {}; + + var expected_clean = { + text: '339 W Main St, Cheshire, 06410', + parsed_text: { + name: '339 W Main St', + number: '339', + street: 'W Main St', + postalcode: '06410', + regions: [ 'Cheshire' ], + admin_parts: 'Cheshire, 06410' + } + }; + + var messages = 
sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + }); + + test('valid address without spaces after commas', function(t) { + var raw = { + text: '339 W Main St,Lancaster,PA' + }; + var clean = {}; + + var expected_clean = { + text: '339 W Main St,Lancaster,PA', + parsed_text: { + name: '339 W Main St', + number: '339', + street: 'W Main St', + state: 'PA', + regions: [ 'Lancaster' ], + admin_parts: 'Lancaster, PA' + } + }; + + var messages = sanitiser(raw, clean); + + t.deepEqual(messages, { errors: [], warnings: [] } ); + t.deepEqual(clean, expected_clean); + t.end(); + + }); + +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('SANITISER _text: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/sanitiser/sanitizeAll.js b/test/unit/sanitiser/sanitizeAll.js new file mode 100644 index 00000000..3d78e599 --- /dev/null +++ b/test/unit/sanitiser/sanitizeAll.js @@ -0,0 +1,161 @@ +var sanitizeAll = require('../../../sanitiser/sanitizeAll'); + +module.exports.tests = {}; + +module.exports.tests.all = function(test, common) { + test('req.clean/errors/warnings should be initialized when they are not', function(t) { + var req = {}; /* bare request: no clean, errors or warnings properties yet */ + var sanitizers = [ + function() { + req.clean.a = 'first sanitizer'; /* throws unless sanitizeAll has already created req.clean */ + return { + errors: ['error 1', 'error 2'], + warnings: ['warning 1', 'warning 2'] + }; + }, + function() { + req.clean.b = 'second sanitizer'; + return { + errors: ['error 3'], + warnings: ['warning 3'] + }; + } + ]; + + var expected_req = { /* messages of both sanitizers are expected concatenated in call order */ + clean: { + a: 'first sanitizer', + b: 'second sanitizer' + }, + errors: ['error 1', 'error 2', 'error 3'], + warnings: ['warning 1', 'warning 2', 'warning 3'] + }; + + sanitizeAll(req, sanitizers, function(){ + t.deepEquals(req, expected_req); + t.end(); + }); + + }); + + 
test('req.clean/errors/warnings should not be initialized when they already have been', function(t) { + var req = { /* every field pre-populated: the existing values are expected to survive */ + clean: { + alreadyInitialized: true + }, + errors: ['pre-existing error'], + warnings: ['pre-existing warning'] + }; + + var sanitizers = [ + function() { + req.clean.a = 'first sanitizer'; + return { + errors: ['error 1', 'error 2'], + warnings: ['warning 1', 'warning 2'] + }; + }, + function() { + req.clean.b = 'second sanitizer'; + return { + errors: ['error 3'], + warnings: ['warning 3'] + }; + } + ]; + + var expected_req = { + clean: { + alreadyInitialized: true, + a: 'first sanitizer', + b: 'second sanitizer' + }, + errors: ['pre-existing error', 'error 1', 'error 2', 'error 3'], /* new messages come after the pre-existing one */ + warnings: ['pre-existing warning', 'warning 1', 'warning 2', 'warning 3'] + }; + + sanitizeAll(req, sanitizers, function(){ + t.deepEquals(req, expected_req); + t.end(); + }); + + }); + + test('req.query should be passed to individual sanitizers when available', function(t) { + var req = { + query: { + value: 'query value' + } + }; + var sanitizers = [ + function(params) { + req.clean.query = params; /* records the argument the sanitizer received so it can be asserted on */ + return { + errors: [], + warnings: [] + }; + } + ]; + + var expected_req = { + query: { + value: 'query value' + }, + clean: { + query: { + value: 'query value' + } + }, + errors: [], + warnings: [] + }; + + sanitizeAll(req, sanitizers, function(){ + t.deepEquals(req, expected_req); + t.end(); + }); + + }); + + test('an empty object should be passed to individual sanitizers when req.query is unavailable', function(t) { + var req = {}; + var sanitizers = [ + function(params) { + if (Object.keys(params).length === 0) { /* the flag below is only set when params has no keys */ + req.clean.empty_object_was_passed = true; + } + + return { + errors: [], + warnings: [] + }; + } + ]; + + var expected_req = { + clean: { + empty_object_was_passed: true + }, + errors: [], + warnings: [] + }; + + sanitizeAll(req, sanitizers, function(){ + t.deepEquals(req, expected_req); + t.end(); + }); + + }); + +}; + +module.exports.all = function 
(tape, common) { + + function test(name, testFunction) { + return tape('SANITIZE sanitizeAll ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/sanitiser/search.js b/test/unit/sanitiser/search.js index 35dbcda5..3c5c684d 100644 --- a/test/unit/sanitiser/search.js +++ b/test/unit/sanitiser/search.js @@ -1,299 +1,106 @@ -var extend = require('extend'), - search = require('../../../sanitiser/search'), - text_analyzer = require('pelias-text-analyzer'), - sanitize = search.sanitize, - middleware = search.middleware, - defaultError = 'invalid param \'text\': text length, must be >0'; -// these are the default values you would expect when no input params are specified. -var emptyClean = { private: false, size: 10 }; +var proxyquire = require('proxyquire').noCallThru(); module.exports.tests = {}; -module.exports.tests.interface = function(test, common) { - test('sanitize interface', function(t) { - t.equal(typeof sanitize, 'function', 'sanitize is a function'); - t.equal(sanitize.length, 2, 'sanitize interface'); - t.end(); - }); - test('middleware interface', function(t) { - t.equal(typeof middleware, 'function', 'middleware is a function'); - t.equal(middleware.length, 3, 'sanitize has a valid middleware'); - t.end(); - }); -}; - -module.exports.tests.sanitisers = function(test, common) { - test('check sanitiser list', function (t) { - var expected = ['quattroshapes_deprecation', 'singleScalarParameters', 'text', 'size', - 'layers', 'sources', 'sources_and_layers', 'private', 'geo_search', 'boundary_country', 'categories' ]; - t.deepEqual(Object.keys(search.sanitiser_list), expected); - t.end(); - }); -}; - -module.exports.tests.sanitize_invalid_text = function(test, common) { - test('invalid text', function(t) { - var invalid = [ '', 100, null, undefined ]; - invalid.forEach( function( text ){ - var req = { query: { text: text } }; - sanitize(req, function(){ - 
t.equal(req.errors[0], 'invalid param \'text\': text length, must be >0', text + ' is an invalid text'); - t.deepEqual(req.clean, emptyClean, 'clean only has default values set'); - }); - }); - t.end(); - }); -}; - -module.exports.tests.sanitise_valid_text = function(test, common) { - test('valid short text', function(t) { - var req = { query: { text: 'a' } }; - sanitize(req, function(){ - t.equal(req.errors[0], undefined, 'no error'); - }); - t.end(); - }); - - test('valid not-quite-as-short text', function(t) { - var req = { query: { text: 'aa' } }; - sanitize(req, function(){ - t.equal(req.errors[0], undefined, 'no error'); - }); - t.end(); - }); - - test('valid longer text', function(t) { - var req = { query: { text: 'aaaaaaaa' } }; - sanitize(req, function(){ - t.equal(req.errors[0], undefined, 'no error'); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_text_with_delim = function(test, common) { - var texts = [ 'a,bcd', '123 main st, region', ',,,', ' ' ]; - - test('valid texts with a comma', function(t) { - texts.forEach( function( text ){ - var req = { query: { text: text } }; - sanitize( req, function( ){ - var expected_text = text; - - var expected_parsed_text = text_analyzer.parse(text); - t.equal(req.errors[0], undefined, 'no error'); - t.equal(req.clean.parsed_text.name, expected_parsed_text.name, 'clean name set correctly'); - t.equal(req.clean.text, expected_text, 'text should match'); - - }); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_private_no_value = function(test, common) { - test('default private should be set to true', function(t) { - var req = { query: { text: 'test' } }; - sanitize(req, function(){ - t.equal(req.clean.private, false, 'private set to false'); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_private_explicit_true_value = function(test, common) { - test('explicit private should be set to true', function(t) { - var req = { query: { text: 'test', private: true } }; - sanitize(req, 
function(){ - t.equal(req.clean.private, true, 'private set to true'); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_private_explicit_false_value = function(test, common) { - test('explicit private should be set to false', function(t) { - var req = { query: { text: 'test', private: false } }; - sanitize(req, function(){ - t.equal(req.clean.private, false, 'private set to false'); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_lat = function(test, common) { - var valid_lats = [ 0, 45, 90, -0, '0', '45', '90', -181, -120, -91, 91, 120, 181 ]; - test('valid lat', function(t) { - valid_lats.forEach( function( lat ){ - var req = { query: { text: 'test', 'focus.point.lat': lat, 'focus.point.lon': 0 } }; - sanitize(req, function(){ - var expected_lat = parseFloat( lat ); - t.equal(req.errors[0], undefined, 'no error'); - }); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_lon = function(test, common) { - var lons = { - valid: [ -381, -181, -180, -1, -0, 0, 45, 90, '-180', '0', '180', 181 ] - }; - test('valid lon', function(t) { - lons.valid.forEach( function( lon ){ - var req = { query: { text: 'test', 'focus.point.lat': 0, 'focus.point.lon': lon } }; - sanitize( req, function(){ - var expected_lon = parseFloat( lon ); - t.equal(req.errors[0], undefined, 'no error'); - }); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_optional_geo = function(test, common) { - test('no lat/lon', function(t) { - var req = { query: { text: 'test' } }; - sanitize(req, function(){ - t.equal(req.errors[0], undefined, 'no error'); - t.equal(req.clean['focus.point.lat'], undefined, 'clean set without lat'); - t.equal(req.clean['focus.point.lon'], undefined, 'clean set without lon'); - }); - t.end(); - }); - test('no lat', function(t) { - var req = { query: { text: 'test', 'focus.point.lon': 0 } }; - sanitize(req, function(){ - var expected_lon = 0; - t.equal(req.errors[0], 'parameters focus.point.lat and focus.point.lon must both be 
specified'); - t.equal(req.clean['focus.point.lat'], undefined); - t.equal(req.clean['focus.point.lon'], undefined); - }); - t.end(); - }); - test('no lon', function(t) { - var req = { query: { text: 'test', 'focus.point.lat': 0 } }; - sanitize(req, function(){ - var expected_lat = 0; - t.equal(req.errors[0], 'parameters focus.point.lat and focus.point.lon must both be specified'); - t.equal(req.clean['focus.point.lat'], undefined); - t.equal(req.clean['focus.point.lon'], undefined); - }); - t.end(); - }); -}; - -module.exports.tests.sanitize_bounding_rect = function(test, common) { - test('valid bounding rect', function(t) { - var req = { - query: { - text: 'test', - 'boundary.rect.min_lat': -40.659, - 'boundary.rect.max_lat': -41.614, - 'boundary.rect.min_lon': 174.612, - 'boundary.rect.max_lon': 176.333 - } - }; - - sanitize(req, function(){ - t.equal(req.errors[0], undefined, 'no error'); - t.equal(req.clean['boundary.rect.min_lon'], parseFloat(req.query['boundary.rect.min_lon'])); - t.equal(req.clean['boundary.rect.max_lat'], parseFloat(req.query['boundary.rect.max_lat'])); - t.equal(req.clean['boundary.rect.max_lon'], parseFloat(req.query['boundary.rect.max_lon'])); - t.equal(req.clean['boundary.rect.min_lat'], parseFloat(req.query['boundary.rect.min_lat'])); +module.exports.tests.sanitize = function(test, common) { + test('verify that all sanitisers were called as expected', function(t) { + var called_sanitisers = []; /* each stub below appends its own name here when it is invoked */ + + // rather than re-verify the functionality of all the sanitisers, this test just verifies that they + // were all called correctly + var search = proxyquire('../../../sanitiser/search', { + '../sanitiser/_deprecate_quattroshapes': function() { + called_sanitisers.push('_deprecate_quattroshapes'); + return { errors: [], warnings: [] }; + }, + '../sanitiser/_single_scalar_parameters': function() { + called_sanitisers.push('_single_scalar_parameters'); + return { errors: [], warnings: [] }; + }, + '../sanitiser/_text': function() { + 
called_sanitisers.push('_text'); + return { errors: [], warnings: [] }; + }, + '../sanitiser/_size': function() { /* factory stub: must be called with no arguments and returns the sanitiser */ + if (arguments.length === 0) { + return function() { + called_sanitisers.push('_size'); + return { errors: [], warnings: [] }; + }; + + } else { + throw new Error('should not have passed any parameters to _size'); + } + + }, + '../sanitiser/_targets': function(type) { /* factory stub: only accepts 'layers' or 'sources' as the type */ + if (['layers', 'sources'].indexOf(type) !== -1) { + return function() { + called_sanitisers.push('_targets/' + type); + return { errors: [], warnings: [] }; + }; + + } + else { + throw new Error('incorrect parameters passed to _targets'); + } + + }, + '../sanitiser/_sources_and_layers': function() { + called_sanitisers.push('_sources_and_layers'); + return { errors: [], warnings: [] }; + }, + '../sanitiser/_flag_bool': function() { /* factory stub: must be called as ('private', false) */ + if (arguments[0] === 'private' && arguments[1] === false) { + return function() { + called_sanitisers.push('_flag_bool'); + return { errors: [], warnings: [] }; + }; + + } + else { + throw new Error('incorrect parameters passed to _flag_bool'); + } + + }, + '../sanitiser/_geo_search': function() { + called_sanitisers.push('_geo_search'); + return { errors: [], warnings: [] }; + }, + '../sanitiser/_boundary_country': function() { + called_sanitisers.push('_boundary_country'); + return { errors: [], warnings: [] }; + }, + '../sanitiser/_categories': function() { + called_sanitisers.push('_categories'); + return { errors: [], warnings: [] }; + }, + }); + + var expected_sanitisers = [ /* compared with deepEquals, so the call order is part of the assertion */ + '_deprecate_quattroshapes', + '_single_scalar_parameters', + '_text', + '_size', + '_targets/layers', + '_targets/sources', + '_sources_and_layers', + '_flag_bool', + '_geo_search', + '_boundary_country', + '_categories' + ]; + + var req = {}; + var res = {}; + + search.middleware(req, res, function(){ + t.deepEquals(called_sanitisers, expected_sanitisers); t.end(); }); }); }; -module.exports.tests.sanitize_size = function(test, common) { - test('invalid size value', function(t) 
{ - var req = { query: { size: 'a', text: 'test', lat: 0, lon: 0 } }; - sanitize(req, function(){ - t.equal(req.clean.size, 10, 'default size set'); - t.end(); - }); - }); - test('below min size value', function(t) { - var req = { query: { size: -100, text: 'test', lat: 0, lon: 0 } }; - sanitize(req, function(){ - t.equal(req.clean.size, 1, 'min size set'); - t.end(); - }); - }); - test('above max size value', function(t) { - var req = { query: { size: 9999, text: 'test', lat: 0, lon: 0 } }; - sanitize(req, function(){ - t.equal(req.clean.size, 40, 'max size set'); - t.end(); - }); - }); -}; - -module.exports.tests.sanitize_private = function(test, common) { - var invalid_values = [null, -1, 123, NaN, 'abc']; - invalid_values.forEach(function(value) { - test('invalid private param ' + value, function(t) { - var req = { query: { text: 'test', lat: 0, lon: 0, 'private': value } }; - sanitize(req, function(){ - t.equal(req.clean.private, false, 'default private set (to false)'); - t.end(); - }); - }); - }); - - var valid_values = ['true', true, 1, '1']; - valid_values.forEach(function(value) { - test('valid private ' + value, function(t) { - var req = { query: { text: 'test', 'private': value } }; - sanitize(req, function(){ - t.equal(req.clean.private, true, 'private set to true'); - t.end(); - }); - }); - }); - - var valid_false_values = ['false', false, 0, '0']; - valid_false_values.forEach(function(value) { - test('test setting false explicitly ' + value, function(t) { - var req = { query: { text: 'test', 'private': value } }; - sanitize(req, function(){ - t.equal(req.clean.private, false, 'private set to false'); - t.end(); - }); - }); - }); - - test('test default behavior', function(t) { - var req = { query: { text: 'test' } }; - sanitize(req, function(){ - t.equal(req.clean.private, false, 'private set to false'); - t.end(); - }); - }); -}; - -module.exports.tests.invalid_params = function(test, common) { - test('invalid text params', function(t) { - var req = 
{ query: {} }; - sanitize( req, function(){ - t.equal(req.errors[0], defaultError, 'handle invalid params gracefully'); - t.end(); - }); - }); -}; - -module.exports.tests.middleware_success = function(test, common) { - test('middleware success', function(t) { - var req = { query: { text: 'test' }}; - var next = function( message ){ - t.deepEqual(req.errors, [], 'no error messages set'); - t.end(); - }; - middleware( req, undefined, next ); - }); -}; - module.exports.all = function (tape, common) { function test(name, testFunction) { diff --git a/test/unit/sanitiser/search_fallback.js b/test/unit/sanitiser/search_fallback.js new file mode 100644 index 00000000..dd8fff2a --- /dev/null +++ b/test/unit/sanitiser/search_fallback.js @@ -0,0 +1,121 @@ +var proxyquire = require('proxyquire').noCallThru(); + +module.exports.tests = {}; + +module.exports.tests.sanitize = function(test, common) { + test('verify that all sanitisers were called as expected when `res` is undefined', function(t) { + var called_sanitisers = []; + + // rather than re-verify the functionality of all the sanitisers, this test just verifies that they + // were all called correctly + var search = proxyquire('../../../sanitiser/search_fallback', { + '../sanitiser/_text_addressit': function() { + called_sanitisers.push('_text_addressit'); + return { errors: [], warnings: [] }; + } + }); + + var expected_sanitisers = [ + '_text_addressit' + ]; + + var req = {}; + + search.middleware(req, undefined, function(){ + t.deepEquals(called_sanitisers, expected_sanitisers); + t.end(); + }); + + }); + + test('verify that all sanitisers were called as expected when `res` has no `data` property', function(t) { + var called_sanitisers = []; + + // rather than re-verify the functionality of all the sanitisers, this test just verifies that they + // were all called correctly + var search = proxyquire('../../../sanitiser/search_fallback', { + '../sanitiser/_text_addressit': function() { + 
called_sanitisers.push('_text_addressit'); + return { errors: [], warnings: [] }; + } + }); + + var expected_sanitisers = [ + '_text_addressit' + ]; + + var req = {}; + var res = {}; + + search.middleware(req, res, function(){ + t.deepEquals(called_sanitisers, expected_sanitisers); + t.end(); + }); + + }); + + test('verify that all sanitisers were called as expected when res.data is empty', function(t) { + var called_sanitisers = []; + + // rather than re-verify the functionality of all the sanitisers, this test just verifies that they + // were all called correctly + var search = proxyquire('../../../sanitiser/search_fallback', { + '../sanitiser/_text_addressit': function() { + called_sanitisers.push('_text_addressit'); + return { errors: [], warnings: [] }; + } + }); + + var expected_sanitisers = [ + '_text_addressit' + ]; + + var req = {}; + var res = { + data: [] + }; + + search.middleware(req, res, function(){ + t.deepEquals(called_sanitisers, expected_sanitisers); + t.end(); + }); + + }); + + test('non-empty res.data should not call the _text_addressit sanitiser', function(t) { + var called_sanitisers = []; + + // the other tests in this file show search_fallback loading _text_addressit, so that is the module to stub; + // the stub throws if the middleware still runs the sanitiser although res.data already has results + var search = proxyquire('../../../sanitiser/search_fallback', { + '../sanitiser/_text_addressit': function() { + throw new Error('_text_addressit sanitiser should not have been called'); + } + }); + + var expected_sanitisers = []; + + var req = {}; + var res = { + data: [{}] + }; + + search.middleware(req, res, function(){ + t.deepEquals(called_sanitisers, expected_sanitisers); + t.end(); + }); + + }); + +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('SANITIZE /search_fallback ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/service/search.js 
b/test/unit/service/search.js index 0de8e0fa..cf72b75e 100644 --- a/test/unit/service/search.js +++ b/test/unit/service/search.js @@ -13,21 +13,23 @@ module.exports.tests.interface = function(test, common) { }); }; -// functionally test service +// functionally test service module.exports.tests.functional_success = function(test, common) { var expected = [ { _id: 'myid1', _type: 'mytype1', _score: 10, + _matched_queries: ['query 1', 'query 2'], value: 1, center_point: { lat: 100.1, lon: -50.5 }, name: { default: 'test name1' }, parent: { country: ['country1'], region: ['state1'], county: ['city1'] } - }, + }, { _id: 'myid2', _type: 'mytype2', _score: 20, + _matched_queries: ['query 3'], value: 2, center_point: { lat: 100.2, lon: -51.5 }, name: { default: 'test name2' }, @@ -88,4 +90,4 @@ module.exports.all = function (tape, common) { for( var testCase in module.exports.tests ){ module.exports.tests[testCase](test, common); } -}; \ No newline at end of file +};