From a06683ff686c78948c8fd74204e3b0b683f36bfa Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Wed, 10 Oct 2018 10:52:22 +0200 Subject: [PATCH] feat(query): Modify custom boosts feature to use function_score queries --- query/autocomplete.js | 4 +- query/autocomplete_defaults.js | 10 +- query/search.js | 4 +- query/search_defaults.js | 13 +- query/search_original.js | 8 +- query/view/boost_sources_and_layers.js | 152 +++++++++++++----- .../fixture/autocomplete_custom_boosts.json | 44 ++--- .../fixture/search_with_custom_boosts.json | 97 +++++------ test/unit/query/search_with_custom_boosts.js | 2 - .../query/view/boost_sources_and_layers.js | 102 +++++++----- 10 files changed, 264 insertions(+), 172 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index a12ec979..f16adc44 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -45,9 +45,7 @@ query.score( views.boost_exact_matches ); query.score( peliasQuery.view.focus( views.ngrams_strict ) ); query.score( peliasQuery.view.popularity( views.pop_subquery ) ); query.score( peliasQuery.view.population( views.pop_subquery ) ); - -const boostConfig = config.customBoosts || {}; -query.score( views.custom_boosts(config.customBoosts) ); +query.score( views.custom_boosts( config.customBoosts ) ); // non-scoring hard filters query.filter( peliasQuery.view.sources ); diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index dbba5861..29cd819b 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -91,6 +91,12 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'population:field': 'population', 'population:modifier': 'log1p', 'population:max_boost': 20, - 'population:weight': 3 - + 'population:weight': 3, + + // boost_sources_and_layers view + 'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions + 'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost + 'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score) + 'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost + 'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section }); diff --git a/query/search.js b/query/search.js index e45b5f6b..8232b55e 100644 --- a/query/search.js +++ b/query/search.js @@ -166,11 +166,11 @@ function isPostalCodeWithCountry(vs) { var isSet = (layer) => { return vs.isset(`input:${layer}`); }; - + var allowedFields = ['postcode', 'country']; var disallowedFields = ['query', 'category', 'housenumber', 'street', 'locality', 'neighbourhood', 'borough', 'county', 'region']; - + return allowedFields.every(isSet) && !disallowedFields.some(isSet); } diff --git a/query/search_defaults.js b/query/search_defaults.js index 92154ee8..8acf3c26 100644 --- a/query/search_defaults.js +++ b/query/search_defaults.js @@ -93,7 +93,16 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'population:max_boost': 20, 'population:weight': 2, + // used by fallback queries + // @todo: it is also possible to specify layer boosting + // via pelias/config, consider deprecating this config. 'boost:address': 10, - 'boost:street': 5 - + 'boost:street': 5, + + // boost_sources_and_layers view + 'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions + 'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost + 'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score) + 'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost + 'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section }); diff --git a/query/search_original.js b/query/search_original.js index a627a8d9..2bcaeb70 100644 --- a/query/search_original.js +++ b/query/search_original.js @@ -6,10 +6,8 @@ const logger = require('pelias-logger').get('api'); const config = require('pelias-config').generate().api; var placeTypes = require('../helper/placeTypes'); +var views = { custom_boosts: require('./view/boost_sources_and_layers') }; -var views = { - custom_boosts: require('./view/boost_sources_and_layers'), -}; // region_a is also an admin field. addressit tries to detect // region_a, in which case we use a match query specifically for it. // but address it doesn't know about all of them so it helps to search @@ -42,9 +40,7 @@ query.score( peliasQuery.view.address('postcode') ); query.score( peliasQuery.view.admin('country_a') ); query.score( peliasQuery.view.admin('region_a') ); query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') ); - -const boostConfig = config.customBoosts || {}; -query.score( views.custom_boosts(config.customBoosts) ); +query.score( views.custom_boosts( config.customBoosts ) ); // non-scoring hard filters query.filter( peliasQuery.view.boundary_circle ); diff --git a/query/view/boost_sources_and_layers.js b/query/view/boost_sources_and_layers.js index 504724da..50801f06 100644 --- a/query/view/boost_sources_and_layers.js +++ b/query/view/boost_sources_and_layers.js @@ -1,51 +1,121 @@ -//example input -//{ -// "source": { -// "openstreetmap": 5 -// }, -// "layer": { -// "street": 3, -// "country": 5 -// } -//} - -function generateTermQuery(field, value, boost) { - return { - constant_score: { - boost: boost, - query: { - term: { - [field]: value, - } - } +/** + This view allows users to specify a custom boost for sources and layers. + + The view is implemented using a 'function_score' query, which enumerates multiple 'functions', each + function will assign a 'score' to each document when matched. + + A document can match more than one function, in this case the 'score_mode' is used to decide how these + scores are combined, the default is 'sum'. + + Likewise, a document can also match zero functions, in this case it is assigned a score of 'min_score'. + + The computed score is then multiplied by the 'boost' value in order to come up with the final boost value + which will be assigned to that document. The 'boost' value is essentially a hard-coded multiplier for the score. + + The 'max_boost' property is simply a ceiling for this computed boost, if the computed boosted is higher than + max_boost it will be assigned the value of max_boost instead. + + Note: This is a simple use of the 'function_score' query, as such we don't use the 'boost_mode' property + (because there is no query section) and the 'weight' values we assign are simply returned verbatim + (because we use filter queries for the function scoring). + + ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html + + example config section: + { + "source": { + "openstreetmap": 5 + }, + "layer": { + "street": 3, + "country": 5 } - }; -} + } + + example query: + { + "function_score": { + "query": { + "match_all": {} + }, + "functions": [{ + "filter": { + "match": { + "layer": "intersections" + } + }, + "weight": 1.6 + },{ + "filter": { + "match": { + "layer": "stops" + } + }, + "weight": 2.4 + }], + "boost": 5, + "max_boost": 40, + "score_mode": "sum", + "boost_mode": "multiply", + "min_score": 1 + } + } +**/ + +// supported top-level config items +const TARGETS = ['source', 'layer']; + +module.exports = function( config ) { -module.exports = function( configuration ) { - return function( ) { - if (!configuration) { + // no valid config to use, fail now, don't render this view. + if( !config ) { return function(){ return null; }; } + + return function( vs ) { + + // validate required params + if( !vs.isset('custom:boosting:min_score') || + !vs.isset('custom:boosting:boost') || + !vs.isset('custom:boosting:max_boost') || + !vs.isset('custom:boosting:score_mode') || + !vs.isset('custom:boosting:boost_mode') ){ return null; } - const filters = []; - ['source', 'layer'].forEach(function(target) { - if (configuration[target]) { - Object.keys(configuration[target]).forEach(function(item) { - filters.push(generateTermQuery(target, item, configuration[target][item])); + + // base 'function_score' view + var view = { + 'function_score': { + 'query': { 'match_all': {} }, // apply to all documents + 'functions': [], // a list of functions which contribute to a 'score' for each document + 'min_score': vs.var('custom:boosting:min_score'), + 'boost': vs.var('custom:boosting:boost'), + 'max_boost': vs.var('custom:boosting:max_boost'), + 'score_mode': vs.var('custom:boosting:score_mode'), + 'boost_mode': vs.var('custom:boosting:boost_mode') + }, + }; + + // iterate over supported targets and their values + TARGETS.forEach( function( target ) { + if( 'object' === typeof config[target] ) { + Object.keys(config[target]).forEach(function(value) { + + // add a scoring function for this target, assigning a weight + let weight = config[target][value]; + view.function_score.functions.push({ + 'weight': isNaN(weight) ? 1 : weight, + 'filter': { + 'match': { + [target]: value + } + } + }); }); } }); - if (filters.length === 0) { - return null; - } else if (filters.length === 1) { - return filters[0]; - } else { - return { - bool: { - should: filters - } - }; - } + // no functions were generated, fail now, don't render this view. + if( view.function_score.functions.length === 0 ) { return null; } + + return view; }; }; diff --git a/test/unit/fixture/autocomplete_custom_boosts.json b/test/unit/fixture/autocomplete_custom_boosts.json index 51bb3839..cf0ea7b0 100644 --- a/test/unit/fixture/autocomplete_custom_boosts.json +++ b/test/unit/fixture/autocomplete_custom_boosts.json @@ -67,31 +67,31 @@ "score_mode": "first", "boost_mode": "replace" } - }, - { - "bool": { - "should": [ - { - "constant_score": { - "boost": 5, - "query": { - "term": { - "source": "openstreetmap" - } - } + },{ + "function_score": { + "query": { + "match_all": {} + }, + "min_score": 1, + "boost": 5, + "max_boost": 50, + "score_mode": "sum", + "boost_mode": "multiply", + "functions": [{ + "filter": { + "match": { + "source": "openstreetmap" } }, - { - "constant_score": { - "boost": 3, - "query": { - "term": { - "layer": "transit" - } - } + "weight": 5 + },{ + "filter": { + "match": { + "layer": "transit" } - } - ] + }, + "weight": 3 + }] } } ] diff --git a/test/unit/fixture/search_with_custom_boosts.json b/test/unit/fixture/search_with_custom_boosts.json index 06e2ca3b..c743f9a9 100644 --- a/test/unit/fixture/search_with_custom_boosts.json +++ b/test/unit/fixture/search_with_custom_boosts.json @@ -23,30 +23,30 @@ } } },{ - "function_score": { - "query": { - "match": { - "phrase.default": { - "query": "test", - "analyzer": "peliasPhrase", - "type": "phrase", - "slop": 2, - "boost": 1 + "function_score": { + "query": { + "match": { + "phrase.default": { + "query": "test", + "analyzer": "peliasPhrase", + "type": "phrase", + "slop": 2, + "boost": 1 + } } - } - }, - "max_boost": 20, - "score_mode": "first", - "boost_mode": "replace", - "functions": [{ - "field_value_factor": { - "modifier": "log1p", - "field": "popularity", - "missing": 1 }, - "weight": 1 - }] - } + "max_boost": 20, + "score_mode": "first", + "boost_mode": "replace", + "functions": [{ + "field_value_factor": { + "modifier": "log1p", + "field": "popularity", + "missing": 1 + }, + "weight": 1 + }] + } },{ "function_score": { "query": { @@ -72,32 +72,33 @@ "weight": 2 }] } - }, { - "bool": { - "should": [ - { - "constant_score": { - "boost": 5, - "query": { - "term": { - "source": "openstreetmap" - } - } - } - }, - { - "constant_score": { - "boost": 3, - "query": { - "term": { - "layer": "transit" - } - } - } - } - ] - } - }] + },{ + "function_score": { + "query": { + "match_all": {} + }, + "min_score": 1, + "boost": 5, + "max_boost": 50, + "score_mode": "sum", + "boost_mode": "multiply", + "functions": [{ + "filter": { + "match": { + "source": "openstreetmap" + } + }, + "weight": 5 + },{ + "filter": { + "match": { + "layer": "transit" + } + }, + "weight": 3 + }] + } + }] } }, "sort": [ "_score" ], diff --git a/test/unit/query/search_with_custom_boosts.js b/test/unit/query/search_with_custom_boosts.js index af751444..c40e8a2f 100644 --- a/test/unit/query/search_with_custom_boosts.js +++ b/test/unit/query/search_with_custom_boosts.js @@ -36,8 +36,6 @@ module.exports.tests.query = function(test, common) { }); const actual_query = JSON.parse( JSON.stringify( search_query_module(clean) ) ); - console.log(JSON.stringify(actual_query.body.query.bool, null, 2)); - t.deepEqual(actual_query, expected_query, 'query as expected'); t.pass(); t.end(); diff --git a/test/unit/query/view/boost_sources_and_layers.js b/test/unit/query/view/boost_sources_and_layers.js index f8c7babb..b4318310 100644 --- a/test/unit/query/view/boost_sources_and_layers.js +++ b/test/unit/query/view/boost_sources_and_layers.js @@ -1,50 +1,62 @@ +const query = require('pelias-query'); +const vs = new query.Vars(require('../../../../query/search_defaults')); const boost_sources_and_layers = require('../../../../query/view/boost_sources_and_layers'); module.exports.tests = {}; module.exports.tests.empty_config = function(test, common) { test('empty configuration returns empty query', function(t) { - const view_instance = boost_sources_and_layers({}); - const query = view_instance(); - t.equal(query, null, 'query is empty'); + const view = boost_sources_and_layers({}); + const rendered = view(vs); + t.equal(rendered, null, 'query is empty'); t.end(); }); test('undefined configuration returns empty query', function(t) { - const view_instance = boost_sources_and_layers(undefined); - const query = view_instance(); - t.equal(query, null, 'query is empty'); + const view = boost_sources_and_layers(undefined); + const rendered = view(vs); + t.equal(rendered, null, 'query is empty'); t.end(); }); }; module.exports.tests.single_item_config = function(test, common) { - test('config with single layer entry returns single term query with boost', function(t) { + test('config with single layer entry produces a single scoring function with weight', function(t) { const config = { layer: { locality: 5 } }; const expected_query = { - constant_score: { - boost: 5, - query: { - term: { - layer: 'locality' - } - } + 'function_score': { + 'query': { + 'match_all': {} + }, + 'functions': [{ + 'filter': { + 'match': { + 'layer': 'locality' + } + }, + 'weight': 5 + }], + 'boost': vs.var('custom:boosting:boost'), + 'max_boost': vs.var('custom:boosting:max_boost'), + 'score_mode': vs.var('custom:boosting:score_mode'), + 'boost_mode': vs.var('custom:boosting:boost_mode'), + 'min_score': vs.var('custom:boosting:min_score') } }; - const view_instance = boost_sources_and_layers(config); + const view = boost_sources_and_layers(config); - t.deepEquals(view_instance(), expected_query, 'query is a single term query'); + t.deepEquals(view(vs), expected_query, 'query contains a single scoring function'); t.end(); }); }; module.exports.tests.mulitple_item_config = function(test, common) { - test('config with multiple items returns bool query with multiple should conditions', function(t) { + test('config with multiple items produces multiple scoring functions', function(t) { const config = { source: { whosonfirst: 6 @@ -55,40 +67,42 @@ module.exports.tests.mulitple_item_config = function(test, common) { }, }; const expected_query = { - bool: { - should: [{ - constant_score: { - boost: 6, - query: { - term: { - source: 'whosonfirst', - } + 'function_score': { + 'query': { + 'match_all': {} + }, + 'functions': [{ + 'filter': { + 'match': { + 'source': 'whosonfirst' } - } - }, { - constant_score: { - boost: 2, - query: { - term: { - layer: 'country' - } + }, + 'weight': 6 + },{ + 'filter': { + 'match': { + 'layer': 'country' } - } + }, + 'weight': 2 },{ - constant_score: { - boost: 0.5, - query: { - term: { - layer: 'borough' - } + 'filter': { + 'match': { + 'layer': 'borough' } - } - }] + }, + 'weight': 0.5 + }], + 'boost': vs.var('custom:boosting:boost'), + 'max_boost': vs.var('custom:boosting:max_boost'), + 'score_mode': vs.var('custom:boosting:score_mode'), + 'boost_mode': vs.var('custom:boosting:boost_mode'), + 'min_score': vs.var('custom:boosting:min_score') } }; - const view_instance = boost_sources_and_layers(config); + const view = boost_sources_and_layers(config); - t.deepEquals(view_instance(), expected_query, 'query is a bool query with multiple term queres'); + t.deepEquals(view(vs), expected_query, 'query contains multiple scoring functions'); t.end(); });