Browse Source

feat(query): Modify custom boosts feature to use function_score queries

configurable-boosts
Peter Johnson 6 years ago committed by Julian Simioni
parent
commit
a06683ff68
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 2
      query/autocomplete.js
  2. 10
      query/autocomplete_defaults.js
  3. 13
      query/search_defaults.js
  4. 6
      query/search_original.js
  5. 146
      query/view/boost_sources_and_layers.js
  6. 36
      test/unit/fixture/autocomplete_custom_boosts.json
  7. 35
      test/unit/fixture/search_with_custom_boosts.json
  8. 2
      test/unit/query/search_with_custom_boosts.js
  9. 98
      test/unit/query/view/boost_sources_and_layers.js

2
query/autocomplete.js

@ -45,8 +45,6 @@ query.score( views.boost_exact_matches );
query.score( peliasQuery.view.focus( views.ngrams_strict ) ); query.score( peliasQuery.view.focus( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.pop_subquery ) ); query.score( peliasQuery.view.popularity( views.pop_subquery ) );
query.score( peliasQuery.view.population( views.pop_subquery ) ); query.score( peliasQuery.view.population( views.pop_subquery ) );
const boostConfig = config.customBoosts || {};
query.score( views.custom_boosts( config.customBoosts ) ); query.score( views.custom_boosts( config.customBoosts ) );
// non-scoring hard filters // non-scoring hard filters

10
query/autocomplete_defaults.js

@ -91,6 +91,12 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'population:field': 'population', 'population:field': 'population',
'population:modifier': 'log1p', 'population:modifier': 'log1p',
'population:max_boost': 20, 'population:max_boost': 20,
'population:weight': 3 'population:weight': 3,
// boost_sources_and_layers view
'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions
'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost
'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score)
'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost
'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section
}); });

13
query/search_defaults.js

@ -93,7 +93,16 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'population:max_boost': 20, 'population:max_boost': 20,
'population:weight': 2, 'population:weight': 2,
// used by fallback queries
// @todo: it is also possible to specify layer boosting
// via pelias/config, consider deprecating this config.
'boost:address': 10, 'boost:address': 10,
'boost:street': 5 'boost:street': 5,
// boost_sources_and_layers view
'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions
'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost
'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score)
'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost
'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section
}); });

6
query/search_original.js

@ -6,10 +6,8 @@ const logger = require('pelias-logger').get('api');
const config = require('pelias-config').generate().api; const config = require('pelias-config').generate().api;
var placeTypes = require('../helper/placeTypes'); var placeTypes = require('../helper/placeTypes');
var views = { custom_boosts: require('./view/boost_sources_and_layers') };
var views = {
custom_boosts: require('./view/boost_sources_and_layers'),
};
// region_a is also an admin field. addressit tries to detect // region_a is also an admin field. addressit tries to detect
// region_a, in which case we use a match query specifically for it. // region_a, in which case we use a match query specifically for it.
// but address it doesn't know about all of them so it helps to search // but address it doesn't know about all of them so it helps to search
@ -42,8 +40,6 @@ query.score( peliasQuery.view.address('postcode') );
query.score( peliasQuery.view.admin('country_a') ); query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region_a') ); query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') ); query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
const boostConfig = config.customBoosts || {};
query.score( views.custom_boosts( config.customBoosts ) ); query.score( views.custom_boosts( config.customBoosts ) );
// non-scoring hard filters // non-scoring hard filters

146
query/view/boost_sources_and_layers.js

@ -1,51 +1,121 @@
//example input /**
//{ This view allows users to specify a custom boost for sources and layers.
// "source": {
// "openstreetmap": 5 The view is implemented using a 'function_score' query, which enumerates multiple 'functions', each
// }, function will assign a 'score' to each document when matched.
// "layer": {
// "street": 3, A document can match more than one function; in this case the 'score_mode' is used to decide how these
// "country": 5 scores are combined (this view's default is 'sum').
// }
//} Likewise, a document can also match zero functions, in this case it is assigned a score of 'min_score'.
function generateTermQuery(field, value, boost) { The computed score is then multiplied by the 'boost' value in order to come up with the final boost value
return { which will be assigned to that document. The 'boost' value is essentially a hard-coded multiplier for the score.
constant_score: {
boost: boost, The 'max_boost' property is simply a ceiling for this computed boost; if the computed boost is higher than
query: { max_boost it will be assigned the value of max_boost instead.
term: {
[field]: value, Note: This is a simple use of the 'function_score' query, as such we don't use the 'boost_mode' property
(because there is no query section) and the 'weight' values we assign are simply returned verbatim
(because we use filter queries for the function scoring).
ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
example config section:
{
"source": {
"openstreetmap": 5
},
"layer": {
"street": 3,
"country": 5
} }
} }
example query:
{
"function_score": {
"query": {
"match_all": {}
},
"functions": [{
"filter": {
"match": {
"layer": "intersections"
} }
}; },
"weight": 1.6
},{
"filter": {
"match": {
"layer": "stops"
} }
},
module.exports = function( configuration ) { "weight": 2.4
return function( ) { }],
if (!configuration) { "boost": 5,
return null; "max_boost": 40,
"score_mode": "sum",
"boost_mode": "multiply",
"min_score": 1
} }
const filters = [];
['source', 'layer'].forEach(function(target) {
if (configuration[target]) {
Object.keys(configuration[target]).forEach(function(item) {
filters.push(generateTermQuery(target, item, configuration[target][item]));
});
} }
}); **/
if (filters.length === 0) { // supported top-level config items
const TARGETS = ['source', 'layer'];
module.exports = function( config ) {
// no valid config to use, fail now, don't render this view.
if( !config ) { return function(){ return null; }; }
return function( vs ) {
// validate required params
if( !vs.isset('custom:boosting:min_score') ||
!vs.isset('custom:boosting:boost') ||
!vs.isset('custom:boosting:max_boost') ||
!vs.isset('custom:boosting:score_mode') ||
!vs.isset('custom:boosting:boost_mode') ){
return null; return null;
} else if (filters.length === 1) {
return filters[0];
} else {
return {
bool: {
should: filters
} }
// base 'function_score' view
var view = {
'function_score': {
'query': { 'match_all': {} }, // apply to all documents
'functions': [], // a list of functions which contribute to a 'score' for each document
'min_score': vs.var('custom:boosting:min_score'),
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode')
},
}; };
// iterate over supported targets and their values
TARGETS.forEach( function( target ) {
if( 'object' === typeof config[target] ) {
Object.keys(config[target]).forEach(function(value) {
// add a scoring function for this target, assigning a weight
let weight = config[target][value];
view.function_score.functions.push({
'weight': isNaN(weight) ? 1 : weight,
'filter': {
'match': {
[target]: value
} }
}
});
});
}
});
// no functions were generated, fail now, don't render this view.
if( view.function_score.functions.length === 0 ) { return null; }
return view;
}; };
}; };

36
test/unit/fixture/autocomplete_custom_boosts.json

@ -67,31 +67,31 @@
"score_mode": "first", "score_mode": "first",
"boost_mode": "replace" "boost_mode": "replace"
} }
},{
"function_score": {
"query": {
"match_all": {}
}, },
{ "min_score": 1,
"bool": {
"should": [
{
"constant_score": {
"boost": 5, "boost": 5,
"query": { "max_boost": 50,
"term": { "score_mode": "sum",
"boost_mode": "multiply",
"functions": [{
"filter": {
"match": {
"source": "openstreetmap" "source": "openstreetmap"
} }
}
}
}, },
{ "weight": 5
"constant_score": { },{
"boost": 3, "filter": {
"query": { "match": {
"term": {
"layer": "transit" "layer": "transit"
} }
} },
} "weight": 3
} }]
]
} }
} }
] ]

35
test/unit/fixture/search_with_custom_boosts.json

@ -73,29 +73,30 @@
}] }]
} }
},{ },{
"bool": { "function_score": {
"should": [
{
"constant_score": {
"boost": 5,
"query": { "query": {
"term": { "match_all": {}
},
"min_score": 1,
"boost": 5,
"max_boost": 50,
"score_mode": "sum",
"boost_mode": "multiply",
"functions": [{
"filter": {
"match": {
"source": "openstreetmap" "source": "openstreetmap"
} }
}
}
}, },
{ "weight": 5
"constant_score": { },{
"boost": 3, "filter": {
"query": { "match": {
"term": {
"layer": "transit" "layer": "transit"
} }
} },
} "weight": 3
} }]
]
} }
}] }]
} }

2
test/unit/query/search_with_custom_boosts.js

@ -36,8 +36,6 @@ module.exports.tests.query = function(test, common) {
}); });
const actual_query = JSON.parse( JSON.stringify( search_query_module(clean) ) ); const actual_query = JSON.parse( JSON.stringify( search_query_module(clean) ) );
console.log(JSON.stringify(actual_query.body.query.bool, null, 2));
t.deepEqual(actual_query, expected_query, 'query as expected'); t.deepEqual(actual_query, expected_query, 'query as expected');
t.pass(); t.pass();
t.end(); t.end();

98
test/unit/query/view/boost_sources_and_layers.js

@ -1,50 +1,62 @@
const query = require('pelias-query');
const vs = new query.Vars(require('../../../../query/search_defaults'));
const boost_sources_and_layers = require('../../../../query/view/boost_sources_and_layers'); const boost_sources_and_layers = require('../../../../query/view/boost_sources_and_layers');
module.exports.tests = {}; module.exports.tests = {};
module.exports.tests.empty_config = function(test, common) { module.exports.tests.empty_config = function(test, common) {
test('empty configuration returns empty query', function(t) { test('empty configuration returns empty query', function(t) {
const view_instance = boost_sources_and_layers({}); const view = boost_sources_and_layers({});
const query = view_instance(); const rendered = view(vs);
t.equal(query, null, 'query is empty'); t.equal(rendered, null, 'query is empty');
t.end(); t.end();
}); });
test('undefined configuration returns empty query', function(t) { test('undefined configuration returns empty query', function(t) {
const view_instance = boost_sources_and_layers(undefined); const view = boost_sources_and_layers(undefined);
const query = view_instance(); const rendered = view(vs);
t.equal(query, null, 'query is empty'); t.equal(rendered, null, 'query is empty');
t.end(); t.end();
}); });
}; };
module.exports.tests.single_item_config = function(test, common) { module.exports.tests.single_item_config = function(test, common) {
test('config with single layer entry returns single term query with boost', function(t) { test('config with single layer entry produces a single scoring function with weight', function(t) {
const config = { const config = {
layer: { layer: {
locality: 5 locality: 5
} }
}; };
const expected_query = { const expected_query = {
constant_score: { 'function_score': {
boost: 5, 'query': {
query: { 'match_all': {}
term: { },
layer: 'locality' 'functions': [{
} 'filter': {
'match': {
'layer': 'locality'
} }
},
'weight': 5
}],
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode'),
'min_score': vs.var('custom:boosting:min_score')
} }
}; };
const view_instance = boost_sources_and_layers(config); const view = boost_sources_and_layers(config);
t.deepEquals(view_instance(), expected_query, 'query is a single term query'); t.deepEquals(view(vs), expected_query, 'query contains a single scoring function');
t.end(); t.end();
}); });
}; };
module.exports.tests.mulitple_item_config = function(test, common) { module.exports.tests.mulitple_item_config = function(test, common) {
test('config with multiple items returns bool query with multiple should conditions', function(t) { test('config with multiple items produces multiple scoring functions', function(t) {
const config = { const config = {
source: { source: {
whosonfirst: 6 whosonfirst: 6
@ -55,40 +67,42 @@ module.exports.tests.mulitple_item_config = function(test, common) {
}, },
}; };
const expected_query = { const expected_query = {
bool: { 'function_score': {
should: [{ 'query': {
constant_score: { 'match_all': {}
boost: 6, },
query: { 'functions': [{
term: { 'filter': {
source: 'whosonfirst', 'match': {
} 'source': 'whosonfirst'
}
} }
},
'weight': 6
},{ },{
constant_score: { 'filter': {
boost: 2, 'match': {
query: { 'layer': 'country'
term: {
layer: 'country'
}
}
} }
},
'weight': 2
},{ },{
constant_score: { 'filter': {
boost: 0.5, 'match': {
query: { 'layer': 'borough'
term: {
layer: 'borough'
} }
} },
} 'weight': 0.5
}] }],
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode'),
'min_score': vs.var('custom:boosting:min_score')
} }
}; };
const view_instance = boost_sources_and_layers(config); const view = boost_sources_and_layers(config);
t.deepEquals(view_instance(), expected_query, 'query is a bool query with multiple term queres'); t.deepEquals(view(vs), expected_query, 'query contains multiple scoring functions');
t.end(); t.end();
}); });

Loading…
Cancel
Save