Browse Source

feat(query): Modify custom boosts feature to use function_score queries

configurable-boosts
Peter Johnson 6 years ago committed by Julian Simioni
parent
commit
a06683ff68
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 4
      query/autocomplete.js
  2. 10
      query/autocomplete_defaults.js
  3. 13
      query/search_defaults.js
  4. 8
      query/search_original.js
  5. 152
      query/view/boost_sources_and_layers.js
  6. 44
      test/unit/fixture/autocomplete_custom_boosts.json
  7. 97
      test/unit/fixture/search_with_custom_boosts.json
  8. 2
      test/unit/query/search_with_custom_boosts.js
  9. 102
      test/unit/query/view/boost_sources_and_layers.js

4
query/autocomplete.js

@ -45,9 +45,7 @@ query.score( views.boost_exact_matches );
query.score( peliasQuery.view.focus( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.pop_subquery ) );
query.score( peliasQuery.view.population( views.pop_subquery ) );
const boostConfig = config.customBoosts || {};
query.score( views.custom_boosts(config.customBoosts) );
query.score( views.custom_boosts( config.customBoosts ) );
// non-scoring hard filters
query.filter( peliasQuery.view.sources );

10
query/autocomplete_defaults.js

@ -91,6 +91,12 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'population:field': 'population',
'population:modifier': 'log1p',
'population:max_boost': 20,
'population:weight': 3
'population:weight': 3,
// boost_sources_and_layers view
'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions
'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost
'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score)
'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost
'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section
});

13
query/search_defaults.js

@ -93,7 +93,16 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'population:max_boost': 20,
'population:weight': 2,
// used by fallback queries
// @todo: it is also possible to specify layer boosting
// via pelias/config, consider deprecating this config.
'boost:address': 10,
'boost:street': 5
'boost:street': 5,
// boost_sources_and_layers view
'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions
'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost
'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score)
'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost
'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section
});

8
query/search_original.js

@ -6,10 +6,8 @@ const logger = require('pelias-logger').get('api');
const config = require('pelias-config').generate().api;
var placeTypes = require('../helper/placeTypes');
var views = { custom_boosts: require('./view/boost_sources_and_layers') };
var views = {
custom_boosts: require('./view/boost_sources_and_layers'),
};
// region_a is also an admin field. addressit tries to detect
// region_a, in which case we use a match query specifically for it.
// but addressit doesn't know about all of them so it helps to search
@ -42,9 +40,7 @@ query.score( peliasQuery.view.address('postcode') );
query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
const boostConfig = config.customBoosts || {};
query.score( views.custom_boosts(config.customBoosts) );
query.score( views.custom_boosts( config.customBoosts ) );
// non-scoring hard filters
query.filter( peliasQuery.view.boundary_circle );

152
query/view/boost_sources_and_layers.js

@ -1,51 +1,121 @@
//example input
//{
// "source": {
// "openstreetmap": 5
// },
// "layer": {
// "street": 3,
// "country": 5
// }
//}
function generateTermQuery(field, value, boost) {
return {
constant_score: {
boost: boost,
query: {
term: {
[field]: value,
}
}
/**
This view allows users to specify a custom boost for sources and layers.
The view is implemented using a 'function_score' query, which enumerates multiple 'functions'; each
function assigns a 'score' to every document it matches.
A document can match more than one function; in this case the 'score_mode' is used to decide how these
scores are combined (the default is 'sum').
Likewise, a document can also match zero functions; in this case it is assigned a score of 'min_score'.
The computed score is then multiplied by the 'boost' value in order to come up with the final boost value
which will be assigned to that document. The 'boost' value is essentially a hard-coded multiplier for the score.
The 'max_boost' property is simply a ceiling for this computed boost; if the computed boost is higher than
max_boost it will be assigned the value of max_boost instead.
Note: This is a simple use of the 'function_score' query, as such we don't use the 'boost_mode' property
(because there is no query section) and the 'weight' values we assign are simply returned verbatim
(because we use filter queries for the function scoring).
ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
example config section:
{
"source": {
"openstreetmap": 5
},
"layer": {
"street": 3,
"country": 5
}
};
}
}
example query:
{
"function_score": {
"query": {
"match_all": {}
},
"functions": [{
"filter": {
"match": {
"layer": "intersections"
}
},
"weight": 1.6
},{
"filter": {
"match": {
"layer": "stops"
}
},
"weight": 2.4
}],
"boost": 5,
"max_boost": 40,
"score_mode": "sum",
"boost_mode": "multiply",
"min_score": 1
}
}
**/
// supported top-level config items
const TARGETS = ['source', 'layer'];
module.exports = function( config ) {
module.exports = function( configuration ) {
return function( ) {
if (!configuration) {
// no valid config to use, fail now, don't render this view.
if( !config ) { return function(){ return null; }; }
return function( vs ) {
// validate required params
if( !vs.isset('custom:boosting:min_score') ||
!vs.isset('custom:boosting:boost') ||
!vs.isset('custom:boosting:max_boost') ||
!vs.isset('custom:boosting:score_mode') ||
!vs.isset('custom:boosting:boost_mode') ){
return null;
}
const filters = [];
['source', 'layer'].forEach(function(target) {
if (configuration[target]) {
Object.keys(configuration[target]).forEach(function(item) {
filters.push(generateTermQuery(target, item, configuration[target][item]));
// base 'function_score' view
var view = {
'function_score': {
'query': { 'match_all': {} }, // apply to all documents
'functions': [], // a list of functions which contribute to a 'score' for each document
'min_score': vs.var('custom:boosting:min_score'),
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode')
},
};
// iterate over supported targets and their values
TARGETS.forEach( function( target ) {
if( 'object' === typeof config[target] ) {
Object.keys(config[target]).forEach(function(value) {
// add a scoring function for this target, assigning a weight
let weight = config[target][value];
view.function_score.functions.push({
'weight': isNaN(weight) ? 1 : weight,
'filter': {
'match': {
[target]: value
}
}
});
});
}
});
if (filters.length === 0) {
return null;
} else if (filters.length === 1) {
return filters[0];
} else {
return {
bool: {
should: filters
}
};
}
// no functions were generated, fail now, don't render this view.
if( view.function_score.functions.length === 0 ) { return null; }
return view;
};
};

44
test/unit/fixture/autocomplete_custom_boosts.json

@ -67,31 +67,31 @@
"score_mode": "first",
"boost_mode": "replace"
}
},
{
"bool": {
"should": [
{
"constant_score": {
"boost": 5,
"query": {
"term": {
"source": "openstreetmap"
}
}
},{
"function_score": {
"query": {
"match_all": {}
},
"min_score": 1,
"boost": 5,
"max_boost": 50,
"score_mode": "sum",
"boost_mode": "multiply",
"functions": [{
"filter": {
"match": {
"source": "openstreetmap"
}
},
{
"constant_score": {
"boost": 3,
"query": {
"term": {
"layer": "transit"
}
}
"weight": 5
},{
"filter": {
"match": {
"layer": "transit"
}
}
]
},
"weight": 3
}]
}
}
]

97
test/unit/fixture/search_with_custom_boosts.json

@ -23,30 +23,30 @@
}
}
},{
"function_score": {
"query": {
"match": {
"phrase.default": {
"query": "test",
"analyzer": "peliasPhrase",
"type": "phrase",
"slop": 2,
"boost": 1
"function_score": {
"query": {
"match": {
"phrase.default": {
"query": "test",
"analyzer": "peliasPhrase",
"type": "phrase",
"slop": 2,
"boost": 1
}
}
}
},
"max_boost": 20,
"score_mode": "first",
"boost_mode": "replace",
"functions": [{
"field_value_factor": {
"modifier": "log1p",
"field": "popularity",
"missing": 1
},
"weight": 1
}]
}
"max_boost": 20,
"score_mode": "first",
"boost_mode": "replace",
"functions": [{
"field_value_factor": {
"modifier": "log1p",
"field": "popularity",
"missing": 1
},
"weight": 1
}]
}
},{
"function_score": {
"query": {
@ -72,32 +72,33 @@
"weight": 2
}]
}
}, {
"bool": {
"should": [
{
"constant_score": {
"boost": 5,
"query": {
"term": {
"source": "openstreetmap"
}
}
}
},
{
"constant_score": {
"boost": 3,
"query": {
"term": {
"layer": "transit"
}
}
}
}
]
}
}]
},{
"function_score": {
"query": {
"match_all": {}
},
"min_score": 1,
"boost": 5,
"max_boost": 50,
"score_mode": "sum",
"boost_mode": "multiply",
"functions": [{
"filter": {
"match": {
"source": "openstreetmap"
}
},
"weight": 5
},{
"filter": {
"match": {
"layer": "transit"
}
},
"weight": 3
}]
}
}]
}
},
"sort": [ "_score" ],

2
test/unit/query/search_with_custom_boosts.js

@ -36,8 +36,6 @@ module.exports.tests.query = function(test, common) {
});
const actual_query = JSON.parse( JSON.stringify( search_query_module(clean) ) );
console.log(JSON.stringify(actual_query.body.query.bool, null, 2));
t.deepEqual(actual_query, expected_query, 'query as expected');
t.pass();
t.end();

102
test/unit/query/view/boost_sources_and_layers.js

@ -1,50 +1,62 @@
const query = require('pelias-query');
const vs = new query.Vars(require('../../../../query/search_defaults'));
const boost_sources_and_layers = require('../../../../query/view/boost_sources_and_layers');
module.exports.tests = {};
module.exports.tests.empty_config = function(test, common) {
test('empty configuration returns empty query', function(t) {
const view_instance = boost_sources_and_layers({});
const query = view_instance();
t.equal(query, null, 'query is empty');
const view = boost_sources_and_layers({});
const rendered = view(vs);
t.equal(rendered, null, 'query is empty');
t.end();
});
test('undefined configuration returns empty query', function(t) {
const view_instance = boost_sources_and_layers(undefined);
const query = view_instance();
t.equal(query, null, 'query is empty');
const view = boost_sources_and_layers(undefined);
const rendered = view(vs);
t.equal(rendered, null, 'query is empty');
t.end();
});
};
module.exports.tests.single_item_config = function(test, common) {
test('config with single layer entry returns single term query with boost', function(t) {
test('config with single layer entry produces a single scoring function with weight', function(t) {
const config = {
layer: {
locality: 5
}
};
const expected_query = {
constant_score: {
boost: 5,
query: {
term: {
layer: 'locality'
}
}
'function_score': {
'query': {
'match_all': {}
},
'functions': [{
'filter': {
'match': {
'layer': 'locality'
}
},
'weight': 5
}],
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode'),
'min_score': vs.var('custom:boosting:min_score')
}
};
const view_instance = boost_sources_and_layers(config);
const view = boost_sources_and_layers(config);
t.deepEquals(view_instance(), expected_query, 'query is a single term query');
t.deepEquals(view(vs), expected_query, 'query contains a single scoring function');
t.end();
});
};
module.exports.tests.mulitple_item_config = function(test, common) {
test('config with multiple items returns bool query with multiple should conditions', function(t) {
test('config with multiple items produces multiple scoring functions', function(t) {
const config = {
source: {
whosonfirst: 6
@ -55,40 +67,42 @@ module.exports.tests.mulitple_item_config = function(test, common) {
},
};
const expected_query = {
bool: {
should: [{
constant_score: {
boost: 6,
query: {
term: {
source: 'whosonfirst',
}
'function_score': {
'query': {
'match_all': {}
},
'functions': [{
'filter': {
'match': {
'source': 'whosonfirst'
}
}
}, {
constant_score: {
boost: 2,
query: {
term: {
layer: 'country'
}
},
'weight': 6
},{
'filter': {
'match': {
'layer': 'country'
}
}
},
'weight': 2
},{
constant_score: {
boost: 0.5,
query: {
term: {
layer: 'borough'
}
'filter': {
'match': {
'layer': 'borough'
}
}
}]
},
'weight': 0.5
}],
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode'),
'min_score': vs.var('custom:boosting:min_score')
}
};
const view_instance = boost_sources_and_layers(config);
const view = boost_sources_and_layers(config);
t.deepEquals(view_instance(), expected_query, 'query is a bool query with multiple term queres');
t.deepEquals(view(vs), expected_query, 'query contains multiple scoring functions');
t.end();
});

Loading…
Cancel
Save