Browse Source

feat(query): Modify custom boosts feature to use function_score queries

configurable-boosts
Peter Johnson 6 years ago committed by Julian Simioni
parent
commit
a06683ff68
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 4
      query/autocomplete.js
  2. 10
      query/autocomplete_defaults.js
  3. 4
      query/search.js
  4. 13
      query/search_defaults.js
  5. 8
      query/search_original.js
  6. 152
      query/view/boost_sources_and_layers.js
  7. 44
      test/unit/fixture/autocomplete_custom_boosts.json
  8. 97
      test/unit/fixture/search_with_custom_boosts.json
  9. 2
      test/unit/query/search_with_custom_boosts.js
  10. 102
      test/unit/query/view/boost_sources_and_layers.js

4
query/autocomplete.js

@ -45,9 +45,7 @@ query.score( views.boost_exact_matches );
query.score( peliasQuery.view.focus( views.ngrams_strict ) ); query.score( peliasQuery.view.focus( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.pop_subquery ) ); query.score( peliasQuery.view.popularity( views.pop_subquery ) );
query.score( peliasQuery.view.population( views.pop_subquery ) ); query.score( peliasQuery.view.population( views.pop_subquery ) );
query.score( views.custom_boosts( config.customBoosts ) );
const boostConfig = config.customBoosts || {};
query.score( views.custom_boosts(config.customBoosts) );
// non-scoring hard filters // non-scoring hard filters
query.filter( peliasQuery.view.sources ); query.filter( peliasQuery.view.sources );

10
query/autocomplete_defaults.js

@ -91,6 +91,12 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'population:field': 'population', 'population:field': 'population',
'population:modifier': 'log1p', 'population:modifier': 'log1p',
'population:max_boost': 20, 'population:max_boost': 20,
'population:weight': 3 'population:weight': 3,
// boost_sources_and_layers view
'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions
'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost
'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score)
'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost
'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section
}); });

4
query/search.js

@ -166,11 +166,11 @@ function isPostalCodeWithCountry(vs) {
var isSet = (layer) => { var isSet = (layer) => {
return vs.isset(`input:${layer}`); return vs.isset(`input:${layer}`);
}; };
var allowedFields = ['postcode', 'country']; var allowedFields = ['postcode', 'country'];
var disallowedFields = ['query', 'category', 'housenumber', 'street', 'locality', var disallowedFields = ['query', 'category', 'housenumber', 'street', 'locality',
'neighbourhood', 'borough', 'county', 'region']; 'neighbourhood', 'borough', 'county', 'region'];
return allowedFields.every(isSet) && return allowedFields.every(isSet) &&
!disallowedFields.some(isSet); !disallowedFields.some(isSet);
} }

13
query/search_defaults.js

@ -93,7 +93,16 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'population:max_boost': 20, 'population:max_boost': 20,
'population:weight': 2, 'population:weight': 2,
// used by fallback queries
// @todo: it is also possible to specify layer boosting
// via pelias/config, consider deprecating this config.
'boost:address': 10, 'boost:address': 10,
'boost:street': 5 'boost:street': 5,
// boost_sources_and_layers view
'custom:boosting:min_score': 1, // score applied to documents which don't score anything via functions
'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost
'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score)
'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost
'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section
}); });

8
query/search_original.js

@ -6,10 +6,8 @@ const logger = require('pelias-logger').get('api');
const config = require('pelias-config').generate().api; const config = require('pelias-config').generate().api;
var placeTypes = require('../helper/placeTypes'); var placeTypes = require('../helper/placeTypes');
var views = { custom_boosts: require('./view/boost_sources_and_layers') };
var views = {
custom_boosts: require('./view/boost_sources_and_layers'),
};
// region_a is also an admin field. addressit tries to detect // region_a is also an admin field. addressit tries to detect
// region_a, in which case we use a match query specifically for it. // region_a, in which case we use a match query specifically for it.
// but addressit doesn't know about all of them so it helps to search // but addressit doesn't know about all of them so it helps to search
@ -42,9 +40,7 @@ query.score( peliasQuery.view.address('postcode') );
query.score( peliasQuery.view.admin('country_a') ); query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region_a') ); query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') ); query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
query.score( views.custom_boosts( config.customBoosts ) );
const boostConfig = config.customBoosts || {};
query.score( views.custom_boosts(config.customBoosts) );
// non-scoring hard filters // non-scoring hard filters
query.filter( peliasQuery.view.boundary_circle ); query.filter( peliasQuery.view.boundary_circle );

152
query/view/boost_sources_and_layers.js

@ -1,51 +1,121 @@
//example input /**
//{ This view allows users to specify a custom boost for sources and layers.
// "source": {
// "openstreetmap": 5 The view is implemented using a 'function_score' query, which enumerates multiple 'functions', each
// }, function will assign a 'score' to each document when matched.
// "layer": {
// "street": 3, A document can match more than one function; in this case the 'score_mode' is used to decide how these
// "country": 5 scores are combined (the value configured in the query defaults is 'sum').
// }
//} Likewise, a document can also match zero functions; in this case it is assigned a score of 'min_score'.
function generateTermQuery(field, value, boost) { The computed score is then multiplied by the 'boost' value in order to come up with the final boost value
return { which will be assigned to that document. The 'boost' value is essentially a hard-coded multiplier for the score.
constant_score: {
boost: boost, The 'max_boost' property is simply a ceiling for this computed boost; if the computed boost is higher than
query: { max_boost it will be assigned the value of max_boost instead.
term: {
[field]: value, Note: This is a simple use of the 'function_score' query, as such we don't use the 'boost_mode' property
} (because there is no query section) and the 'weight' values we assign are simply returned verbatim
} (because we use filter queries for the function scoring).
ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html
example config section:
{
"source": {
"openstreetmap": 5
},
"layer": {
"street": 3,
"country": 5
} }
}; }
}
example query:
{
"function_score": {
"query": {
"match_all": {}
},
"functions": [{
"filter": {
"match": {
"layer": "intersections"
}
},
"weight": 1.6
},{
"filter": {
"match": {
"layer": "stops"
}
},
"weight": 2.4
}],
"boost": 5,
"max_boost": 40,
"score_mode": "sum",
"boost_mode": "multiply",
"min_score": 1
}
}
**/
// supported top-level config items
const TARGETS = ['source', 'layer'];
module.exports = function( config ) {
module.exports = function( configuration ) { // no valid config to use, fail now, don't render this view.
return function( ) { if( !config ) { return function(){ return null; }; }
if (!configuration) {
return function( vs ) {
// validate required params
if( !vs.isset('custom:boosting:min_score') ||
!vs.isset('custom:boosting:boost') ||
!vs.isset('custom:boosting:max_boost') ||
!vs.isset('custom:boosting:score_mode') ||
!vs.isset('custom:boosting:boost_mode') ){
return null; return null;
} }
const filters = [];
['source', 'layer'].forEach(function(target) { // base 'function_score' view
if (configuration[target]) { var view = {
Object.keys(configuration[target]).forEach(function(item) { 'function_score': {
filters.push(generateTermQuery(target, item, configuration[target][item])); 'query': { 'match_all': {} }, // apply to all documents
'functions': [], // a list of functions which contribute to a 'score' for each document
'min_score': vs.var('custom:boosting:min_score'),
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode')
},
};
// iterate over supported targets and their values
TARGETS.forEach( function( target ) {
if( 'object' === typeof config[target] ) {
Object.keys(config[target]).forEach(function(value) {
// add a scoring function for this target, assigning a weight
let weight = config[target][value];
view.function_score.functions.push({
'weight': isNaN(weight) ? 1 : weight,
'filter': {
'match': {
[target]: value
}
}
});
}); });
} }
}); });
if (filters.length === 0) { // no functions were generated, fail now, don't render this view.
return null; if( view.function_score.functions.length === 0 ) { return null; }
} else if (filters.length === 1) {
return filters[0]; return view;
} else {
return {
bool: {
should: filters
}
};
}
}; };
}; };

44
test/unit/fixture/autocomplete_custom_boosts.json

@ -67,31 +67,31 @@
"score_mode": "first", "score_mode": "first",
"boost_mode": "replace" "boost_mode": "replace"
} }
}, },{
{ "function_score": {
"bool": { "query": {
"should": [ "match_all": {}
{ },
"constant_score": { "min_score": 1,
"boost": 5, "boost": 5,
"query": { "max_boost": 50,
"term": { "score_mode": "sum",
"source": "openstreetmap" "boost_mode": "multiply",
} "functions": [{
} "filter": {
"match": {
"source": "openstreetmap"
} }
}, },
{ "weight": 5
"constant_score": { },{
"boost": 3, "filter": {
"query": { "match": {
"term": { "layer": "transit"
"layer": "transit"
}
}
} }
} },
] "weight": 3
}]
} }
} }
] ]

97
test/unit/fixture/search_with_custom_boosts.json

@ -23,30 +23,30 @@
} }
} }
},{ },{
"function_score": { "function_score": {
"query": { "query": {
"match": { "match": {
"phrase.default": { "phrase.default": {
"query": "test", "query": "test",
"analyzer": "peliasPhrase", "analyzer": "peliasPhrase",
"type": "phrase", "type": "phrase",
"slop": 2, "slop": 2,
"boost": 1 "boost": 1
}
} }
}
},
"max_boost": 20,
"score_mode": "first",
"boost_mode": "replace",
"functions": [{
"field_value_factor": {
"modifier": "log1p",
"field": "popularity",
"missing": 1
}, },
"weight": 1 "max_boost": 20,
}] "score_mode": "first",
} "boost_mode": "replace",
"functions": [{
"field_value_factor": {
"modifier": "log1p",
"field": "popularity",
"missing": 1
},
"weight": 1
}]
}
},{ },{
"function_score": { "function_score": {
"query": { "query": {
@ -72,32 +72,33 @@
"weight": 2 "weight": 2
}] }]
} }
}, { },{
"bool": { "function_score": {
"should": [ "query": {
{ "match_all": {}
"constant_score": { },
"boost": 5, "min_score": 1,
"query": { "boost": 5,
"term": { "max_boost": 50,
"source": "openstreetmap" "score_mode": "sum",
} "boost_mode": "multiply",
} "functions": [{
} "filter": {
}, "match": {
{ "source": "openstreetmap"
"constant_score": { }
"boost": 3, },
"query": { "weight": 5
"term": { },{
"layer": "transit" "filter": {
} "match": {
} "layer": "transit"
} }
} },
] "weight": 3
} }]
}] }
}]
} }
}, },
"sort": [ "_score" ], "sort": [ "_score" ],

2
test/unit/query/search_with_custom_boosts.js

@ -36,8 +36,6 @@ module.exports.tests.query = function(test, common) {
}); });
const actual_query = JSON.parse( JSON.stringify( search_query_module(clean) ) ); const actual_query = JSON.parse( JSON.stringify( search_query_module(clean) ) );
console.log(JSON.stringify(actual_query.body.query.bool, null, 2));
t.deepEqual(actual_query, expected_query, 'query as expected'); t.deepEqual(actual_query, expected_query, 'query as expected');
t.pass(); t.pass();
t.end(); t.end();

102
test/unit/query/view/boost_sources_and_layers.js

@ -1,50 +1,62 @@
const query = require('pelias-query');
const vs = new query.Vars(require('../../../../query/search_defaults'));
const boost_sources_and_layers = require('../../../../query/view/boost_sources_and_layers'); const boost_sources_and_layers = require('../../../../query/view/boost_sources_and_layers');
module.exports.tests = {}; module.exports.tests = {};
module.exports.tests.empty_config = function(test, common) { module.exports.tests.empty_config = function(test, common) {
test('empty configuration returns empty query', function(t) { test('empty configuration returns empty query', function(t) {
const view_instance = boost_sources_and_layers({}); const view = boost_sources_and_layers({});
const query = view_instance(); const rendered = view(vs);
t.equal(query, null, 'query is empty'); t.equal(rendered, null, 'query is empty');
t.end(); t.end();
}); });
test('undefined configuration returns empty query', function(t) { test('undefined configuration returns empty query', function(t) {
const view_instance = boost_sources_and_layers(undefined); const view = boost_sources_and_layers(undefined);
const query = view_instance(); const rendered = view(vs);
t.equal(query, null, 'query is empty'); t.equal(rendered, null, 'query is empty');
t.end(); t.end();
}); });
}; };
module.exports.tests.single_item_config = function(test, common) { module.exports.tests.single_item_config = function(test, common) {
test('config with single layer entry returns single term query with boost', function(t) { test('config with single layer entry produces a single scoring function with weight', function(t) {
const config = { const config = {
layer: { layer: {
locality: 5 locality: 5
} }
}; };
const expected_query = { const expected_query = {
constant_score: { 'function_score': {
boost: 5, 'query': {
query: { 'match_all': {}
term: { },
layer: 'locality' 'functions': [{
} 'filter': {
} 'match': {
'layer': 'locality'
}
},
'weight': 5
}],
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode'),
'min_score': vs.var('custom:boosting:min_score')
} }
}; };
const view_instance = boost_sources_and_layers(config); const view = boost_sources_and_layers(config);
t.deepEquals(view_instance(), expected_query, 'query is a single term query'); t.deepEquals(view(vs), expected_query, 'query contains a single scoring function');
t.end(); t.end();
}); });
}; };
module.exports.tests.mulitple_item_config = function(test, common) { module.exports.tests.mulitple_item_config = function(test, common) {
test('config with multiple items returns bool query with multiple should conditions', function(t) { test('config with multiple items produces multiple scoring functions', function(t) {
const config = { const config = {
source: { source: {
whosonfirst: 6 whosonfirst: 6
@ -55,40 +67,42 @@ module.exports.tests.mulitple_item_config = function(test, common) {
}, },
}; };
const expected_query = { const expected_query = {
bool: { 'function_score': {
should: [{ 'query': {
constant_score: { 'match_all': {}
boost: 6, },
query: { 'functions': [{
term: { 'filter': {
source: 'whosonfirst', 'match': {
} 'source': 'whosonfirst'
} }
} },
}, { 'weight': 6
constant_score: { },{
boost: 2, 'filter': {
query: { 'match': {
term: { 'layer': 'country'
layer: 'country'
}
} }
} },
'weight': 2
},{ },{
constant_score: { 'filter': {
boost: 0.5, 'match': {
query: { 'layer': 'borough'
term: {
layer: 'borough'
}
} }
} },
}] 'weight': 0.5
}],
'boost': vs.var('custom:boosting:boost'),
'max_boost': vs.var('custom:boosting:max_boost'),
'score_mode': vs.var('custom:boosting:score_mode'),
'boost_mode': vs.var('custom:boosting:boost_mode'),
'min_score': vs.var('custom:boosting:min_score')
} }
}; };
const view_instance = boost_sources_and_layers(config); const view = boost_sources_and_layers(config);
t.deepEquals(view_instance(), expected_query, 'query is a bool query with multiple term queres'); t.deepEquals(view(vs), expected_query, 'query contains multiple scoring functions');
t.end(); t.end();
}); });

Loading…
Cancel
Save