From a553201de4efd1457f9e8a8a1e93cd783e81aab2 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Tue, 9 Oct 2018 15:32:30 +0200 Subject: [PATCH] feat(autocomplete_boost_exact_matches): improved boosting for exact matches --- query/autocomplete.js | 3 +- query/view/boost_exact_matches.js | 60 ++++++++++++++----- .../fixture/autocomplete_boundary_country.js | 10 ++++ ...ocomplete_linguistic_bbox_san_francisco.js | 10 ++++ .../autocomplete_linguistic_final_token.js | 10 ++++ .../fixture/autocomplete_linguistic_focus.js | 10 ++++ ...tocomplete_linguistic_focus_null_island.js | 10 ++++ ...autocomplete_linguistic_multiple_tokens.js | 11 ++++ .../fixture/autocomplete_linguistic_only.js | 10 ++++ .../autocomplete_with_layer_filtering.js | 10 ++++ .../autocomplete_with_source_filtering.js | 10 ++++ test/unit/query/autocomplete.js | 2 +- 12 files changed, 140 insertions(+), 16 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index cba5d2e8..cde6f175 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -42,7 +42,8 @@ query.score( peliasQuery.view.admin('locality') ); query.score( peliasQuery.view.admin('neighbourhood') ); // scoring boost -query.score( views.boost_exact_matches ); +query.score( views.boost_exact_matches(false) ); +query.score( views.boost_exact_matches(true) ); query.score( peliasQuery.view.focus( views.ngrams_strict ) ); query.score( peliasQuery.view.popularity( views.pop_subquery ) ); query.score( peliasQuery.view.population( views.pop_subquery ) ); diff --git a/query/view/boost_exact_matches.js b/query/view/boost_exact_matches.js index 06155260..75929751 100644 --- a/query/view/boost_exact_matches.js +++ b/query/view/boost_exact_matches.js @@ -14,26 +14,58 @@ var peliasQuery = require('pelias-query'), the view uses some of the values from the 'search_defaults.js' file to add an additional 'SHOULD' condition which scores exact matches slighly higher than partial matches. 
+ + the 'includePartialTokens' variable was introduced in order to allow the view + to be reused as an additional boost for tokens which are in fact complete, + despite us not knowing for sure whether they are complete or not. + + an example is 'Stop 2', without partial tokens the boost will only apply to + documents matching 'stop', with an additional view we can further boost + documents matching 'stop 2'. + + note: it is most likely insufficient to include a version of this view in your + query which has includePartialTokens=true without also having a copy with + includePartialTokens=false. One view will boost the tokens that are known to + be complete and the other will additionally boost tokens which may or may not be + complete, as per the example above. + + note: a clause has been included in the code which disables the view for + includePartialTokens=true if it would generate the exact same view as for + includePartialTokens=false. **/ -module.exports = function( vs ){ +module.exports = function( includePartialTokens ){ + return function( vs ){ + + // make a copy of the variables so we don't interfere with the values + // passed to other views. + var vsCopy = new peliasQuery.Vars( vs.export() ); + + // copy phrase:* values from search defaults + vsCopy.var('phrase:analyzer').set(searchDefaults['phrase:analyzer']); + vsCopy.var('phrase:field').set(searchDefaults['phrase:field']); + + // get a copy of only the *complete* tokens produced from the input:name + var tokens = vs.var('input:name:tokens_complete').get(); - // make a copy of the variables so we don't interfere with the values - // passed to other views. 
- var vsCopy = new peliasQuery.Vars( vs.export() ); + if( includePartialTokens ){ + // get a copy of *all* tokens produced from the input:name (including partial tokens) + var allTokens = vs.var('input:name:tokens').get(); - // copy phrase:* values from search defaults - vsCopy.var('phrase:analyzer').set(searchDefaults['phrase:analyzer']); - vsCopy.var('phrase:field').set(searchDefaults['phrase:field']); + // a duplicate view would be generated, fail now, don't render this view. + // see file comments for more info + if( allTokens.join(' ') === tokens.join(' ') ){ return null; } - // get a copy of the *complete* tokens produced from the input:name - var tokens = vs.var('input:name:tokens_complete').get(); + // use *all* the tokens for this view instead of only the complete tokens. + tokens = allTokens; + } - // no valid tokens to use, fail now, don't render this view. - if( !tokens || tokens.length < 1 ){ return null; } + // no valid tokens to use, fail now, don't render this view. + if( !tokens || tokens.length < 1 ){ return null; } - // set 'input:name' to be only the fully completed characters - vsCopy.var('input:name').set( tokens.join(' ') ); + // set 'input:name' to be only the fully completed characters + vsCopy.var('input:name').set( tokens.join(' ') ); - return peliasQuery.view.phrase( vsCopy ); + return peliasQuery.view.phrase( vsCopy ); + }; }; diff --git a/test/unit/fixture/autocomplete_boundary_country.js b/test/unit/fixture/autocomplete_boundary_country.js index 928c3efe..7459ec85 100644 --- a/test/unit/fixture/autocomplete_boundary_country.js +++ b/test/unit/fixture/autocomplete_boundary_country.js @@ -25,6 +25,16 @@ module.exports = { } }], 'should':[{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'slop': 3, + 'query': 'test', + 'type': 'phrase' + } + } + },{ 'function_score': { 'query': { 'match_all': {} diff --git a/test/unit/fixture/autocomplete_linguistic_bbox_san_francisco.js 
b/test/unit/fixture/autocomplete_linguistic_bbox_san_francisco.js index 5e47e603..3c6fe644 100644 --- a/test/unit/fixture/autocomplete_linguistic_bbox_san_francisco.js +++ b/test/unit/fixture/autocomplete_linguistic_bbox_san_francisco.js @@ -18,6 +18,16 @@ module.exports = { } }], 'should':[{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 3, + 'query': 'test' + } + } + },{ 'function_score': { 'query': { 'match_all': {} diff --git a/test/unit/fixture/autocomplete_linguistic_final_token.js b/test/unit/fixture/autocomplete_linguistic_final_token.js index 4f250ec3..2c9fe7eb 100644 --- a/test/unit/fixture/autocomplete_linguistic_final_token.js +++ b/test/unit/fixture/autocomplete_linguistic_final_token.js @@ -22,6 +22,16 @@ module.exports = { 'type': 'phrase' } } + },{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'slop': 3, + 'query': 'one t', + 'type': 'phrase' + } + } },{ 'function_score': { 'query': { diff --git a/test/unit/fixture/autocomplete_linguistic_focus.js b/test/unit/fixture/autocomplete_linguistic_focus.js index 2acc0e1d..781c9b7b 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus.js +++ b/test/unit/fixture/autocomplete_linguistic_focus.js @@ -18,6 +18,16 @@ module.exports = { } }], 'should': [{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'slop': 3, + 'query': 'test', + 'type': 'phrase' + } + } + },{ 'function_score': { 'query': { 'match': { diff --git a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js index d3029455..1be210e1 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js +++ b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js @@ -18,6 +18,16 @@ module.exports = { } }], 'should': [{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'slop': 3, + 'query': 'test', + 
'type': 'phrase' + } + } + },{ 'function_score': { 'query': { 'match': { diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js index b07219ac..117bf570 100644 --- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js +++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js @@ -40,6 +40,17 @@ module.exports = { } } }, + { + 'match': { + 'phrase.default': { + 'analyzer' : 'peliasPhrase', + 'type' : 'phrase', + 'boost' : 1, + 'slop' : 3, + 'query' : 'one two three' + } + } + }, { 'function_score': { 'query': { diff --git a/test/unit/fixture/autocomplete_linguistic_only.js b/test/unit/fixture/autocomplete_linguistic_only.js index 036bd180..dadeb9e9 100644 --- a/test/unit/fixture/autocomplete_linguistic_only.js +++ b/test/unit/fixture/autocomplete_linguistic_only.js @@ -18,6 +18,16 @@ module.exports = { } }], 'should':[{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 3, + 'query': 'test' + } + } + },{ 'function_score': { 'query': { 'match_all': {} diff --git a/test/unit/fixture/autocomplete_with_layer_filtering.js b/test/unit/fixture/autocomplete_with_layer_filtering.js index ac5971c4..509f8561 100644 --- a/test/unit/fixture/autocomplete_with_layer_filtering.js +++ b/test/unit/fixture/autocomplete_with_layer_filtering.js @@ -18,6 +18,16 @@ module.exports = { } }], 'should':[{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'slop': 3, + 'query': 'test', + 'type': 'phrase' + } + } + },{ 'function_score': { 'query': { 'match_all': {} diff --git a/test/unit/fixture/autocomplete_with_source_filtering.js b/test/unit/fixture/autocomplete_with_source_filtering.js index ada4c953..00d7c21a 100644 --- a/test/unit/fixture/autocomplete_with_source_filtering.js +++ b/test/unit/fixture/autocomplete_with_source_filtering.js @@ -18,6 +18,16 @@ module.exports = { } }], 'should':[{ + 'match': { 
+ 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'slop': 3, + 'query': 'test', + 'type': 'phrase' + } + } + },{ 'function_score': { 'query': { 'match_all': {} diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js index 3903d468..fbf1fdb8 100644 --- a/test/unit/query/autocomplete.js +++ b/test/unit/query/autocomplete.js @@ -206,7 +206,7 @@ module.exports.tests.query = function(test, common) { var expected = require('../fixture/autocomplete_linguistic_bbox_san_francisco'); t.deepEqual(compiled.type, 'autocomplete', 'query type set'); - t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_focus_null_island'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_bbox_san_francisco'); t.end(); }); };