From 35d48dc15487bf7e6f262937617a1bacc2cdf80c Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 7 Dec 2015 17:15:25 +0100 Subject: [PATCH 01/18] autocomplete tweaks --- query/autocomplete.js | 30 ++++++++++++++++++++++++++++-- query/autocomplete_defaults.js | 8 ++++---- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 0e9b9d68..3d575712 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -13,7 +13,29 @@ query.score( peliasQuery.view.ngrams, 'must' ); // scoring boost query.score( peliasQuery.view.phrase ); -query.score( peliasQuery.view.focus( peliasQuery.view.ngrams ) ); + +var focus = peliasQuery.view.focus( peliasQuery.view.phrase ); + +var _tmpview = function( vs ){ + + var view = focus( vs ); + view.function_score.filter = { + 'or': [ + { 'type': { 'value': 'osmnode' } }, + { 'type': { 'value': 'osmway' } }, + { 'type': { 'value': 'osmaddress' } }, + { 'type': { 'value': 'openaddresses' } }, + { 'type': { 'value': 'geoname' } }, + ] + }; + + // console.log( JSON.stringify( view, null, 2 ) ); + return view; +}; + +// console.log( focus ); + +query.score( _tmpview ); query.score( peliasQuery.view.popularity( peliasQuery.view.phrase ) ); query.score( peliasQuery.view.population( peliasQuery.view.phrase ) ); @@ -42,7 +64,11 @@ function generateQuery( clean ){ }); } - return query.render( vs ); + var q = query.render( vs ); + + console.log( JSON.stringify( q, null, 2 ) ); + + return q; } module.exports = generateQuery; diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index 401ebf73..e5ca0fc3 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -30,13 +30,13 @@ module.exports = extend( false, peliasQuery.defaults, { 'phrase:slop': 2, 'focus:function': 'linear', - 'focus:offset': '1km', - 'focus:scale': '50km', + 'focus:offset': '100km', + 'focus:scale': '250km', 'focus:decay': 0.5, - 'focus:weight': 2, + 'focus:weight': 10, 'function_score:score_mode': 'avg', - 'function_score:boost_mode': 'replace', + 'function_score:boost_mode': 'multiply', 'address:housenumber:analyzer': 'peliasHousenumber', 'address:housenumber:field': 'address.number', From cf9c56928c51dd97b45ff18295d25d808bba80a2 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 7 Dec 2015 17:28:00 +0100 Subject: [PATCH 02/18] fix --- query/autocomplete.js | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 3d575712..12ca982b 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -19,15 +19,18 @@ var focus = peliasQuery.view.focus( peliasQuery.view.phrase ); var _tmpview = function( vs ){ var view = focus( vs ); - view.function_score.filter = { - 'or': [ - { 'type': { 'value': 'osmnode' } }, - { 'type': { 'value': 'osmway' } }, - { 'type': { 'value': 'osmaddress' } }, - { 'type': { 'value': 'openaddresses' } }, - { 'type': { 'value': 'geoname' } }, - ] - }; + + if( view && view.hasOwnProperty('function_score') ){ + view.function_score.filter = { + 'or': [ + { 'type': { 'value': 'osmnode' } }, + { 'type': { 'value': 'osmway' } }, + { 'type': { 'value': 'osmaddress' } }, + { 'type': { 'value': 'openaddresses' } }, + { 'type': { 'value': 'geoname' } }, + ] + }; + } // console.log( JSON.stringify( view, null, 2 ) ); return view; From 95f2d367c22d2328dd5cb0c64721e5c2c22adc20 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 7 Dec 2015 17:39:25 +0100 Subject: [PATCH 03/18] enable text parsing --- query/autocomplete.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/query/autocomplete.js b/query/autocomplete.js index 12ca982b..71e7acf0 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -1,8 +1,10 @@ var peliasQuery = require('pelias-query'), defaults = require('./autocomplete_defaults'), + textParser = require('./text_parser'), check = require('check-types'); + //------------------------------ // autocomplete query //------------------------------ @@ -11,6 +13,16 @@ var query = new peliasQuery.layout.FilteredBooleanQuery(); // mandatory matches query.score( peliasQuery.view.ngrams, 'must' ); +// admin components +query.score( peliasQuery.view.admin('alpha3') ); +query.score( peliasQuery.view.admin('admin0') ); +query.score( peliasQuery.view.admin('admin1') ); +query.score( peliasQuery.view.admin('admin1_abbr') ); +query.score( peliasQuery.view.admin('admin2') ); +query.score( peliasQuery.view.admin('local_admin') ); +query.score( peliasQuery.view.admin('locality') ); +query.score( peliasQuery.view.admin('neighborhood') ); + // scoring boost query.score( peliasQuery.view.phrase ); @@ -67,6 +79,11 @@ function generateQuery( clean ){ }); } + // run the address parser + if( clean.parsed_text ){ + textParser( clean.parsed_text, vs ); + } + var q = query.render( vs ); console.log( JSON.stringify( q, null, 2 ) ); From 033345dbb3547f23788f66acf08e67e6783f881f Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Mon, 7 Dec 2015 12:33:33 -0500 Subject: [PATCH 04/18] Check that hit.address exists Fixes #379 --- middleware/confidenceScore.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js index b100b973..0eae0f8d 100644 --- a/middleware/confidenceScore.js +++ b/middleware/confidenceScore.js @@ -100,7 +100,8 @@ function checkForDealBreakers(req, hit) { return true; } - if (check.assigned(req.clean.parsed_text.postalcode) && req.clean.parsed_text.postalcode !== hit.address.zip) { + if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address) && + req.clean.parsed_text.postalcode !== hit.address.zip) { logger.debug('[confidence][deal-breaker]: postalcode !== zip (' + req.clean.parsed_text.postalcode + ' !== ' + hit.address.zip + ')'); return true; } From a022e62acb0d69e5c1a9a590d5beb265e5331159 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 00:16:48 +0100 Subject: [PATCH 05/18] more tweaks --- query/autocomplete.js | 52 +++++++++++++++++++++++++++------- query/autocomplete_defaults.js | 22 +++++++------- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 71e7acf0..1f759714 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -4,6 +4,20 @@ var peliasQuery = require('pelias-query'), textParser = require('./text_parser'), check = require('check-types'); +var ngrams = function( vs ){ + var view = peliasQuery.view.ngrams( vs ); + view.match['name.default'].type = 'phrase'; + view.match['name.default'].operator = 'and'; + // console.log( JSON.stringify( view, null, 2 ) ); + return view; +}; + +var phrase = function( vs ){ + var view = peliasQuery.view.phrase( vs ); + view.match['phrase.default'].type = 'phrase'; + // console.log( JSON.stringify( view, null, 2 ) ); + return view; +}; //------------------------------ // autocomplete query @@ -11,7 +25,12 @@ var peliasQuery = require('pelias-query'), var query = new peliasQuery.layout.FilteredBooleanQuery(); // mandatory matches -query.score( peliasQuery.view.ngrams, 'must' ); +query.score( ngrams, 'must' ); + +// address components +query.score( peliasQuery.view.address('housenumber') ); +query.score( peliasQuery.view.address('street') ); +query.score( peliasQuery.view.address('postcode') ); // admin components query.score( peliasQuery.view.admin('alpha3') ); @@ -24,11 +43,10 @@ query.score( peliasQuery.view.admin('locality') ); query.score( peliasQuery.view.admin('neighborhood') ); // scoring boost -query.score( peliasQuery.view.phrase ); - -var focus = peliasQuery.view.focus( peliasQuery.view.phrase ); +query.score( phrase ); -var _tmpview = function( vs ){ +var focus = peliasQuery.view.focus( ngrams ); +var localView = function( vs ){ var view = focus( vs ); @@ -38,8 +56,7 @@ var _tmpview = function( vs ){ { 'type': { 'value': 'osmnode' } }, { 'type': { 'value': 'osmway' } }, { 'type': { 'value': 'osmaddress' } }, - { 'type': { 'value': 'openaddresses' } }, - { 'type': { 'value': 'geoname' } }, + { 'type': { 'value': 'openaddresses' } } ] }; } @@ -50,9 +67,21 @@ var _tmpview = function( vs ){ // console.log( focus ); -query.score( _tmpview ); -query.score( peliasQuery.view.popularity( peliasQuery.view.phrase ) ); -query.score( peliasQuery.view.population( peliasQuery.view.phrase ) ); +query.score( localView ); + +var simpleNgramsView = function( vs ){ + + var view = ngrams( vs ); + + delete view.match['name.default'].type; + delete view.match['name.default'].boost; + + // console.log( JSON.stringify( view, null, 2 ) ); + return view; +}; + +query.score( peliasQuery.view.popularity( simpleNgramsView ) ); +query.score( peliasQuery.view.population( simpleNgramsView ) ); // -------------------------------- @@ -64,6 +93,9 @@ function generateQuery( clean ){ var vs = new peliasQuery.Vars( defaults ); + // remove single grams at end + clean.text = clean.text.replace(/( .$)/g,''); + // input text vs.var( 'input:name', clean.text ); diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index e5ca0fc3..633c07d2 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -20,9 +20,9 @@ module.exports = extend( false, peliasQuery.defaults, { 'boundary:rect:type': 'indexed', 'boundary:rect:_cache': true, - 'ngram:analyzer': 'peliasOneEdgeGram', + 'ngram:analyzer': 'standard', 'ngram:field': 'name.default', - 'ngram:boost': 1, + 'ngram:boost': 100, 'phrase:analyzer': 'peliasPhrase', 'phrase:field': 'phrase.default', @@ -33,7 +33,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'focus:offset': '100km', 'focus:scale': '250km', 'focus:decay': 0.5, - 'focus:weight': 10, + 'focus:weight': 2, 'function_score:score_mode': 'avg', 'function_score:boost_mode': 'multiply', @@ -52,35 +52,35 @@ module.exports = extend( false, peliasQuery.defaults, { 'admin:alpha3:analyzer': 'standard', 'admin:alpha3:field': 'alpha3', - 'admin:alpha3:boost': 5, + 'admin:alpha3:boost': 50, 'admin:admin0:analyzer': 'peliasAdmin', 'admin:admin0:field': 'admin0', - 'admin:admin0:boost': 4, + 'admin:admin0:boost': 40, 'admin:admin1:analyzer': 'peliasAdmin', 'admin:admin1:field': 'admin1', - 'admin:admin1:boost': 3, + 'admin:admin1:boost': 30, 'admin:admin1_abbr:analyzer': 'peliasAdmin', 'admin:admin1_abbr:field': 'admin1_abbr', - 'admin:admin1_abbr:boost': 3, + 'admin:admin1_abbr:boost': 30, 'admin:admin2:analyzer': 'peliasAdmin', 'admin:admin2:field': 'admin2', - 'admin:admin2:boost': 2, + 'admin:admin2:boost': 20, 'admin:local_admin:analyzer': 'peliasAdmin', 'admin:local_admin:field': 'local_admin', - 'admin:local_admin:boost': 1, + 'admin:local_admin:boost': 10, 'admin:locality:analyzer': 'peliasAdmin', 'admin:locality:field': 'locality', - 'admin:locality:boost': 1, + 'admin:locality:boost': 10, 'admin:neighborhood:analyzer': 'peliasAdmin', 'admin:neighborhood:field': 'neighborhood', - 'admin:neighborhood:boost': 1, + 'admin:neighborhood:boost': 10, 'popularity:field': 'popularity', 'popularity:modifier': 'log1p', From 661d19ccf29a4a18931c7e3f1ea1b9962f2e9396 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 06:36:56 +0100 Subject: [PATCH 06/18] more tweaks --- query/autocomplete.js | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 1f759714..7af48db5 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -12,6 +12,19 @@ var ngrams = function( vs ){ return view; }; +var ngrams_last_only = function( vs ){ + + var name = vs.var('input:name').get(); + + var vs2 = new peliasQuery.Vars( vs.export() ); + vs2.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) ); + + var view = ngrams( vs2 ); + view.match['name.default'].analyzer = 'peliasOneEdgeGram'; + + return view; +}; + var phrase = function( vs ){ var view = peliasQuery.view.phrase( vs ); view.match['phrase.default'].type = 'phrase'; @@ -19,13 +32,30 @@ var phrase = function( vs ){ return view; }; +var phrase_first_only = function( vs ){ + + var name = vs.var('input:name').get(); + var s = name.split(' '); + + // single token only, abort + if( s.length < 2 ){ + return function(){ return null; }; + } + + var vs2 = new peliasQuery.Vars( vs.export() ); + vs2.var('input:name').set( name.substr(0, name.lastIndexOf(' ') ) ); + + return phrase( vs2 ); +}; + //------------------------------ // autocomplete query //------------------------------ var query = new peliasQuery.layout.FilteredBooleanQuery(); // mandatory matches -query.score( ngrams, 'must' ); +query.score( phrase_first_only, 'must' ); +query.score( ngrams_last_only, 'must' ); // address components query.score( peliasQuery.view.address('housenumber') ); @@ -43,7 +73,7 @@ query.score( peliasQuery.view.admin('locality') ); query.score( peliasQuery.view.admin('neighborhood') ); // scoring boost -query.score( phrase ); +// query.score( phrase ); var focus = peliasQuery.view.focus( ngrams ); var localView = function( vs ){ From df7606786ca6d02513d2083cd872462c914baa58 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 06:44:14 +0100 Subject: [PATCH 07/18] config tweaks --- query/autocomplete_defaults.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index 633c07d2..f30acc7a 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -33,7 +33,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'focus:offset': '100km', 'focus:scale': '250km', 'focus:decay': 0.5, - 'focus:weight': 2, + 'focus:weight': 4, 'function_score:score_mode': 'avg', 'function_score:boost_mode': 'multiply', From b974f304e70ec76518a8ec9fd7877a15d3ee3fed Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 06:51:33 +0100 Subject: [PATCH 08/18] trim --- query/autocomplete.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 7af48db5..acaecd0c 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -124,7 +124,7 @@ function generateQuery( clean ){ var vs = new peliasQuery.Vars( defaults ); // remove single grams at end - clean.text = clean.text.replace(/( .$)/g,''); + clean.text = clean.text.replace(/( .$)/g,'').trim(); // input text vs.var( 'input:name', clean.text ); From 3d29b09805a35248eb820d87692f659e560b911b Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 07:16:25 +0100 Subject: [PATCH 09/18] more tweaks --- query/autocomplete.js | 65 +++++++++++++++++----------------- query/autocomplete_defaults.js | 2 +- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index acaecd0c..4cf9f15c 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -48,32 +48,17 @@ var phrase_first_only = function( vs ){ return phrase( vs2 ); }; -//------------------------------ -// autocomplete query -//------------------------------ -var query = new peliasQuery.layout.FilteredBooleanQuery(); - -// mandatory matches -query.score( phrase_first_only, 'must' ); -query.score( ngrams_last_only, 'must' ); +var simpleNgramsView = function( vs ){ -// address components -query.score( peliasQuery.view.address('housenumber') ); -query.score( peliasQuery.view.address('street') ); -query.score( peliasQuery.view.address('postcode') ); + var view = ngrams( vs ); -// admin components -query.score( peliasQuery.view.admin('alpha3') ); -query.score( peliasQuery.view.admin('admin0') ); -query.score( peliasQuery.view.admin('admin1') ); -query.score( peliasQuery.view.admin('admin1_abbr') ); -query.score( peliasQuery.view.admin('admin2') ); -query.score( peliasQuery.view.admin('local_admin') ); -query.score( peliasQuery.view.admin('locality') ); -query.score( peliasQuery.view.admin('neighborhood') ); + view.match['name.default'].analyzer = 'peliasPhrase'; + delete view.match['name.default'].type; + delete view.match['name.default'].boost; -// scoring boost -// query.score( phrase ); + // console.log( JSON.stringify( view, null, 2 ) ); + return view; +}; var focus = peliasQuery.view.focus( ngrams ); var localView = function( vs ){ @@ -81,6 +66,7 @@ var localView = function( vs ){ var view = focus( vs ); if( view && view.hasOwnProperty('function_score') ){ + view.function_score.query.match['name.default'].analyzer = 'peliasPhrase'; view.function_score.filter = { 'or': [ { 'type': { 'value': 'osmnode' } }, @@ -95,20 +81,35 @@ var localView = function( vs ){ return view; }; -// console.log( focus ); +//------------------------------ +// autocomplete query +//------------------------------ +var query = new peliasQuery.layout.FilteredBooleanQuery(); -query.score( localView ); +// mandatory matches +query.score( phrase_first_only, 'must' ); +query.score( ngrams_last_only, 'must' ); -var simpleNgramsView = function( vs ){ +// address components +query.score( peliasQuery.view.address('housenumber') ); +query.score( peliasQuery.view.address('street') ); +query.score( peliasQuery.view.address('postcode') ); - var view = ngrams( vs ); +// admin components +query.score( peliasQuery.view.admin('alpha3') ); +query.score( peliasQuery.view.admin('admin0') ); +query.score( peliasQuery.view.admin('admin1') ); +query.score( peliasQuery.view.admin('admin1_abbr') ); +query.score( peliasQuery.view.admin('admin2') ); +query.score( peliasQuery.view.admin('local_admin') ); +query.score( peliasQuery.view.admin('locality') ); +query.score( peliasQuery.view.admin('neighborhood') ); - delete view.match['name.default'].type; - delete view.match['name.default'].boost; +// scoring boost +// query.score( phrase ); - // console.log( JSON.stringify( view, null, 2 ) ); - return view; -}; +// console.log( focus ); +query.score( localView ); query.score( peliasQuery.view.popularity( simpleNgramsView ) ); query.score( peliasQuery.view.population( simpleNgramsView ) ); diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index f30acc7a..c19c932a 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -30,7 +30,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'phrase:slop': 2, 'focus:function': 'linear', - 'focus:offset': '100km', + 'focus:offset': '10km', 'focus:scale': '250km', 'focus:decay': 0.5, 'focus:weight': 4, From 68921e7d9499af3493b557e2c7c1967b9f823b2f Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 07:30:48 +0100 Subject: [PATCH 10/18] more tweaks --- query/autocomplete_defaults.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index c19c932a..7bc82344 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -33,7 +33,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'focus:offset': '10km', 'focus:scale': '250km', 'focus:decay': 0.5, - 'focus:weight': 4, + 'focus:weight': 5, 'function_score:score_mode': 'avg', 'function_score:boost_mode': 'multiply', From 22fa0d04583d769b4770f5b15c0b2f9b0b1e7c9e Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 07:57:20 +0100 Subject: [PATCH 11/18] hack hack hack --- query/autocomplete.js | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 4cf9f15c..4c850aa7 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -14,6 +14,11 @@ var ngrams = function( vs ){ var ngrams_last_only = function( vs ){ + // hack to disable ngrams when query parsing enabled + if( vs.var('parsed_text').get() ){ + return null; + } + var name = vs.var('input:name').get(); var vs2 = new peliasQuery.Vars( vs.export() ); @@ -34,18 +39,23 @@ var phrase = function( vs ){ var phrase_first_only = function( vs ){ - var name = vs.var('input:name').get(); - var s = name.split(' '); + // hack to disable substr when query parsing enabled + if( !vs.var('parsed_text').get() ){ - // single token only, abort - if( s.length < 2 ){ - return function(){ return null; }; - } + var name = vs.var('input:name').get(); + var s = name.split(' '); - var vs2 = new peliasQuery.Vars( vs.export() ); - vs2.var('input:name').set( name.substr(0, name.lastIndexOf(' ') ) ); + // single token only, abort + if( s.length < 2 ){ + return null; + } - return phrase( vs2 ); + var vs2 = new peliasQuery.Vars( vs.export() ); + vs2.var('input:name').set( name.substr(0, name.lastIndexOf(' ') ) ); + return phrase( vs2 ); + } + + return phrase( vs ); }; var simpleNgramsView = function( vs ){ @@ -123,12 +133,20 @@ query.score( peliasQuery.view.population( simpleNgramsView ) ); function generateQuery( clean ){ var vs = new peliasQuery.Vars( defaults ); + vs.var( 'parsed_text', false ); // remove single grams at end - clean.text = clean.text.replace(/( .$)/g,'').trim(); + var text = clean.text.replace(/( .$)/g,'').trim(); + + if( clean.hasOwnProperty('parsed_text') ){ + if( clean.parsed_text.hasOwnProperty('name') ){ + vs.var( 'parsed_text', true ); + text = clean.parsed_text.name; + } + } // input text - vs.var( 'input:name', clean.text ); + vs.var( 'input:name', text ); // always 10 (not user definable due to caching) vs.var( 'size', 10 ); From c0385fcdef7689483f9ecb5e505b050167cf4084 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 08:04:00 +0100 Subject: [PATCH 12/18] massive boost to admin (when specified) --- query/autocomplete_defaults.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index 7bc82344..acf341b1 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -48,39 +48,39 @@ module.exports = extend( false, peliasQuery.defaults, { 'address:postcode:analyzer': 'peliasZip', 'address:postcode:field': 'address.zip', - 'address:postcode:boost': 20, + 'address:postcode:boost': 2000, 'admin:alpha3:analyzer': 'standard', 'admin:alpha3:field': 'alpha3', - 'admin:alpha3:boost': 50, + 'admin:alpha3:boost': 1000, 'admin:admin0:analyzer': 'peliasAdmin', 'admin:admin0:field': 'admin0', - 'admin:admin0:boost': 40, + 'admin:admin0:boost': 800, 'admin:admin1:analyzer': 'peliasAdmin', 'admin:admin1:field': 'admin1', - 'admin:admin1:boost': 30, + 'admin:admin1:boost': 600, 'admin:admin1_abbr:analyzer': 'peliasAdmin', 'admin:admin1_abbr:field': 'admin1_abbr', - 'admin:admin1_abbr:boost': 30, + 'admin:admin1_abbr:boost': 600, 'admin:admin2:analyzer': 'peliasAdmin', 'admin:admin2:field': 'admin2', - 'admin:admin2:boost': 20, + 'admin:admin2:boost': 400, 'admin:local_admin:analyzer': 'peliasAdmin', 'admin:local_admin:field': 'local_admin', - 'admin:local_admin:boost': 10, + 'admin:local_admin:boost': 200, 'admin:locality:analyzer': 'peliasAdmin', 'admin:locality:field': 'locality', - 'admin:locality:boost': 10, + 'admin:locality:boost': 200, 'admin:neighborhood:analyzer': 'peliasAdmin', 'admin:neighborhood:field': 'neighborhood', - 'admin:neighborhood:boost': 10, + 'admin:neighborhood:boost': 200, 'popularity:field': 'popularity', 'popularity:modifier': 'log1p', From 3236d97d4ab6c3312c86890090f91789a74563c8 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 17 Dec 2015 08:29:53 +0100 Subject: [PATCH 13/18] switch back to using standard analyzer for ngrams must condition --- query/autocomplete.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 4c850aa7..8429083c 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -25,7 +25,7 @@ var ngrams_last_only = function( vs ){ vs2.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) ); var view = ngrams( vs2 ); - view.match['name.default'].analyzer = 'peliasOneEdgeGram'; + view.match['name.default'].analyzer = 'standard'; return view; }; From 9ceb8f37d2c4e3b0f377d431fc8ba8be20cfb645 Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 22 Dec 2015 14:39:40 +0100 Subject: [PATCH 14/18] more tweaks --- query/autocomplete.js | 2 +- query/autocomplete_defaults.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/query/autocomplete.js b/query/autocomplete.js index 8429083c..363c74bf 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -25,7 +25,7 @@ var ngrams_last_only = function( vs ){ vs2.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) ); var view = ngrams( vs2 ); - view.match['name.default'].analyzer = 'standard'; + view.match['name.default'].analyzer = 'peliasPhrase'; return view; }; diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index acf341b1..f14e1441 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -20,7 +20,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'boundary:rect:type': 'indexed', 'boundary:rect:_cache': true, - 'ngram:analyzer': 'standard', + 'ngram:analyzer': 'peliasPhrase', 'ngram:field': 'name.default', 'ngram:boost': 100, @@ -33,7 +33,7 @@ module.exports = extend( false, peliasQuery.defaults, { 'focus:offset': '10km', 'focus:scale': '250km', 'focus:decay': 0.5, - 'focus:weight': 5, + 'focus:weight': 3, 'function_score:score_mode': 'avg', 'function_score:boost_mode': 'multiply', From 3b4d566b11cdef01d3cdf47a43b1e042c9159f66 Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 22 Dec 2015 17:45:44 +0100 Subject: [PATCH 15/18] add unit test --- .../autocomplete_linguistic_final_token.js | 80 +++++++++ .../fixture/autocomplete_linguistic_focus.js | 64 +++++--- ...tocomplete_linguistic_focus_null_island.js | 64 +++++--- ...autocomplete_linguistic_multiple_tokens.js | 91 +++++++++++ .../fixture/autocomplete_linguistic_only.js | 32 ++-- .../autocomplete_linguistic_with_admin.js | 154 ++++++++++++++++++ test/unit/query/autocomplete.js | 44 +++++ 7 files changed, 457 insertions(+), 72 deletions(-) create mode 100644 test/unit/fixture/autocomplete_linguistic_final_token.js create mode 100644 test/unit/fixture/autocomplete_linguistic_multiple_tokens.js create mode 100644 test/unit/fixture/autocomplete_linguistic_with_admin.js diff --git a/test/unit/fixture/autocomplete_linguistic_final_token.js b/test/unit/fixture/autocomplete_linguistic_final_token.js new file mode 100644 index 00000000..ed7d43e4 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_final_token.js @@ -0,0 +1,80 @@ + +module.exports = { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'query': 'one', + 'type': 'phrase', + 'operator': 'and' + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'query': 'one', + 'operator': 'and' + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'filter': { + 'exists': { + 'field': 'popularity' + } + }, + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity' + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'query': 'one', + 'operator': 'and' + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'filter': { + 'exists': { + 'field': 'population' + } + }, + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population' + }, + 'weight': 2 + }] + } + }] + } + } + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_focus.js b/test/unit/fixture/autocomplete_linguistic_focus.js index 8d0e9914..28e5e0db 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus.js +++ b/test/unit/fixture/autocomplete_linguistic_focus.js @@ -7,30 +7,24 @@ module.exports = { 'must': [{ 'match': { 'name.default': { - 'query': 'test', + 'analyzer': 'peliasPhrase', 'boost': 1, - 'analyzer': 'peliasOneEdgeGram' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { 'query': 'test', - 'analyzer': 'peliasPhrase', 'type': 'phrase', - 'boost': 1, - 'slop': 2 + 'operator': 'and' } } - }, { + }], + 'should': [{ 'function_score': { 'query': { 'match': { 'name.default': { - 'analyzer': 'peliasOneEdgeGram', + 'analyzer': 'peliasPhrase', 'boost': 1, - 'query': 'test' + 'query': 'test', + 'type': 'phrase', + 'operator': 'and' } } }, @@ -49,18 +43,40 @@ module.exports = { 'weight': 2 }], 'score_mode': 'avg', - 'boost_mode': 'replace' + 'boost_mode': 'replace', + 'filter': { + 'or': [ + { + 'type': { + 'value': 'osmnode' + } + }, + { + 'type': { + 'value': 'osmway' + } + }, + { + 'type': { + 'value': 'osmaddress' + } + }, + { + 'type': { + 'value': 'openaddresses' + } + } + ] + } } },{ 'function_score': { 'query': { 'match': { - 'phrase.default': { - 'query': 'test', + 'name.default': { 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 + 'query': 'test', + 'operator': 'and' } } }, @@ -84,12 +100,10 @@ module.exports = { 'function_score': { 'query': { 'match': { - 'phrase.default': { - 'query': 'test', + 'name.default': { 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 + 'query': 'test', + 'operator': 'and' } } }, diff --git a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js index 8300b026..2fc02bfa 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js +++ b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js @@ -7,30 +7,24 @@ module.exports = { 'must': [{ 'match': { 'name.default': { - 'query': 'test', + 'analyzer': 'peliasPhrase', 'boost': 1, - 'analyzer': 'peliasOneEdgeGram' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { 'query': 'test', - 'analyzer': 'peliasPhrase', 'type': 'phrase', - 'boost': 1, - 'slop': 2 + 'operator': 'and' } } - }, { + }], + 'should': [{ 'function_score': { 'query': { 'match': { 'name.default': { - 'analyzer': 'peliasOneEdgeGram', + 'analyzer': 'peliasPhrase', 'boost': 1, - 'query': 'test' + 'query': 'test', + 'type': 'phrase', + 'operator': 'and' } } }, @@ -49,18 +43,40 @@ module.exports = { 'weight': 2 }], 'score_mode': 'avg', - 'boost_mode': 'replace' + 'boost_mode': 'replace', + 'filter': { + 'or': [ + { + 'type': { + 'value': 'osmnode' + } + }, + { + 'type': { + 'value': 'osmway' + } + }, + { + 'type': { + 'value': 'osmaddress' + } + }, + { + 'type': { + 'value': 'openaddresses' + } + } + ] + } } },{ 'function_score': { 'query': { 'match': { - 'phrase.default': { - 'query': 'test', + 'name.default': { 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 + 'query': 'test', + 'operator': 'and' } } }, @@ -84,12 +100,10 @@ module.exports = { 'function_score': { 'query': { 'match': { - 'phrase.default': { - 'query': 'test', + 'name.default': { 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 + 'query': 'test', + 'operator': 'and' } } }, diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js new file mode 100644 index 00000000..c4b4b10e --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js @@ -0,0 +1,91 @@ + +module.exports = { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2, + 'query': 'one two' + } + } + }, + { + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'boost': 1, + 'query': 'three', + 'type': 'phrase', + 'operator': 'and' + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'query': 'one two three', + 'operator': 'and' + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'filter': { + 'exists': { + 'field': 'popularity' + } + }, + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity' + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'query': 'one two three', + 'operator': 'and' + } + } + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'filter': { + 'exists': { + 'field': 'population' + } + }, + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population' + }, + 'weight': 2 + }] + } + }] + } + } + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_only.js b/test/unit/fixture/autocomplete_linguistic_only.js index 53ece28e..f8b8fa2d 100644 --- a/test/unit/fixture/autocomplete_linguistic_only.js +++ b/test/unit/fixture/autocomplete_linguistic_only.js @@ -7,32 +7,22 @@ module.exports = { 'must': [{ 'match': { 'name.default': { - 'query': 'test', + 'analyzer': 'peliasPhrase', 'boost': 1, - 'analyzer': 'peliasOneEdgeGram' - } - } - }], - 'should': [{ - 'match': { - 'phrase.default': { 'query': 'test', - 'analyzer': 'peliasPhrase', 'type': 'phrase', - 'boost': 1, - 'slop': 2 + 'operator': 'and' } } - },{ + }], + 'should':[{ 'function_score': { 'query': { 'match': { - 'phrase.default': { - 'query': 'test', + 'name.default': { 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 + 'query': 'test', + 'operator': 'and' } } }, @@ -56,12 +46,10 @@ module.exports = { 'function_score': { 'query': { 'match': { - 'phrase.default': { - 'query': 'test', + 'name.default': { 'analyzer': 'peliasPhrase', - 'type': 'phrase', - 'slop': 2, - 'boost': 1 + 'query': 'test', + 'operator': 'and' } } }, diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js new file mode 100644 index 00000000..ebfa4c84 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js @@ -0,0 +1,154 @@ + +module.exports = { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'must': [ + { + 'match': { + 'phrase.default': { + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2, + 'query': 'one two' + } + } + } + ], + 'should': [ + { + 'match': { + 'admin0': { + 'analyzer': 'peliasAdmin', + 'boost': 4, + 'query': 'three' + } + } + }, + { + 'match': { + 'admin1': { + 'analyzer': 'peliasAdmin', + 'boost': 3, + 'query': 'three' + } + } + }, + { + 'match': { + 'admin1_abbr': { + 'analyzer': 'peliasAdmin', + 'boost': 3, + 'query': 'three' + } + } + }, + { + 'match': { + 'admin2': { + 'analyzer': 'peliasAdmin', + 'boost': 2, + 'query': 'three' + } + } + }, + { + 'match': { + 'local_admin': { + 'analyzer': 'peliasAdmin', + 'boost': 1, + 'query': 'three' + } + } + }, + { + 'match': { + 'locality': { + 'analyzer': 'peliasAdmin', + 'boost': 1, + 'query': 'three' + } + } + }, + { + 'match': { + 'neighborhood': { + 'analyzer': 'peliasAdmin', + 'boost': 1, + 'query': 'three' + } + } + }, + { + 'function_score': { + 'query': { + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'query': 'one two', + 'operator': 'and' + } + } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity' + }, + 'weight': 1 + } + ], + 'score_mode': 'first', + 'boost_mode': 'replace', + 'filter': { + 'exists': { + 'field': 'popularity' + } + } + } + }, + { + 'function_score': { + 'query': { + 'match': { + 'name.default': { + 'analyzer': 'peliasPhrase', + 'query': 'one two', + 'operator': 'and' + } + } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population' + }, + 'weight': 2 + } + ], + 'score_mode': 'first', + 'boost_mode': 'replace', + 'filter': { + 'exists': { + 'field': 'population' + } + } + } + } + ] + } + } + } + }, + 'size': 10, + 'track_scores': true, + 'sort': [ + '_score' + ] +}; diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js index c3f620c0..dc973ddc 100644 --- a/test/unit/query/autocomplete.js +++ b/test/unit/query/autocomplete.js @@ -24,6 +24,50 @@ module.exports.tests.query = function(test, common) { t.end(); }); + test('valid lingustic autocomplete with 3 tokens', function(t) { + var query = generate({ + text: 'one two three' + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_multiple_tokens.js'); + + t.deepEqual(compiled, expected, 'valid autocomplete query'); + t.end(); + }); + + test('valid lingustic autocomplete with comma delimited admin section', function(t) { + var query = generate({ + text: 'one two, three', + parsed_text: { + name: 'one two', + regions: [ 'one two', 'three' ], + admin_parts: 'three' + } + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_with_admin.js'); + + t.deepEqual(compiled, expected, 'valid autocomplete query'); + t.end(); + }); + + // if the final token is less than 2 chars we need to remove it from the string. + // note: this behaviour is tied to having a min_gram size of 2. + // note: if 1 grams are enabled at a later date, remove this behaviour. + test('valid lingustic autocomplete final token', function(t) { + var query = generate({ + text: 'one t' + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_final_token.js'); + + t.deepEqual(compiled, expected, 'valid autocomplete query'); + t.end(); + }); + test('autocomplete + focus', function(t) { var query = generate({ text: 'test', From 6711aa45e0d92252078fb98b63cd21224ce96563 Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 22 Dec 2015 18:31:15 +0100 Subject: [PATCH 16/18] simplify pass 1 --- query/autocomplete.js | 45 ++++--------------- query/view/ngrams_strict.js | 19 ++++++++ .../autocomplete_linguistic_final_token.js | 4 ++ .../fixture/autocomplete_linguistic_focus.js | 4 ++ ...tocomplete_linguistic_focus_null_island.js | 4 ++ ...autocomplete_linguistic_multiple_tokens.js | 4 ++ .../fixture/autocomplete_linguistic_only.js | 4 ++ .../autocomplete_linguistic_with_admin.js | 4 ++ 8 files changed, 52 insertions(+), 36 deletions(-) create mode 100644 query/view/ngrams_strict.js diff --git a/query/autocomplete.js b/query/autocomplete.js index bfc2f60c..354504c7 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -4,13 +4,9 @@ var peliasQuery = require('pelias-query'), textParser = require('./text_parser'), check = require('check-types'); -var ngrams = function( vs ){ - var view = peliasQuery.view.ngrams( vs ); - view.match['name.default'].type = 'phrase'; - view.match['name.default'].operator = 'and'; - // console.log( JSON.stringify( view, null, 2 ) ); - return view; -}; +// additional views (these may be merged in to pelias/query at a later date) +var views = {}; +views.ngrams_strict = require('./view/ngrams_strict'); var ngrams_last_only = function( vs ){ @@ -24,19 +20,12 @@ var ngrams_last_only = function( vs ){ var vs2 = new peliasQuery.Vars( vs.export() ); vs2.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) ); - var view = ngrams( vs2 ); + var view = views.ngrams_strict( vs2 ); view.match['name.default'].analyzer = 'peliasPhrase'; return view; }; -var phrase = function( vs ){ - var view = peliasQuery.view.phrase( vs ); - view.match['phrase.default'].type = 'phrase'; - // console.log( JSON.stringify( view, null, 2 ) ); - return view; -}; - var phrase_first_only = function( vs ){ // hack to disable substr when query parsing enabled @@ -52,31 +41,18 @@ var phrase_first_only = function( vs ){ var vs2 = new peliasQuery.Vars( vs.export() ); vs2.var('input:name').set( name.substr(0, name.lastIndexOf(' ') ) ); - return phrase( vs2 ); + return peliasQuery.view.phrase( vs2 ); } - return phrase( vs ); -}; - -var simpleNgramsView = function( vs ){ - - var view = ngrams( vs ); - - view.match['name.default'].analyzer = 'peliasPhrase'; - delete view.match['name.default'].type; - delete view.match['name.default'].boost; - - // console.log( JSON.stringify( view, null, 2 ) ); - return view; + return peliasQuery.view.phrase( vs ); }; -var focus = peliasQuery.view.focus( ngrams ); +var focus = peliasQuery.view.focus( views.ngrams_strict ); var localView = function( vs ){ var view = focus( vs ); if( view && view.hasOwnProperty('function_score') ){ - view.function_score.query.match['name.default'].analyzer = 'peliasPhrase'; view.function_score.filter = { 'or': [ { 'type': { 'value': 'osmnode' } }, @@ -116,13 +92,10 @@ query.score( peliasQuery.view.admin('locality') ); query.score( peliasQuery.view.admin('neighborhood') ); // scoring boost -// query.score( phrase ); - -// console.log( focus ); query.score( localView ); -query.score( peliasQuery.view.popularity( simpleNgramsView ) ); -query.score( peliasQuery.view.population( simpleNgramsView ) ); +query.score( peliasQuery.view.popularity( views.ngrams_strict ) ); +query.score( peliasQuery.view.population( views.ngrams_strict ) ); // -------------------------------- diff --git a/query/view/ngrams_strict.js b/query/view/ngrams_strict.js new file mode 100644 index 00000000..1f90ebe6 --- /dev/null +++ b/query/view/ngrams_strict.js @@ -0,0 +1,19 @@ + +var peliasQuery = require('pelias-query'); + +/** + Ngrams view with the additional properties to enable: + type:phrase -> tokens MUST appear in the same order in BOTH query and index + operator:and -> ALL tokens are mandatory, missing any single token will cause + a query failure. +**/ + +module.exports = function( vs ){ + + var view = peliasQuery.view.ngrams( vs ); + + view.match['name.default'].type = 'phrase'; + view.match['name.default'].operator = 'and'; + + return view; +}; diff --git a/test/unit/fixture/autocomplete_linguistic_final_token.js b/test/unit/fixture/autocomplete_linguistic_final_token.js index 8ce3329f..e63c84b6 100644 --- a/test/unit/fixture/autocomplete_linguistic_final_token.js +++ b/test/unit/fixture/autocomplete_linguistic_final_token.js @@ -21,7 +21,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'one', + 'type': 'phrase', 'operator': 'and' } } @@ -48,7 +50,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'one', + 'type': 'phrase', 'operator': 'and' } } diff --git a/test/unit/fixture/autocomplete_linguistic_focus.js b/test/unit/fixture/autocomplete_linguistic_focus.js index ba8e4f6b..7f3c7174 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus.js +++ b/test/unit/fixture/autocomplete_linguistic_focus.js @@ -75,7 +75,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'test', + 'type': 'phrase', 'operator': 'and' } } @@ -102,7 +104,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'test', + 'type': 'phrase', 'operator': 'and' } } diff --git a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js index c6f751be..46554715 100644 --- a/test/unit/fixture/autocomplete_linguistic_focus_null_island.js +++ b/test/unit/fixture/autocomplete_linguistic_focus_null_island.js @@ -75,7 +75,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'test', + 'type': 'phrase', 'operator': 'and' } } @@ -102,7 +104,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'test', + 'type': 'phrase', 'operator': 'and' } } diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js index 040f116e..dab5e31a 100644 --- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js +++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js @@ -32,7 +32,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'one two three', + 'type': 'phrase', 'operator': 'and' } } @@ -59,7 +61,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'one two three', + 'type': 'phrase', 'operator': 'and' } } diff --git a/test/unit/fixture/autocomplete_linguistic_only.js b/test/unit/fixture/autocomplete_linguistic_only.js index d3bf88c4..2ec025d4 100644 --- a/test/unit/fixture/autocomplete_linguistic_only.js +++ b/test/unit/fixture/autocomplete_linguistic_only.js @@ -21,7 +21,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'test', + 'type': 'phrase', 'operator': 'and' } } @@ -48,7 +50,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'test', + 'type': 'phrase', 'operator': 'and' } } diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js index fd815896..215a5fda 100644 --- a/test/unit/fixture/autocomplete_linguistic_with_admin.js +++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js @@ -87,7 +87,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'one two', + 'type': 'phrase', 'operator': 'and' } } @@ -117,7 +119,9 @@ module.exports = { 'match': { 'name.default': { 'analyzer': 'peliasPhrase', + 'boost': 100, 'query': 'one two', + 'type': 'phrase', 'operator': 'and' } } From 8474dede206d1e68e33d11ffb6ab2f1d9a828698 Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 22 Dec 2015 18:42:38 +0100 Subject: [PATCH 17/18] simplify pass 2 --- query/autocomplete.js | 29 +++++------------------- query/view/focus_selected_layers.js | 35 +++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 24 deletions(-) create mode 100644 query/view/focus_selected_layers.js diff --git a/query/autocomplete.js b/query/autocomplete.js index 354504c7..416b255b 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -5,8 +5,10 @@ var peliasQuery = require('pelias-query'), check = require('check-types'); // additional views (these may be merged in to pelias/query at a later date) -var views = {}; -views.ngrams_strict = require('./view/ngrams_strict'); +var views = { + ngrams_strict: require('./view/ngrams_strict'), + focus_selected_layers: require('./view/focus_selected_layers') +}; var ngrams_last_only = function( vs ){ @@ -47,26 +49,6 @@ var phrase_first_only = function( vs ){ return peliasQuery.view.phrase( vs ); }; -var focus = peliasQuery.view.focus( views.ngrams_strict ); -var localView = function( vs ){ - - var view = focus( vs ); - - if( view && view.hasOwnProperty('function_score') ){ - view.function_score.filter = { - 'or': [ - { 'type': { 'value': 'osmnode' } }, - { 'type': { 'value': 'osmway' } }, - { 'type': { 'value': 'osmaddress' } }, - { 'type': { 'value': 'openaddresses' } } - ] - }; - } - - // console.log( JSON.stringify( view, null, 2 ) ); - return view; -}; - //------------------------------ // autocomplete query //------------------------------ @@ -92,8 +74,7 @@ query.score( peliasQuery.view.admin('locality') ); query.score( peliasQuery.view.admin('neighborhood') ); // scoring boost -query.score( localView ); - +query.score( views.focus_selected_layers( views.ngrams_strict ) ); query.score( peliasQuery.view.popularity( views.ngrams_strict ) ); query.score( peliasQuery.view.population( views.ngrams_strict ) ); diff --git a/query/view/focus_selected_layers.js b/query/view/focus_selected_layers.js new file mode 100644 index 00000000..038d7ffa --- /dev/null +++ b/query/view/focus_selected_layers.js @@ -0,0 +1,35 @@ + +var peliasQuery = require('pelias-query'); + +/** + This view is the same as `peliasQuery.view.focus` with one exception: + + if the view is generated successfully, we add a 'filter' clause which + restricts the targeted '_type' to be in the list specified below. + + documents which are not in the '_type' list below will simply score 0 for + this section of the query. +**/ + +module.exports = function( subview ){ + return function( vs ){ + + if( !subview ){ return null; } // subview validation failed + var macroView = peliasQuery.view.focus( subview ); + if( !macroView ){ return null; } // macroView validation failed + var view = macroView( vs ); + + if( view && view.hasOwnProperty('function_score') ){ + view.function_score.filter = { + 'or': [ + { 'type': { 'value': 'osmnode' } }, + { 'type': { 'value': 'osmway' } }, + { 'type': { 'value': 'osmaddress' } }, + { 'type': { 'value': 'openaddresses' } } + ] + }; + } + + return view; + }; +}; From 8a213b70bb482c623e72853f107a527e158fc5d3 Mon Sep 17 00:00:00 2001 From: missinglink Date: Tue, 22 Dec 2015 19:19:47 +0100 Subject: [PATCH 18/18] simplify pass 3 --- query/autocomplete.js | 73 ++++++++------------------ query/view/ngrams_last_token_only.js | 37 +++++++++++++ query/view/phrase_first_tokens_only.js | 44 ++++++++++++++++ 3 files changed, 104 insertions(+), 50 deletions(-) create mode 100644 query/view/ngrams_last_token_only.js create mode 100644 query/view/phrase_first_tokens_only.js diff --git a/query/autocomplete.js b/query/autocomplete.js index 416b255b..bf5f76aa 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -6,47 +6,10 @@ var peliasQuery = require('pelias-query'), // additional views (these may be merged in to pelias/query at a later date) var views = { - ngrams_strict: require('./view/ngrams_strict'), - focus_selected_layers: require('./view/focus_selected_layers') -}; - -var ngrams_last_only = function( vs ){ - - // hack to disable ngrams when query parsing enabled - if( vs.var('parsed_text').get() ){ - return null; - } - - var name = vs.var('input:name').get(); - - var vs2 = new peliasQuery.Vars( vs.export() ); - vs2.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) ); - - var view = views.ngrams_strict( vs2 ); - view.match['name.default'].analyzer = 'peliasPhrase'; - - return view; -}; - -var phrase_first_only = function( vs ){ - - // hack to disable substr when query parsing enabled - if( !vs.var('parsed_text').get() ){ - - var name = vs.var('input:name').get(); - var s = name.split(' '); - - // single token only, abort - if( s.length < 2 ){ - return null; - } - - var vs2 = new peliasQuery.Vars( vs.export() ); - vs2.var('input:name').set( name.substr(0, name.lastIndexOf(' ') ) ); - return peliasQuery.view.phrase( vs2 ); - } - - return peliasQuery.view.phrase( vs ); + ngrams_strict: require('./view/ngrams_strict'), + focus_selected_layers: require('./view/focus_selected_layers'), + ngrams_last_token_only: require('./view/ngrams_last_token_only'), + phrase_first_tokens_only: require('./view/phrase_first_tokens_only') }; //------------------------------ @@ -55,8 +18,8 @@ var phrase_first_only = function( vs ){ var query = new peliasQuery.layout.FilteredBooleanQuery(); // mandatory matches -query.score( phrase_first_only, 'must' ); -query.score( ngrams_last_only, 'must' ); +query.score( views.phrase_first_tokens_only, 'must' ); +query.score( views.ngrams_last_token_only, 'must' ); // address components query.score( peliasQuery.view.address('housenumber') ); @@ -87,16 +50,26 @@ query.score( peliasQuery.view.population( views.ngrams_strict ) ); function generateQuery( clean ){ var vs = new peliasQuery.Vars( defaults ); - vs.var( 'parsed_text', false ); - // remove single grams at end + // mark the name as incomplete (user has not yet typed a comma) + vs.var( 'input:name:isComplete', false ); + + // perform some operations on 'clean.text': + // 1. if there is a space followed by a single char, remove them. + // - this is required as the index uses 2grams and sending 1grams + // - to a 2gram index when using 'type:phrase' or 'operator:and' will + // - result in a complete failure of the query. + // 2. trim leading and trailing whitespace. var text = clean.text.replace(/( .$)/g,'').trim(); - if( clean.hasOwnProperty('parsed_text') ){ - if( clean.parsed_text.hasOwnProperty('name') ){ - vs.var( 'parsed_text', true ); - text = clean.parsed_text.name; - } + // if the input parser has run and suggested a 'parsed_text.name' to use. + if( clean.hasOwnProperty('parsed_text') && clean.parsed_text.hasOwnProperty('name') ){ + + // mark the name as complete (user has already typed a comma) + vs.var( 'input:name:isComplete', true ); + + // use 'parsed_text.name' instead of 'clean.text'. + text = clean.parsed_text.name; } // input text diff --git a/query/view/ngrams_last_token_only.js b/query/view/ngrams_last_token_only.js new file mode 100644 index 00000000..3e3315f7 --- /dev/null +++ b/query/view/ngrams_last_token_only.js @@ -0,0 +1,37 @@ + +var peliasQuery = require('pelias-query'), + ngrams_strict = require('./ngrams_strict'); + +/** + Ngrams view which trims the 'input:name' and only uses the LAST TOKEN. + + eg. if the input was "100 foo str", then 'input:name' would only be 'str' + note: it is assumed that the rest of the input is matched using another view. + + there is an additional flag 'input:name:isComplete' used to disable this view + selectively, see that section for more info. + + code notes: this view makes a copy of the $vs object in order to change their + values without mutating the original values, which may be expected in their + unaltered form by other views. +**/ + +module.exports = function( vs ){ + + // Totally disable this view when bool value 'input:name:isComplete' is true. + // This is the case when the user has typed a comma, so we can assume + // that the 'name' part of the query is now complete. + if( vs.var('input:name:isComplete').get() ){ return null; } + + // make a copy Vars so we don't mutate the original + var vsCopy = new peliasQuery.Vars( vs.export() ); + + // get the input 'name' variable + var name = vs.var('input:name').get(); + + // set the 'name' variable in the copy to only the last token + vsCopy.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) ); + + // return the view rendered using the copy + return ngrams_strict( vsCopy ); +}; diff --git a/query/view/phrase_first_tokens_only.js b/query/view/phrase_first_tokens_only.js new file mode 100644 index 00000000..b047b30f --- /dev/null +++ b/query/view/phrase_first_tokens_only.js @@ -0,0 +1,44 @@ + +var peliasQuery = require('pelias-query'); + +/** + Phrase view which trims the 'input:name' and uses ALL BUT the last token. + + eg. if the input was "100 foo str", then 'input:name' would only be '100 foo' + note: it is assumed that the rest of the input is matched using another view. + + there is an additional flag 'input:name:isComplete' used to disable this view + selectively, see that section for more info. + + code notes: this view makes a copy of the $vs object in order to change their + values without mutating the original values, which may be expected in their + unaltered form by other views. +**/ + +module.exports = function( vs ){ + + // Don't mutate the name variable when 'input:name:isComplete' is true. + // This is the case when the user has typed a comma, so we can assume + // that the 'name' part of the query is now complete. + if( vs.var('input:name:isComplete').get() ){ + // return the view rendered using the original vars + return peliasQuery.view.phrase( vs ); + } + + // make a copy Vars so we don't mutate the original + var vsCopy = new peliasQuery.Vars( vs.export() ); + + // get the input 'name' variable and split in to tokens + var name = vs.var('input:name').get(), + tokens = name.split(' '); + + // single token only, abort (we don't want the *last* token) + // return null here will completely disable the view. + if( tokens.length < 2 ){ return null; } + + // set the 'name' variable in the copy to all but the last token + vsCopy.var('input:name').set( name.substr( 0, name.lastIndexOf(' ') ) ); + + // return the view rendered using the copy + return peliasQuery.view.phrase( vsCopy ); +};