Browse Source

Merge pull request #392 from pelias/master

Merge master into production
pull/745/head
Julian Simioni 9 years ago
parent
commit
bf14997734
  1. 45
      query/autocomplete.js
  2. 30
      query/autocomplete_defaults.js
  3. 35
      query/view/focus_selected_layers.js
  4. 37
      query/view/ngrams_last_token_only.js
  5. 19
      query/view/ngrams_strict.js
  6. 44
      query/view/phrase_first_tokens_only.js
  7. 84
      test/unit/fixture/autocomplete_linguistic_final_token.js
  8. 75
      test/unit/fixture/autocomplete_linguistic_focus.js
  9. 75
      test/unit/fixture/autocomplete_linguistic_focus_null_island.js
  10. 95
      test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
  11. 37
      test/unit/fixture/autocomplete_linguistic_only.js
  12. 158
      test/unit/fixture/autocomplete_linguistic_with_admin.js
  13. 44
      test/unit/query/autocomplete.js

45
query/autocomplete.js

@ -4,6 +4,13 @@ var peliasQuery = require('pelias-query'),
textParser = require('./text_parser'),
check = require('check-types');
// additional views (these may be merged in to pelias/query at a later date)
var views = {
ngrams_strict: require('./view/ngrams_strict'),
focus_selected_layers: require('./view/focus_selected_layers'),
ngrams_last_token_only: require('./view/ngrams_last_token_only'),
phrase_first_tokens_only: require('./view/phrase_first_tokens_only')
};
//------------------------------
// autocomplete query
@ -11,7 +18,13 @@ var peliasQuery = require('pelias-query'),
var query = new peliasQuery.layout.FilteredBooleanQuery();
// mandatory matches
query.score( peliasQuery.view.ngrams, 'must' );
query.score( views.phrase_first_tokens_only, 'must' );
query.score( views.ngrams_last_token_only, 'must' );
// address components
query.score( peliasQuery.view.address('housenumber') );
query.score( peliasQuery.view.address('street') );
query.score( peliasQuery.view.address('postcode') );
// admin components
query.score( peliasQuery.view.admin('alpha3') );
@ -24,10 +37,9 @@ query.score( peliasQuery.view.admin('locality') );
query.score( peliasQuery.view.admin('neighborhood') );
// scoring boost
query.score( peliasQuery.view.phrase );
query.score( peliasQuery.view.focus( peliasQuery.view.ngrams ) );
query.score( peliasQuery.view.popularity( peliasQuery.view.phrase ) );
query.score( peliasQuery.view.population( peliasQuery.view.phrase ) );
query.score( views.focus_selected_layers( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.ngrams_strict ) );
query.score( peliasQuery.view.population( views.ngrams_strict ) );
// --------------------------------
@ -39,8 +51,29 @@ function generateQuery( clean ){
var vs = new peliasQuery.Vars( defaults );
// mark the name as incomplete (user has not yet typed a comma)
vs.var( 'input:name:isComplete', false );
// perform some operations on 'clean.text':
// 1. if there is a space followed by a single char, remove them.
// - this is required as the index uses 2grams and sending 1grams
// - to a 2gram index when using 'type:phrase' or 'operator:and' will
// - result in a complete failure of the query.
// 2. trim leading and trailing whitespace.
var text = clean.text.replace(/( .$)/g,'').trim();
// if the input parser has run and suggested a 'parsed_text.name' to use.
if( clean.hasOwnProperty('parsed_text') && clean.parsed_text.hasOwnProperty('name') ){
// mark the name as complete (user has already typed a comma)
vs.var( 'input:name:isComplete', true );
// use 'parsed_text.name' instead of 'clean.text'.
text = clean.parsed_text.name;
}
// input text
vs.var( 'input:name', clean.text );
vs.var( 'input:name', text );
// focus point
if( check.number(clean['focus.point.lat']) &&

30
query/autocomplete_defaults.js

@ -20,9 +20,9 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true,
'ngram:analyzer': 'peliasOneEdgeGram',
'ngram:analyzer': 'peliasPhrase',
'ngram:field': 'name.default',
'ngram:boost': 1,
'ngram:boost': 100,
'phrase:analyzer': 'peliasPhrase',
'phrase:field': 'phrase.default',
@ -30,13 +30,13 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'phrase:slop': 2,
'focus:function': 'linear',
'focus:offset': '1km',
'focus:scale': '50km',
'focus:offset': '10km',
'focus:scale': '250km',
'focus:decay': 0.5,
'focus:weight': 2,
'focus:weight': 3,
'function_score:score_mode': 'avg',
'function_score:boost_mode': 'replace',
'function_score:boost_mode': 'multiply',
'address:housenumber:analyzer': 'peliasHousenumber',
'address:housenumber:field': 'address.number',
@ -48,39 +48,39 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'address:postcode:analyzer': 'peliasZip',
'address:postcode:field': 'address.zip',
'address:postcode:boost': 20,
'address:postcode:boost': 2000,
'admin:alpha3:analyzer': 'standard',
'admin:alpha3:field': 'alpha3',
'admin:alpha3:boost': 5,
'admin:alpha3:boost': 1000,
'admin:admin0:analyzer': 'peliasAdmin',
'admin:admin0:field': 'admin0',
'admin:admin0:boost': 4,
'admin:admin0:boost': 800,
'admin:admin1:analyzer': 'peliasAdmin',
'admin:admin1:field': 'admin1',
'admin:admin1:boost': 3,
'admin:admin1:boost': 600,
'admin:admin1_abbr:analyzer': 'peliasAdmin',
'admin:admin1_abbr:field': 'admin1_abbr',
'admin:admin1_abbr:boost': 3,
'admin:admin1_abbr:boost': 600,
'admin:admin2:analyzer': 'peliasAdmin',
'admin:admin2:field': 'admin2',
'admin:admin2:boost': 2,
'admin:admin2:boost': 400,
'admin:local_admin:analyzer': 'peliasAdmin',
'admin:local_admin:field': 'local_admin',
'admin:local_admin:boost': 1,
'admin:local_admin:boost': 200,
'admin:locality:analyzer': 'peliasAdmin',
'admin:locality:field': 'locality',
'admin:locality:boost': 1,
'admin:locality:boost': 200,
'admin:neighborhood:analyzer': 'peliasAdmin',
'admin:neighborhood:field': 'neighborhood',
'admin:neighborhood:boost': 1,
'admin:neighborhood:boost': 200,
'popularity:field': 'popularity',
'popularity:modifier': 'log1p',

35
query/view/focus_selected_layers.js

@ -0,0 +1,35 @@
var peliasQuery = require('pelias-query');
/**
This view is the same as `peliasQuery.view.focus` with one exception:
if the view is generated successfully, we add a 'filter' clause which
restricts the targeted '_type' to be in the list specified below.
documents which are not in the '_type' list below will simply score 0 for
this section of the query.
**/
module.exports = function( subview ){
return function( vs ){
if( !subview ){ return null; } // subview validation failed
var macroView = peliasQuery.view.focus( subview );
if( !macroView ){ return null; } // macroView validation failed
var view = macroView( vs );
if( view && view.hasOwnProperty('function_score') ){
view.function_score.filter = {
'or': [
{ 'type': { 'value': 'osmnode' } },
{ 'type': { 'value': 'osmway' } },
{ 'type': { 'value': 'osmaddress' } },
{ 'type': { 'value': 'openaddresses' } }
]
};
}
return view;
};
};

37
query/view/ngrams_last_token_only.js

@ -0,0 +1,37 @@
var peliasQuery = require('pelias-query'),
ngrams_strict = require('./ngrams_strict');
/**
Ngrams view which trims the 'input:name' and only uses the LAST TOKEN.
eg. if the input was "100 foo str", then 'input:name' would only be 'str'
note: it is assumed that the rest of the input is matched using another view.
there is an additional flag 'input:name:isComplete' used to disable this view
selectively, see that section for more info.
code notes: this view makes a copy of the $vs object in order to change their
values without mutating the original values, which may be expected in their
unaltered form by other views.
**/
module.exports = function( vs ){
// Totally disable this view when bool value 'input:name:isComplete' is true.
// This is the case when the user has typed a comma, so we can assume
// that the 'name' part of the query is now complete.
if( vs.var('input:name:isComplete').get() ){ return null; }
// make a copy Vars so we don't mutate the original
var vsCopy = new peliasQuery.Vars( vs.export() );
// get the input 'name' variable
var name = vs.var('input:name').get();
// set the 'name' variable in the copy to only the last token
vsCopy.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) );
// return the view rendered using the copy
return ngrams_strict( vsCopy );
};

19
query/view/ngrams_strict.js

@ -0,0 +1,19 @@
var peliasQuery = require('pelias-query');
/**
Ngrams view with the additional properties to enable:
type:phrase -> tokens MUST appear in the same order in BOTH query and index
operator:and -> ALL tokens are mandatory, missing any single token will cause
a query failure.
**/
module.exports = function( vs ){
var view = peliasQuery.view.ngrams( vs );
view.match['name.default'].type = 'phrase';
view.match['name.default'].operator = 'and';
return view;
};

44
query/view/phrase_first_tokens_only.js

@ -0,0 +1,44 @@
var peliasQuery = require('pelias-query');
/**
Phrase view which trims the 'input:name' and uses ALL BUT the last token.
eg. if the input was "100 foo str", then 'input:name' would only be '100 foo'
note: it is assumed that the rest of the input is matched using another view.
there is an additional flag 'input:name:isComplete' used to disable this view
selectively, see that section for more info.
code notes: this view makes a copy of the $vs object in order to change their
values without mutating the original values, which may be expected in their
unaltered form by other views.
**/
module.exports = function( vs ){
// Don't mutate the name variable when 'input:name:isComplete' is true.
// This is the case when the user has typed a comma, so we can assume
// that the 'name' part of the query is now complete.
if( vs.var('input:name:isComplete').get() ){
// return the view rendered using the original vars
return peliasQuery.view.phrase( vs );
}
// make a copy Vars so we don't mutate the original
var vsCopy = new peliasQuery.Vars( vs.export() );
// get the input 'name' variable and split in to tokens
var name = vs.var('input:name').get(),
tokens = name.split(' ');
// single token only, abort (we don't want the *last* token)
// return null here will completely disable the view.
if( tokens.length < 2 ){ return null; }
// set the 'name' variable in the copy to all but the last token
vsCopy.var('input:name').set( name.substr( 0, name.lastIndexOf(' ') ) );
// return the view rendered using the copy
return peliasQuery.view.phrase( vsCopy );
};

84
test/unit/fixture/autocomplete_linguistic_final_token.js

@ -0,0 +1,84 @@
// Expected query body for the 'valid lingustic autocomplete final token' unit
// test, which generates an autocomplete query for the text 'one t'.
// The trailing single-character token is removed before the query is built,
// so every clause below searches for 'one' only.
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match: strict ngrams on the only remaining token
'must': [{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'one',
'type': 'phrase',
'operator': 'and'
}
}
}],
// scoring boosts: popularity and population
'should':[{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'one',
'type': 'phrase',
'operator': 'and'
}
}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'filter': {
'exists': {
'field': 'popularity'
}
},
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'popularity'
},
'weight': 1
}]
}
},{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'one',
'type': 'phrase',
'operator': 'and'
}
}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'filter': {
'exists': {
'field': 'population'
}
},
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'population'
},
'weight': 2
}]
}
}]
}
}
}
},
'sort': [ '_score' ],
'size': 20,
'track_scores': true
};

75
test/unit/fixture/autocomplete_linguistic_focus.js

@ -1,4 +1,3 @@
var vs = require('../../../query/autocomplete_defaults');
module.exports = {
'query': {
@ -8,30 +7,24 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
'type': 'phrase',
'operator': 'and'
}
}
}],
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2
}
}
}, {
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasOneEdgeGram',
'boost': 1,
'query': 'test'
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'operator': 'and'
}
}
},
@ -42,26 +35,50 @@ module.exports = {
'lat': 29.49136,
'lon': -82.50622
},
'offset': '1km',
'scale': '50km',
'offset': '10km',
'scale': '250km',
'decay': 0.5
}
},
'weight': 2
'weight': 3
}],
'score_mode': 'avg',
'boost_mode': 'replace'
'boost_mode': 'multiply',
'filter': {
'or': [
{
'type': {
'value': 'osmnode'
}
},
{
'type': {
'value': 'osmway'
}
},
{
'type': {
'value': 'osmaddress'
}
},
{
'type': {
'value': 'openaddresses'
}
}
]
}
}
},{
'function_score': {
'query': {
'match': {
'phrase.default': {
'query': 'test',
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'slop': 2,
'boost': 1
'operator': 'and'
}
}
},
@ -85,12 +102,12 @@ module.exports = {
'function_score': {
'query': {
'match': {
'phrase.default': {
'query': 'test',
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'slop': 2,
'boost': 1
'operator': 'and'
}
}
},
@ -116,6 +133,6 @@ module.exports = {
}
},
'sort': [ '_score' ],
'size': vs.size,
'size': 20,
'track_scores': true
};

75
test/unit/fixture/autocomplete_linguistic_focus_null_island.js

@ -1,4 +1,3 @@
var vs = require('../../../query/autocomplete_defaults');
module.exports = {
'query': {
@ -8,30 +7,24 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
'type': 'phrase',
'operator': 'and'
}
}
}],
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2
}
}
}, {
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasOneEdgeGram',
'boost': 1,
'query': 'test'
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'operator': 'and'
}
}
},
@ -42,26 +35,50 @@ module.exports = {
'lat': 0,
'lon': 0
},
'offset': '1km',
'scale': '50km',
'offset': '10km',
'scale': '250km',
'decay': 0.5
}
},
'weight': 2
'weight': 3
}],
'score_mode': 'avg',
'boost_mode': 'replace'
'boost_mode': 'multiply',
'filter': {
'or': [
{
'type': {
'value': 'osmnode'
}
},
{
'type': {
'value': 'osmway'
}
},
{
'type': {
'value': 'osmaddress'
}
},
{
'type': {
'value': 'openaddresses'
}
}
]
}
}
},{
'function_score': {
'query': {
'match': {
'phrase.default': {
'query': 'test',
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'slop': 2,
'boost': 1
'operator': 'and'
}
}
},
@ -85,12 +102,12 @@ module.exports = {
'function_score': {
'query': {
'match': {
'phrase.default': {
'query': 'test',
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'slop': 2,
'boost': 1
'operator': 'and'
}
}
},
@ -116,6 +133,6 @@ module.exports = {
}
},
'sort': [ '_score' ],
'size': vs.size,
'size': 20,
'track_scores': true
};

95
test/unit/fixture/autocomplete_linguistic_multiple_tokens.js

@ -0,0 +1,95 @@
// Expected query body for the 'valid lingustic autocomplete with 3 tokens'
// unit test, which generates an autocomplete query for the text
// 'one two three' without any parsed_text.
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory matches: the leading tokens as a phrase, plus strict ngrams on
// the last token
'must': [{
'match': {
'phrase.default': {
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2,
'query': 'one two'
}
}
},
{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'three',
'type': 'phrase',
'operator': 'and'
}
}
}],
// scoring boosts: popularity and population, both using the full input
'should':[{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'one two three',
'type': 'phrase',
'operator': 'and'
}
}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'filter': {
'exists': {
'field': 'popularity'
}
},
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'popularity'
},
'weight': 1
}]
}
},{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'one two three',
'type': 'phrase',
'operator': 'and'
}
}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'filter': {
'exists': {
'field': 'population'
}
},
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'population'
},
'weight': 2
}]
}
}]
}
}
}
},
'sort': [ '_score' ],
'size': 20,
'track_scores': true
};

37
test/unit/fixture/autocomplete_linguistic_only.js

@ -1,4 +1,3 @@
var vs = require('../../../query/autocomplete_defaults');
module.exports = {
'query': {
@ -8,32 +7,24 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'query': 'test',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
}
}
}],
'should': [{
'match': {
'phrase.default': {
'query': 'test',
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'boost': 1,
'slop': 2
'operator': 'and'
}
}
},{
}],
'should':[{
'function_score': {
'query': {
'match': {
'phrase.default': {
'query': 'test',
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'slop': 2,
'boost': 1
'operator': 'and'
}
}
},
@ -57,12 +48,12 @@ module.exports = {
'function_score': {
'query': {
'match': {
'phrase.default': {
'query': 'test',
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'test',
'type': 'phrase',
'slop': 2,
'boost': 1
'operator': 'and'
}
}
},
@ -88,6 +79,6 @@ module.exports = {
}
},
'sort': [ '_score' ],
'size': vs.size,
'size': 20,
'track_scores': true
};

158
test/unit/fixture/autocomplete_linguistic_with_admin.js

@ -0,0 +1,158 @@
// Expected query body for the 'valid lingustic autocomplete with comma
// delimited admin section' unit test, which generates an autocomplete query
// for the text 'one two, three' with parsed_text.name 'one two' and
// parsed_text.admin_parts 'three'.
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
// mandatory match: the parsed name as a phrase. The last-token ngrams view
// is disabled because the name is marked as complete.
'must': [
{
'match': {
'phrase.default': {
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2,
'query': 'one two'
}
}
}
],
'should': [
// admin components, all matched against 'three'
{
'match': {
'admin0': {
'analyzer': 'peliasAdmin',
'boost': 800,
'query': 'three'
}
}
},
{
'match': {
'admin1': {
'analyzer': 'peliasAdmin',
'boost': 600,
'query': 'three'
}
}
},
{
'match': {
'admin1_abbr': {
'analyzer': 'peliasAdmin',
'boost': 600,
'query': 'three'
}
}
},
{
'match': {
'admin2': {
'analyzer': 'peliasAdmin',
'boost': 400,
'query': 'three'
}
}
},
{
'match': {
'local_admin': {
'analyzer': 'peliasAdmin',
'boost': 200,
'query': 'three'
}
}
},
{
'match': {
'locality': {
'analyzer': 'peliasAdmin',
'boost': 200,
'query': 'three'
}
}
},
{
'match': {
'neighborhood': {
'analyzer': 'peliasAdmin',
'boost': 200,
'query': 'three'
}
}
},
// scoring boosts: popularity and population, using the parsed name
{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'one two',
'type': 'phrase',
'operator': 'and'
}
}
},
'max_boost': 20,
'functions': [
{
'field_value_factor': {
'modifier': 'log1p',
'field': 'popularity'
},
'weight': 1
}
],
'score_mode': 'first',
'boost_mode': 'replace',
'filter': {
'exists': {
'field': 'popularity'
}
}
}
},
{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'query': 'one two',
'type': 'phrase',
'operator': 'and'
}
}
},
'max_boost': 20,
'functions': [
{
'field_value_factor': {
'modifier': 'log1p',
'field': 'population'
},
'weight': 2
}
],
'score_mode': 'first',
'boost_mode': 'replace',
'filter': {
'exists': {
'field': 'population'
}
}
}
}
]
}
}
}
},
'size': 20,
'track_scores': true,
'sort': [
'_score'
]
};

44
test/unit/query/autocomplete.js

@ -24,6 +24,50 @@ module.exports.tests.query = function(test, common) {
t.end();
});
test('valid lingustic autocomplete with 3 tokens', function(t) {
// three space-delimited tokens, no parsed_text
var clean = { text: 'one two three' };

// round-trip through JSON so the comparison is made on plain data
var actual = JSON.parse( JSON.stringify( generate( clean ) ) );
var fixture = require('../fixture/autocomplete_linguistic_multiple_tokens.js');

t.deepEqual(actual, fixture, 'valid autocomplete query');
t.end();
});
test('valid lingustic autocomplete with comma delimited admin section', function(t) {
// input where the parser has already split the name from the admin section
var clean = {
text: 'one two, three',
parsed_text: {
name: 'one two',
regions: [ 'one two', 'three' ],
admin_parts: 'three'
}
};

// round-trip through JSON so the comparison is made on plain data
var actual = JSON.parse( JSON.stringify( generate( clean ) ) );
var fixture = require('../fixture/autocomplete_linguistic_with_admin.js');

t.deepEqual(actual, fixture, 'valid autocomplete query');
t.end();
});
// A final token shorter than 2 chars has to be stripped from the string.
// note: this behaviour exists because of the min_gram size of 2.
// note: remove this behaviour if 1 grams are enabled at a later date.
test('valid lingustic autocomplete final token', function(t) {
// the trailing ' t' is a single-character final token
var clean = { text: 'one t' };

// round-trip through JSON so the comparison is made on plain data
var actual = JSON.parse( JSON.stringify( generate( clean ) ) );
var fixture = require('../fixture/autocomplete_linguistic_final_token.js');

t.deepEqual(actual, fixture, 'valid autocomplete query');
t.end();
});
test('autocomplete + focus', function(t) {
var query = generate({
text: 'test',

Loading…
Cancel
Save