Browse Source

Merge pull request #523 from pelias/master

Merge master into staging
pull/530/head
Julian Simioni 9 years ago
parent
commit
bfac84a1d3
  1. 27
      helper/labelGenerator.js
  2. 86
      helper/text_parser.js
  3. 3
      helper/type_mapping.js
  4. 4
      middleware/geocodeJSON.js
  5. 2
      middleware/options.js
  6. 8
      package.json
  7. 49
      query/autocomplete.js
  8. 14
      query/autocomplete_defaults.js
  9. 2
      query/reverse_defaults.js
  10. 1
      query/search.js
  11. 6
      query/search_defaults.js
  12. 13
      query/text_parser.js
  13. 40
      query/view/boost_exact_matches.js
  14. 17
      query/view/ngrams_last_token_only.js
  15. 25
      query/view/phrase_first_tokens_only.js
  16. 17
      query/view/pop_subquery.js
  17. 4
      sanitiser/_text.js
  18. 112
      sanitiser/_tokenizer.js
  19. 1
      sanitiser/autocomplete.js
  20. 1
      test/ciao/autocomplete/layers_alias_coarse.coffee
  21. 2
      test/ciao/autocomplete/layers_invalid.coffee
  22. 2
      test/ciao/autocomplete/layers_mix_invalid_valid.coffee
  23. 1
      test/ciao/reverse/layers_alias_coarse.coffee
  24. 2
      test/ciao/reverse/layers_invalid.coffee
  25. 2
      test/ciao/reverse/layers_mix_invalid_valid.coffee
  26. 1
      test/ciao/search/layers_alias_coarse.coffee
  27. 2
      test/ciao/search/layers_invalid.coffee
  28. 2
      test/ciao/search/layers_mix_invalid_valid.coffee
  29. 28
      test/unit/fixture/autocomplete_linguistic_final_token.js
  30. 16
      test/unit/fixture/autocomplete_linguistic_focus.js
  31. 16
      test/unit/fixture/autocomplete_linguistic_focus_null_island.js
  32. 32
      test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
  33. 12
      test/unit/fixture/autocomplete_linguistic_only.js
  34. 36
      test/unit/fixture/autocomplete_linguistic_with_admin.js
  35. 155
      test/unit/fixture/autocomplete_single_character_street.js
  36. 12
      test/unit/fixture/autocomplete_with_source_filtering.js
  37. 2
      test/unit/fixture/search_boundary_country.js
  38. 10
      test/unit/fixture/search_full_address.js
  39. 2
      test/unit/fixture/search_linguistic_bbox.js
  40. 2
      test/unit/fixture/search_linguistic_focus.js
  41. 2
      test/unit/fixture/search_linguistic_focus_bbox.js
  42. 2
      test/unit/fixture/search_linguistic_focus_null_island.js
  43. 2
      test/unit/fixture/search_linguistic_only.js
  44. 2
      test/unit/fixture/search_linguistic_viewport.js
  45. 2
      test/unit/fixture/search_linguistic_viewport_min_diagonal.js
  46. 10
      test/unit/fixture/search_partial_address.js
  47. 10
      test/unit/fixture/search_regions_address.js
  48. 2
      test/unit/fixture/search_with_source_filtering.js
  49. 11
      test/unit/helper/labelGenerator_examples.js
  50. 150
      test/unit/helper/text_parser.js
  51. 2
      test/unit/helper/type_mapping.js
  52. 74
      test/unit/query/autocomplete.js
  53. 26
      test/unit/query/search.js
  54. 2
      test/unit/run.js
  55. 10
      test/unit/sanitiser/_layers.js
  56. 457
      test/unit/sanitiser/_tokenizer.js
  57. 5
      test/unit/sanitiser/autocomplete.js
  58. 4
      test/unit/sanitiser/search.js

27
helper/labelGenerator.js

@ -16,18 +16,27 @@ module.exports = function( record ){
// retain only things that are truthy // retain only things that are truthy
labelParts = _.compact(labelParts); labelParts = _.compact(labelParts);
// first, dedupe the name and 1st label array elements // third, dedupe and join with a comma and return
// this is used to ensure that the `name` and first admin hierarchy elements aren't repeated return dedupeNameAndFirstLabelElement(labelParts).join(', ');
// eg - `["Lancaster", "Lancaster", "PA", "United States"]` -> `["Lancaster", "PA", "United States"]`
var dedupedNameAndFirstLabelElement = _.uniq([labelParts.shift(), labelParts.shift()]);
// second, unshift the deduped parts back onto the labelParts };
labelParts.unshift.apply(labelParts, dedupedNameAndFirstLabelElement);
// third, join with a comma and return function dedupeNameAndFirstLabelElement(labelParts) {
return labelParts.join(', '); // only dedupe if a result has more than a name (the first label part)
if (labelParts.length > 1) {
// first, dedupe the name and 1st label array elements
// this is used to ensure that the `name` and first admin hierarchy elements aren't repeated
// eg - `["Lancaster", "Lancaster", "PA", "United States"]` -> `["Lancaster", "PA", "United States"]`
var deduped = _.uniq([labelParts.shift(), labelParts.shift()]);
}; // second, unshift the deduped parts back onto the labelParts
labelParts.unshift.apply(labelParts, deduped);
}
return labelParts;
}
function getSchema(country_a) { function getSchema(country_a) {
if (country_a && country_a.length && schemas[country_a]) { if (country_a && country_a.length && schemas[country_a]) {

86
helper/text_parser.js

@ -1,86 +0,0 @@
var parser = require('addressit');
var extend = require('extend');
var type_mapping = require('../helper/type_mapping');
var check = require('check-types');
var logger = require('pelias-logger').get('api');
var DELIM = ',';
/*
 * Layer preference for very short inputs.
 *
 * For performance, and to prefer POI and admin records, inputs of three
 * characters or fewer are restricted to the coarse layers. Longer inputs get
 * no preference at all (the function returns undefined).
 *
 * @param {string} query  the raw text typed by the user
 * @returns {Array|undefined} `type_mapping.layer_mapping.coarse` for short
 *          inputs, otherwise undefined
 */
module.exports.get_layers = function get_layers(query) {
  var isVeryShortInput = query.length <= 3;

  // longer inputs express no layer preference
  if (!isVeryShortInput) {
    return;
  }

  // no address parsing required
  return type_mapping.layer_mapping.coarse;
};
/*
 * Parse free-form query text in to its address components.
 *
 * Two strategies are combined:
 *  1. a naive split on DELIM, eg. 'flatiron, new york, ny' becomes
 *     name 'flatiron' and admin_parts 'new york, ny'.
 *  2. a full parse using the addressit parser, skipped for inputs so short
 *     they obviously can't contain an address.
 * Values from the full parse overwrite values from the naive split.
 *
 * @param {string} query  the raw text typed by the user
 * @returns {Object|null} the recognised address fields, or null when the only
 *          thing found was 'regions'
 */
module.exports.get_parsed_address = function get_parsed_address(query) {
  var SEPARATOR = DELIM + ' ';
  var queryParts = query.split(DELIM);

  // strategy 1: naive split, only meaningful once a delimiter has been typed
  var parsedAddress = {};
  if (queryParts.length > 1) {
    // everything after the first part is admin info; trim each part just in
    // case and join them back together with consistent spacing
    var adminParts = [];
    for (var i = 1; i < queryParts.length; i++) {
      adminParts.push(queryParts[i].trim());
    }
    parsedAddress.name = queryParts[0].trim();
    parsedAddress.admin_parts = adminParts.join(SEPARATOR);
  }

  // strategy 2: full address parsing on the re-joined text
  var normalizedQuery = queryParts.join(SEPARATOR);
  if (normalizedQuery.length > 3) {
    extend(parsedAddress, parser(normalizedQuery));
  }

  // copy across only the fields we know about, dropping falsy values
  var knownFields = [
    'name',
    'number',
    'street',
    'city',
    'state',
    'country',
    'postalcode',
    'regions',
    'admin_parts'
  ];
  var parsed_text = {};
  for (var j = 0; j < knownFields.length; j++) {
    var field = knownFields[j];
    if (parsedAddress[field]) {
      parsed_text[field] = parsedAddress[field];
    }
  }

  // if all we found was regions, ignore it as it is not enough information
  // to make smarter decisions
  var foundFields = Object.keys(parsed_text);
  if (foundFields.length === 1 && !check.undefined(parsed_text.regions)) {
    logger.info('Ignoring address parser output, regions only');
    return null;
  }

  return parsed_text;
};

3
helper/type_mapping.js

@ -49,7 +49,8 @@ var LAYERS_BY_SOURCE = {
openaddresses: [ 'address' ], openaddresses: [ 'address' ],
geonames: [ 'country', 'region', 'county', 'locality', 'venue' ], geonames: [ 'country', 'region', 'county', 'locality', 'venue' ],
whosonfirst: [ 'continent', 'country', 'dependency', 'macroregion', 'region', whosonfirst: [ 'continent', 'country', 'dependency', 'macroregion', 'region',
'locality', 'localadmin', 'macrocounty', 'county', 'macrohood', 'neighbourhood', 'microhood', 'disputed'] 'locality', 'localadmin', 'macrocounty', 'county', 'macrohood', 'borough',
'neighbourhood', 'microhood', 'disputed']
}; };
/* /*

4
middleware/geocodeJSON.js

@ -56,6 +56,10 @@ function convertToGeocodeJSON(req, res, next, opts) {
// Helpful for debugging and understanding how the input impacts results. // Helpful for debugging and understanding how the input impacts results.
res.body.geocoding.query = req.clean; res.body.geocoding.query = req.clean;
// remove arrays produced by the tokenizer (only intended to be used internally).
delete res.body.geocoding.query.tokens_complete;
delete res.body.geocoding.query.tokens_incomplete;
// OPTIONAL. Warnings and errors. // OPTIONAL. Warnings and errors.
addMessages(req, 'warnings', res.body.geocoding); addMessages(req, 'warnings', res.body.geocoding);
addMessages(req, 'errors', res.body.geocoding); addMessages(req, 'errors', res.body.geocoding);

2
middleware/options.js

@ -9,7 +9,7 @@
function middleware(req, res, next){ function middleware(req, res, next){
if( req.method === 'OPTIONS' ){ if( req.method === 'OPTIONS' ){
res.send(200); res.sendStatus(200);
} else { } else {
next(); next();
} }

8
package.json

@ -35,7 +35,6 @@
"elasticsearch": ">=1.2.1" "elasticsearch": ">=1.2.1"
}, },
"dependencies": { "dependencies": {
"addressit": "git://github.com/dianashk/addressit.git#temp",
"async": "^1.5.2", "async": "^1.5.2",
"check-types": "^6.0.0", "check-types": "^6.0.0",
"cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve",
@ -53,9 +52,10 @@
"morgan": "1.7.0", "morgan": "1.7.0",
"pelias-config": "^1.0.1", "pelias-config": "^1.0.1",
"pelias-logger": "^0.0.8", "pelias-logger": "^0.0.8",
"pelias-model": "^3.1.0", "pelias-model": "^4.0.0",
"pelias-query": "6.2.0", "pelias-query": "6.3.0",
"pelias-suggester-pipeline": "2.0.4", "pelias-suggester-pipeline": "2.0.4",
"pelias-text-analyzer": "^1.0.1",
"stats-lite": "1.0.3", "stats-lite": "1.0.3",
"through2": "2.0.1" "through2": "2.0.1"
}, },
@ -68,7 +68,7 @@
"precommit-hook": "^3.0.0", "precommit-hook": "^3.0.0",
"proxyquire": "^1.4.0", "proxyquire": "^1.4.0",
"tap-dot": "1.0.5", "tap-dot": "1.0.5",
"tape": "^4.4.0" "tape": "^4.5.1"
}, },
"pre-commit": [ "pre-commit": [
"lint", "lint",

49
query/autocomplete.js

@ -9,7 +9,9 @@ var views = {
ngrams_strict: require('./view/ngrams_strict'), ngrams_strict: require('./view/ngrams_strict'),
focus_selected_layers: require('./view/focus_selected_layers'), focus_selected_layers: require('./view/focus_selected_layers'),
ngrams_last_token_only: require('./view/ngrams_last_token_only'), ngrams_last_token_only: require('./view/ngrams_last_token_only'),
phrase_first_tokens_only: require('./view/phrase_first_tokens_only') phrase_first_tokens_only: require('./view/phrase_first_tokens_only'),
pop_subquery: require('./view/pop_subquery'),
boost_exact_matches: require('./view/boost_exact_matches')
}; };
//------------------------------ //------------------------------
@ -32,14 +34,16 @@ query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region') ); query.score( peliasQuery.view.admin('region') );
query.score( peliasQuery.view.admin('region_a') ); query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin('county') ); query.score( peliasQuery.view.admin('county') );
query.score( peliasQuery.view.admin('borough') );
query.score( peliasQuery.view.admin('localadmin') ); query.score( peliasQuery.view.admin('localadmin') );
query.score( peliasQuery.view.admin('locality') ); query.score( peliasQuery.view.admin('locality') );
query.score( peliasQuery.view.admin('neighbourhood') ); query.score( peliasQuery.view.admin('neighbourhood') );
// scoring boost // scoring boost
query.score( views.boost_exact_matches );
query.score( views.focus_selected_layers( views.ngrams_strict ) ); query.score( views.focus_selected_layers( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.ngrams_strict ) ); query.score( peliasQuery.view.popularity( views.pop_subquery ) );
query.score( peliasQuery.view.population( views.ngrams_strict ) ); query.score( peliasQuery.view.population( views.pop_subquery ) );
// non-scoring hard filters // non-scoring hard filters
query.filter( peliasQuery.view.sources ); query.filter( peliasQuery.view.sources );
@ -59,29 +63,28 @@ function generateQuery( clean ){
vs.var( 'sources', clean.sources ); vs.var( 'sources', clean.sources );
} }
// mark the name as incomplete (user has not yet typed a comma) // pass the input tokens to the views so they can choose which tokens
vs.var( 'input:name:isComplete', false ); // are relevant for their specific function.
if( check.array( clean.tokens ) ){
// perform some operations on 'clean.text': vs.var( 'input:name:tokens', clean.tokens );
// 1. if there is a space followed by a single char, remove them. vs.var( 'input:name:tokens_complete', clean.tokens_complete );
// - this is required as the index uses 2grams and sending 1grams vs.var( 'input:name:tokens_incomplete', clean.tokens_incomplete );
// - to a 2gram index when using 'type:phrase' or 'operator:and' will
// - result in a complete failure of the query.
// 2. trim leading and trailing whitespace.
var text = clean.text.replace(/( .$)/g,'').trim();
// if the input parser has run and suggested a 'parsed_text.name' to use.
if( clean.hasOwnProperty('parsed_text') && clean.parsed_text.hasOwnProperty('name') ){
// mark the name as complete (user has already typed a comma)
vs.var( 'input:name:isComplete', true );
// use 'parsed_text.name' instead of 'clean.text'.
text = clean.parsed_text.name;
} }
// input text // input text
vs.var( 'input:name', text ); vs.var( 'input:name', clean.text );
// if the tokenizer has run then we set 'input:name' to be the combination of the
// 'complete' tokens with the 'incomplete' tokens, the resulting array differs
// slightly from the 'input:name:tokens' array as some tokens might have been
// removed in the process; such as single grams which are not present in the
// ngrams index.
if( check.array( clean.tokens_complete ) && check.array( clean.tokens_incomplete ) ){
var combined = clean.tokens_complete.concat( clean.tokens_incomplete );
if( combined.length ){
vs.var( 'input:name', combined.join(' ') );
}
}
// focus point // focus point
if( check.number(clean['focus.point.lat']) && if( check.number(clean['focus.point.lat']) &&

14
query/autocomplete_defaults.js

@ -20,20 +20,20 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed', 'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true, 'boundary:rect:_cache': true,
'ngram:analyzer': 'peliasPhrase', 'ngram:analyzer': 'peliasQueryPartialToken',
'ngram:field': 'name.default', 'ngram:field': 'name.default',
'ngram:boost': 100, 'ngram:boost': 100,
'phrase:analyzer': 'peliasPhrase', 'phrase:analyzer': 'peliasQueryFullToken',
'phrase:field': 'phrase.default', 'phrase:field': 'name.default',
'phrase:boost': 1, 'phrase:boost': 1,
'phrase:slop': 2, 'phrase:slop': 3,
'focus:function': 'linear', 'focus:function': 'linear',
'focus:offset': '0km', 'focus:offset': '0km',
'focus:scale': '250km', 'focus:scale': '250km',
'focus:decay': 0.5, 'focus:decay': 0.5,
'focus:weight': 10, 'focus:weight': 40,
'function_score:score_mode': 'avg', 'function_score:score_mode': 'avg',
'function_score:boost_mode': 'multiply', 'function_score:boost_mode': 'multiply',
@ -82,6 +82,10 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'admin:neighbourhood:field': 'parent.neighbourhood', 'admin:neighbourhood:field': 'parent.neighbourhood',
'admin:neighbourhood:boost': 200, 'admin:neighbourhood:boost': 200,
'admin:borough:analyzer': 'peliasAdmin',
'admin:borough:field': 'parent.borough',
'admin:borough:boost': 600,
'popularity:field': 'popularity', 'popularity:field': 'popularity',
'popularity:modifier': 'log1p', 'popularity:modifier': 'log1p',
'popularity:max_boost': 20, 'popularity:max_boost': 20,

2
query/reverse_defaults.js

@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed', 'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true, 'boundary:rect:_cache': true,
'ngram:analyzer': 'peliasOneEdgeGram', 'ngram:analyzer': 'peliasQueryPartialToken',
'ngram:field': 'name.default', 'ngram:field': 'name.default',
'ngram:boost': 1, 'ngram:boost': 1,

1
query/search.js

@ -30,6 +30,7 @@ query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region') ); query.score( peliasQuery.view.admin('region') );
query.score( peliasQuery.view.admin('region_a') ); query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin('county') ); query.score( peliasQuery.view.admin('county') );
query.score( peliasQuery.view.admin('borough') );
query.score( peliasQuery.view.admin('localadmin') ); query.score( peliasQuery.view.admin('localadmin') );
query.score( peliasQuery.view.admin('locality') ); query.score( peliasQuery.view.admin('locality') );
query.score( peliasQuery.view.admin('neighbourhood') ); query.score( peliasQuery.view.admin('neighbourhood') );

6
query/search_defaults.js

@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed', 'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true, 'boundary:rect:_cache': true,
'ngram:analyzer': 'peliasOneEdgeGram', 'ngram:analyzer': 'peliasIndexOneEdgeGram',
'ngram:field': 'name.default', 'ngram:field': 'name.default',
'ngram:boost': 1, 'ngram:boost': 1,
@ -78,6 +78,10 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'admin:locality:field': 'parent.locality', 'admin:locality:field': 'parent.locality',
'admin:locality:boost': 1, 'admin:locality:boost': 1,
'admin:borough:analyzer': 'peliasAdmin',
'admin:borough:field': 'parent.borough',
'admin:borough:boost': 1,
'admin:neighbourhood:analyzer': 'peliasAdmin', 'admin:neighbourhood:analyzer': 'peliasAdmin',
'admin:neighbourhood:field': 'parent.neighbourhood', 'admin:neighbourhood:field': 'parent.neighbourhood',
'admin:neighbourhood:boost': 1, 'admin:neighbourhood:boost': 1,

13
query/text_parser.js

@ -1,20 +1,15 @@
var logger = require('pelias-logger').get('api'); var logger = require('pelias-logger').get('api');
var placeTypes = require('../helper/placeTypes');
/* /*
This list should only contain admin fields we are comfortable matching in the case This list should only contain admin fields we are comfortable matching in the case
when we can't identify parts of an address. This shouldn't contain fields like country_a when we can't identify parts of an address. This shouldn't contain fields like country_a
or postalcode because we should only try to match those when we're sure that's what they are. or postalcode because we should only try to match those when we're sure that's what they are.
*/ */
var adminFields = [ var adminFields = placeTypes.concat([
'country', 'region_a'
'region', ]);
'region_a',
'county',
'localadmin',
'locality',
'neighbourhood'
];
/** /**
@todo: refactor me @todo: refactor me

40
query/view/boost_exact_matches.js

@ -0,0 +1,40 @@
var peliasQuery = require('pelias-query'),
searchDefaults = require('../search_defaults');
/**
This view (unfortunately) requires autocomplete to use the phrase.* index.
ideally we wouldn't need to use this, but at time of writing we are unable
to distinguish between 'complete tokens' and 'grams' in the name.* index.
this view was introduced in order to score exact matches higher than partial
matches, without it we find results such as "Clayton Avenue" appearing first
in the results list for the query "Clay Av".
the view uses some of the values from the 'search_defaults.js' file to add an
additional 'SHOULD' condition which scores exact matches slighly higher
than partial matches.
**/
module.exports = function( vs ){
// make a copy of the variables so we don't interfere with the values
// passed to other views.
var vsCopy = new peliasQuery.Vars( vs.export() );
// copy phrase:* values from search defaults
vsCopy.var('phrase:analyzer').set(searchDefaults['phrase:analyzer']);
vsCopy.var('phrase:field').set(searchDefaults['phrase:field']);
// get a copy of the *complete* tokens produced from the input:name
var tokens = vs.var('input:name:tokens_complete').get();
// no valid tokens to use, fail now, don't render this view.
if( !tokens || tokens.length < 1 ){ return null; }
// set 'input:name' to be only the fully completed characters
vsCopy.var('input:name').set( tokens.join(' ') );
return peliasQuery.view.phrase( vsCopy );
};

17
query/view/ngrams_last_token_only.js

@ -8,9 +8,6 @@ var peliasQuery = require('pelias-query'),
eg. if the input was "100 foo str", then 'input:name' would only be 'str' eg. if the input was "100 foo str", then 'input:name' would only be 'str'
note: it is assumed that the rest of the input is matched using another view. note: it is assumed that the rest of the input is matched using another view.
there is an additional flag 'input:name:isComplete' used to disable this view
selectively, see that section for more info.
code notes: this view makes a copy of the $vs object in order to change their code notes: this view makes a copy of the $vs object in order to change their
values without mutating the original values, which may be expected in their values without mutating the original values, which may be expected in their
unaltered form by other views. unaltered form by other views.
@ -18,19 +15,17 @@ var peliasQuery = require('pelias-query'),
module.exports = function( vs ){ module.exports = function( vs ){
// Totally disable this view when bool value 'input:name:isComplete' is true. // get a copy of the *tokens_incomplete* tokens produced from the input:name
// This is the case when the user has typed a comma, so we can assume var tokens = vs.var('input:name:tokens_incomplete').get();
// that the 'name' part of the query is now complete.
if( vs.var('input:name:isComplete').get() ){ return null; } // no valid tokens to use, fail now, don't render this view.
if( !tokens || tokens.length < 1 ){ return null; }
// make a copy Vars so we don't mutate the original // make a copy Vars so we don't mutate the original
var vsCopy = new peliasQuery.Vars( vs.export() ); var vsCopy = new peliasQuery.Vars( vs.export() );
// get the input 'name' variable
var name = vs.var('input:name').get();
// set the 'name' variable in the copy to only the last token // set the 'name' variable in the copy to only the last token
vsCopy.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) ); vsCopy.var('input:name').set( tokens.join(' ') );
// return the view rendered using the copy // return the view rendered using the copy
return ngrams_strict( vsCopy ); return ngrams_strict( vsCopy );

25
query/view/phrase_first_tokens_only.js

@ -7,9 +7,6 @@ var peliasQuery = require('pelias-query');
eg. if the input was "100 foo str", then 'input:name' would only be '100 foo' eg. if the input was "100 foo str", then 'input:name' would only be '100 foo'
note: it is assumed that the rest of the input is matched using another view. note: it is assumed that the rest of the input is matched using another view.
there is an additional flag 'input:name:isComplete' used to disable this view
selectively, see that section for more info.
code notes: this view makes a copy of the $vs object in order to change their code notes: this view makes a copy of the $vs object in order to change their
values without mutating the original values, which may be expected in their values without mutating the original values, which may be expected in their
unaltered form by other views. unaltered form by other views.
@ -17,27 +14,17 @@ var peliasQuery = require('pelias-query');
module.exports = function( vs ){ module.exports = function( vs ){
// Don't mutate the name variable when 'input:name:isComplete' is true. // get a copy of the *complete* tokens produced from the input:name
// This is the case when the user has typed a comma, so we can assume var tokens = vs.var('input:name:tokens_complete').get();
// that the 'name' part of the query is now complete.
if( vs.var('input:name:isComplete').get() ){ // no valid tokens to use, fail now, don't render this view.
// return the view rendered using the original vars if( !tokens || tokens.length < 1 ){ return null; }
return peliasQuery.view.phrase( vs );
}
// make a copy Vars so we don't mutate the original // make a copy Vars so we don't mutate the original
var vsCopy = new peliasQuery.Vars( vs.export() ); var vsCopy = new peliasQuery.Vars( vs.export() );
// get the input 'name' variable and split in to tokens
var name = vs.var('input:name').get(),
tokens = name.split(' ');
// single token only, abort (we don't want the *last* token)
// return null here will completely disable the view.
if( tokens.length < 2 ){ return null; }
// set the 'name' variable in the copy to all but the last token // set the 'name' variable in the copy to all but the last token
vsCopy.var('input:name').set( name.substr( 0, name.lastIndexOf(' ') ) ); vsCopy.var('input:name').set( tokens.join(' ') );
// return the view rendered using the copy // return the view rendered using the copy
return peliasQuery.view.phrase( vsCopy ); return peliasQuery.view.phrase( vsCopy );

17
query/view/pop_subquery.js

@ -0,0 +1,17 @@
var peliasQuery = require('pelias-query'),
check = require('check-types');
/**
Population / Popularity subquery
**/
module.exports = function( vs ){
var view = peliasQuery.view.ngrams( vs );
view.match['name.default'].analyzer = vs.var('phrase:analyzer');
delete view.match['name.default'].boost;
return view;
};

4
sanitiser/_text.js

@ -1,5 +1,5 @@
var check = require('check-types'), var check = require('check-types'),
text_parser = require('../helper/text_parser'); text_analyzer = require('pelias-text-analyzer');
// validate texts, convert types and apply defaults // validate texts, convert types and apply defaults
function sanitize( raw, clean ){ function sanitize( raw, clean ){
@ -19,7 +19,7 @@ function sanitize( raw, clean ){
clean.text = raw.text; clean.text = raw.text;
// parse text with query parser // parse text with query parser
var parsed_text = text_parser.get_parsed_address(clean.text); var parsed_text = text_analyzer.parse(clean.text);
if (check.assigned(parsed_text)) { if (check.assigned(parsed_text)) {
clean.parsed_text = parsed_text; clean.parsed_text = parsed_text;
} }

112
sanitiser/_tokenizer.js

@ -0,0 +1,112 @@
var check = require('check-types');
/**
  Simplified version of the elasticsearch tokenizer, used in order to be able
  to detect which tokens are 'complete' (the user has finished typing them)
  or 'incomplete' (the user has possibly only typed part of the token).

  note: punctuation is not stripped here as that is handled on the
  elasticsearch side, so sending a token such as 'st.' is not an issue; these
  tokens should *not* be modified as the analysis can use the punctuation to
  infer meaning.

  note: this sanitizer should run *after* the '_text' sanitizer so it can
  use the output of clean.parsed_text where available.

  Sets clean.tokens, clean.tokens_complete and clean.tokens_incomplete (always
  arrays) and returns an object of (always empty) errors and warnings.
**/
function sanitize( raw, clean ){

  // used to drop empty / missing elements from arrays
  function isTruthy( el ){ return el; }

  // error & warning messages
  var messages = { errors: [], warnings: [] };

  // when the input parser produced output we treat every token as complete.
  var parserHasRun = clean.hasOwnProperty('parsed_text');

  // the string to analyse; by default the whole input text
  var text = clean.text;

  // if the text parser has run then only the 'name' section of the
  // 'parsed_text' object is tokenized, ignoring the 'admin' parts.
  if( parserHasRun ){
    var parsed = clean.parsed_text;

    if( parsed.hasOwnProperty('name') ){
      // highest priority, use this string instead
      text = parsed.name;
    }
    else if( parsed.hasOwnProperty('street') ){
      // no name was produced but a street was; prefix the house number when
      // one is present. any 'unit' info the parser may produce is discarded.
      text = [ parsed.number, parsed.street ].filter( isTruthy ).join(' ');
    }
  }

  // always set the 'clean.tokens*' arrays for consistency and to avoid
  // upstream errors.
  clean.tokens = [];
  clean.tokens_complete = [];
  clean.tokens_incomplete = [];

  // sanity check that the text is valid.
  if( check.nonEmptyString( text ) ){
    // split according to the regex used in the elasticsearch tokenizer
    // see: https://github.com/pelias/schema/blob/master/settings.js
    // see: settings.analysis.tokenizer.peliasNameTokenizer
    var pieces = text.split(/[\s,\\\/]+/); // split on delimiters
    clean.tokens = pieces.filter( isTruthy ); // remove empty elements
  }

  // nothing to classify
  if( !clean.tokens.length ){
    return messages;
  }

  // the parser ran, so all these tokens are complete!
  if( parserHasRun ){
    clean.tokens_complete = clean.tokens.slice();
    return messages;
  }

  // the user hasn't finished typing yet: everything except the final token
  // is complete.
  var finalIndex = clean.tokens.length - 1;
  var finalToken = clean.tokens[ finalIndex ];
  clean.tokens_complete = clean.tokens.slice( 0, finalIndex );

  /**
    a final token which is a single non-numeric character must be discarded.
    at time of writing, single non-numeric ngrams are not stored in the index,
    sending them as part of the query would result in 0 documents being returned.
  **/
  if( finalToken.length > 1 || /[0-9]/.test( finalToken ) ){
    clean.tokens_incomplete = [ finalToken ];
  }

  return messages;
}
// export function
module.exports = sanitize;

1
sanitiser/autocomplete.js

@ -4,6 +4,7 @@ var sanitizeAll = require('../sanitiser/sanitizeAll'),
sanitizers = { sanitizers = {
singleScalarParameters: require('../sanitiser/_single_scalar_parameters'), singleScalarParameters: require('../sanitiser/_single_scalar_parameters'),
text: require('../sanitiser/_text'), text: require('../sanitiser/_text'),
tokenizer: require('../sanitiser/_tokenizer'),
size: require('../sanitiser/_size')(10, 10, 10), size: require('../sanitiser/_size')(10, 10, 10),
layers: require('../sanitiser/_targets')('layers', type_mapping.layer_mapping), layers: require('../sanitiser/_targets')('layers', type_mapping.layer_mapping),
sources: require('../sanitiser/_targets')('sources', type_mapping.source_mapping), sources: require('../sanitiser/_targets')('sources', type_mapping.source_mapping),

1
test/ciao/autocomplete/layers_alias_coarse.coffee

@ -41,6 +41,7 @@ json.geocoding.query.layers.should.eql [ "continent",
"macrocounty", "macrocounty",
"county", "county",
"macrohood", "macrohood",
"borough",
"neighbourhood", "neighbourhood",
"microhood", "microhood",
"disputed" "disputed"

2
test/ciao/autocomplete/layers_invalid.coffee

@ -24,7 +24,7 @@ json.features.should.be.instanceof Array
#? expected errors #? expected errors
should.exist json.geocoding.errors should.exist json.geocoding.errors
json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,macroregion,region,county,locality,continent,macrocounty,dependency,localadmin,macrohood,neighbourhood,microhood,disputed' ] json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,region,county,locality,continent,dependency,macroregion,localadmin,macrocounty,macrohood,borough,neighbourhood,microhood,disputed' ]
#? expected warnings #? expected warnings
should.not.exist json.geocoding.warnings should.not.exist json.geocoding.warnings

2
test/ciao/autocomplete/layers_mix_invalid_valid.coffee

@ -24,7 +24,7 @@ json.features.should.be.instanceof Array
#? expected errors #? expected errors
should.exist json.geocoding.errors should.exist json.geocoding.errors
json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,macroregion,region,county,locality,continent,macrocounty,dependency,localadmin,macrohood,neighbourhood,microhood,disputed' ] json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,region,county,locality,continent,dependency,macroregion,localadmin,macrocounty,macrohood,borough,neighbourhood,microhood,disputed' ]
#? expected warnings #? expected warnings
should.not.exist json.geocoding.warnings should.not.exist json.geocoding.warnings

1
test/ciao/reverse/layers_alias_coarse.coffee

@ -40,6 +40,7 @@ json.geocoding.query.layers.should.eql [ "continent",
"macrocounty", "macrocounty",
"county", "county",
"macrohood", "macrohood",
"borough",
"neighbourhood", "neighbourhood",
"microhood", "microhood",
"disputed" "disputed"

2
test/ciao/reverse/layers_invalid.coffee

@ -24,7 +24,7 @@ json.features.should.be.instanceof Array
#? expected errors #? expected errors
should.exist json.geocoding.errors should.exist json.geocoding.errors
json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,macroregion,region,county,locality,continent,macrocounty,dependency,localadmin,macrohood,neighbourhood,microhood,disputed' ] json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,region,county,locality,continent,dependency,macroregion,localadmin,macrocounty,macrohood,borough,neighbourhood,microhood,disputed' ]
#? expected warnings #? expected warnings
should.not.exist json.geocoding.warnings should.not.exist json.geocoding.warnings

2
test/ciao/reverse/layers_mix_invalid_valid.coffee

@ -24,7 +24,7 @@ json.features.should.be.instanceof Array
#? expected errors #? expected errors
should.exist json.geocoding.errors should.exist json.geocoding.errors
json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,macroregion,region,county,locality,continent,macrocounty,dependency,localadmin,macrohood,neighbourhood,microhood,disputed' ] json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,region,county,locality,continent,dependency,macroregion,localadmin,macrocounty,macrohood,borough,neighbourhood,microhood,disputed' ]
#? expected warnings #? expected warnings
should.not.exist json.geocoding.warnings should.not.exist json.geocoding.warnings

1
test/ciao/search/layers_alias_coarse.coffee

@ -41,6 +41,7 @@ json.geocoding.query.layers.should.eql [ "continent",
"macrocounty", "macrocounty",
"county", "county",
"macrohood", "macrohood",
"borough",
"neighbourhood", "neighbourhood",
"microhood", "microhood",
"disputed" "disputed"

2
test/ciao/search/layers_invalid.coffee

@ -24,7 +24,7 @@ json.features.should.be.instanceof Array
#? expected errors #? expected errors
should.exist json.geocoding.errors should.exist json.geocoding.errors
json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,macroregion,region,county,locality,continent,macrocounty,dependency,localadmin,macrohood,neighbourhood,microhood,disputed' ] json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,region,county,locality,continent,dependency,macroregion,localadmin,macrocounty,macrohood,borough,neighbourhood,microhood,disputed' ]
#? expected warnings #? expected warnings
should.not.exist json.geocoding.warnings should.not.exist json.geocoding.warnings

2
test/ciao/search/layers_mix_invalid_valid.coffee

@ -24,7 +24,7 @@ json.features.should.be.instanceof Array
#? expected errors #? expected errors
should.exist json.geocoding.errors should.exist json.geocoding.errors
json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,macroregion,region,county,locality,continent,macrocounty,dependency,localadmin,macrohood,neighbourhood,microhood,disputed' ] json.geocoding.errors.should.eql [ '\'notlayer\' is an invalid layers parameter. Valid options: coarse,address,venue,country,region,county,locality,continent,dependency,macroregion,localadmin,macrocounty,macrohood,borough,neighbourhood,microhood,disputed' ]
#? expected warnings #? expected warnings
should.not.exist json.geocoding.warnings should.not.exist json.geocoding.warnings

28
test/unit/fixture/autocomplete_linguistic_final_token.js

@ -7,24 +7,31 @@ module.exports = {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100, 'boost': 1,
'slop': 3,
'query': 'one', 'query': 'one',
'type': 'phrase', 'type': 'phrase'
'operator': 'and'
} }
} }
}], }],
'should':[{ 'should':[{
'match': {
'phrase.default': {
'analyzer': 'peliasPhrase',
'boost': 1,
'slop': 3,
'query': 'one',
'type': 'phrase'
}
}
},{
'function_score': { 'function_score': {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'one', 'query': 'one',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },
@ -45,11 +52,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'one', 'query': 'one',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },

16
test/unit/fixture/autocomplete_linguistic_focus.js

@ -7,7 +7,7 @@ module.exports = {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryPartialToken',
'boost': 100, 'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase', 'type': 'phrase',
@ -20,7 +20,7 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryPartialToken',
'boost': 100, 'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase', 'type': 'phrase',
@ -40,7 +40,7 @@ module.exports = {
'decay': 0.5 'decay': 0.5
} }
}, },
'weight': 10 'weight': 40
}], }],
'score_mode': 'avg', 'score_mode': 'avg',
'boost_mode': 'multiply', 'boost_mode': 'multiply',
@ -64,11 +64,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },
@ -89,11 +86,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },

16
test/unit/fixture/autocomplete_linguistic_focus_null_island.js

@ -7,7 +7,7 @@ module.exports = {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryPartialToken',
'boost': 100, 'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase', 'type': 'phrase',
@ -20,7 +20,7 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryPartialToken',
'boost': 100, 'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase', 'type': 'phrase',
@ -40,7 +40,7 @@ module.exports = {
'decay': 0.5 'decay': 0.5
} }
}, },
'weight': 10 'weight': 40
}], }],
'score_mode': 'avg', 'score_mode': 'avg',
'boost_mode': 'multiply', 'boost_mode': 'multiply',
@ -64,11 +64,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },
@ -89,11 +86,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },

32
test/unit/fixture/autocomplete_linguistic_multiple_tokens.js

@ -6,11 +6,11 @@ module.exports = {
'bool': { 'bool': {
'must': [{ 'must': [{
'match': { 'match': {
'phrase.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'type': 'phrase', 'type': 'phrase',
'boost': 1, 'boost': 1,
'slop': 2, 'slop': 3,
'query': 'one two' 'query': 'one two'
} }
} }
@ -18,7 +18,7 @@ module.exports = {
{ {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryPartialToken',
'boost': 100, 'boost': 100,
'query': 'three', 'query': 'three',
'type': 'phrase', 'type': 'phrase',
@ -26,16 +26,25 @@ module.exports = {
} }
} }
}], }],
'should':[{ 'should':[
{
'match': {
'phrase.default': {
'analyzer' : 'peliasPhrase',
'type' : 'phrase',
'boost' : 1,
'slop' : 3,
'query' : 'one two'
}
}
},
{
'function_score': { 'function_score': {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'one two three', 'query': 'one two three',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },
@ -56,11 +65,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'one two three', 'query': 'one two three',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },

12
test/unit/fixture/autocomplete_linguistic_only.js

@ -7,7 +7,7 @@ module.exports = {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryPartialToken',
'boost': 100, 'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase', 'type': 'phrase',
@ -20,11 +20,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },
@ -45,11 +42,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },

36
test/unit/fixture/autocomplete_linguistic_with_admin.js

@ -7,11 +7,11 @@ module.exports = {
'must': [ 'must': [
{ {
'match': { 'match': {
'phrase.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'type': 'phrase', 'type': 'phrase',
'boost': 1, 'boost': 1,
'slop': 2, 'slop': 3,
'query': 'one two' 'query': 'one two'
} }
} }
@ -54,6 +54,15 @@ module.exports = {
} }
} }
}, },
{
'match': {
'parent.borough': {
'analyzer': 'peliasAdmin',
'boost': 600,
'query': 'three'
}
}
},
{ {
'match': { 'match': {
'parent.localadmin': { 'parent.localadmin': {
@ -81,16 +90,24 @@ module.exports = {
} }
} }
}, },
{
'match': {
'phrase.default': {
'analyzer' : 'peliasPhrase',
'type' : 'phrase',
'boost' : 1,
'slop' : 3,
'query' : 'one two'
}
}
},
{ {
'function_score': { 'function_score': {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'one two', 'query': 'one two',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },
@ -114,11 +131,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'one two', 'query': 'one two',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },

155
test/unit/fixture/autocomplete_single_character_street.js

@ -0,0 +1,155 @@
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [{
'match': {
'name.default': {
'analyzer': 'peliasQueryFullToken',
'type': 'phrase',
'boost': 1,
'slop': 3,
'query': 'k road'
}
}
}],
'should':[
{
'match': {
'address_parts.street': {
'query': 'k road',
'boost': 5,
'analyzer': 'peliasStreet'
}
}
}, {
'match': {
'parent.country': {
'query': 'laird',
'boost': 800,
'analyzer': 'peliasAdmin'
}
}
}, {
'match': {
'parent.region': {
'query': 'laird',
'boost': 600,
'analyzer': 'peliasAdmin'
}
}
}, {
'match': {
'parent.region_a': {
'query': 'laird',
'boost': 600,
'analyzer': 'peliasAdmin'
}
}
}, {
'match': {
'parent.county': {
'query': 'laird',
'boost': 400,
'analyzer': 'peliasAdmin'
}
}
}, {
'match': {
'parent.borough': {
'analyzer': 'peliasAdmin',
'boost': 600,
'query': 'laird'
}
}
}, {
'match': {
'parent.localadmin': {
'query': 'laird',
'boost': 200,
'analyzer': 'peliasAdmin'
}
}
}, {
'match': {
'parent.locality': {
'query': 'laird',
'boost': 200,
'analyzer': 'peliasAdmin'
}
}
}, {
'match': {
'parent.neighbourhood': {
'query': 'laird',
'boost': 200,
'analyzer': 'peliasAdmin'
}
}
},
{
'match': {
'phrase.default': {
'analyzer' : 'peliasPhrase',
'type' : 'phrase',
'boost' : 1,
'slop' : 3,
'query' : 'k road'
}
}
},
{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasQueryFullToken',
'query': 'k road',
}
}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'popularity',
'missing': 1
},
'weight': 1
}]
}
},{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasQueryFullToken',
'query': 'k road',
}
}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'population',
'missing': 1
},
'weight': 3
}]
}
}]
}
}
}
},
'sort': [ '_score' ],
'size': 20,
'track_scores': true
};

12
test/unit/fixture/autocomplete_with_source_filtering.js

@ -7,7 +7,7 @@ module.exports = {
'must': [{ 'must': [{
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryPartialToken',
'boost': 100, 'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase', 'type': 'phrase',
@ -20,11 +20,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },
@ -45,11 +42,8 @@ module.exports = {
'query': { 'query': {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasPhrase', 'analyzer': 'peliasQueryFullToken',
'boost': 100,
'query': 'test', 'query': 'test',
'type': 'phrase',
'operator': 'and'
} }
} }
}, },

2
test/unit/fixture/search_boundary_country.js

@ -18,7 +18,7 @@ module.exports = {
'name.default': { 'name.default': {
'query': 'test', 'query': 'test',
'boost': 1, 'boost': 1,
'analyzer': 'peliasOneEdgeGram' 'analyzer': 'peliasIndexOneEdgeGram'
} }
} }
} }

10
test/unit/fixture/search_full_address.js

@ -9,7 +9,7 @@ module.exports = {
'match': { 'match': {
'name.default': { 'name.default': {
'query': '123 main st', 'query': '123 main st',
'analyzer': 'peliasOneEdgeGram', 'analyzer': 'peliasIndexOneEdgeGram',
'boost': 1 'boost': 1
} }
} }
@ -139,6 +139,14 @@ module.exports = {
'analyzer': vs['admin:county:analyzer'] 'analyzer': vs['admin:county:analyzer']
} }
} }
}, {
'match': {
'parent.borough': {
'query': 'new york',
'boost': vs['admin:borough:boost'],
'analyzer': vs['admin:borough:analyzer']
}
}
}, { }, {
'match': { 'match': {
'parent.localadmin': { 'parent.localadmin': {

2
test/unit/fixture/search_linguistic_bbox.js

@ -9,7 +9,7 @@ module.exports = {
'name.default': { 'name.default': {
'query': 'test', 'query': 'test',
'boost': 1, 'boost': 1,
'analyzer': 'peliasOneEdgeGram' 'analyzer': 'peliasIndexOneEdgeGram'
} }
} }
}], }],

2
test/unit/fixture/search_linguistic_focus.js

@ -9,7 +9,7 @@ module.exports = {
'name.default': { 'name.default': {
'query': 'test', 'query': 'test',
'boost': 1, 'boost': 1,
'analyzer': 'peliasOneEdgeGram' 'analyzer': 'peliasIndexOneEdgeGram'
} }
} }
}], }],

2
test/unit/fixture/search_linguistic_focus_bbox.js

@ -9,7 +9,7 @@ module.exports = {
'name.default': { 'name.default': {
'query': 'test', 'query': 'test',
'boost': 1, 'boost': 1,
'analyzer': 'peliasOneEdgeGram' 'analyzer': 'peliasIndexOneEdgeGram'
} }
} }
}], }],

2
test/unit/fixture/search_linguistic_focus_null_island.js

@ -9,7 +9,7 @@ module.exports = {
'name.default': { 'name.default': {
'query': 'test', 'query': 'test',
'boost': 1, 'boost': 1,
'analyzer': 'peliasOneEdgeGram' 'analyzer': 'peliasIndexOneEdgeGram'
} }
} }
}], }],

2
test/unit/fixture/search_linguistic_only.js

@ -9,7 +9,7 @@ module.exports = {
'name.default': { 'name.default': {
'query': 'test', 'query': 'test',
'boost': 1, 'boost': 1,
'analyzer': 'peliasOneEdgeGram' 'analyzer': 'peliasIndexOneEdgeGram'
} }
} }
}], }],

2
test/unit/fixture/search_linguistic_viewport.js

@ -7,7 +7,7 @@ module.exports = {
{ {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasOneEdgeGram', 'analyzer': 'peliasIndexOneEdgeGram',
'boost': 1, 'boost': 1,
'query': 'test' 'query': 'test'
} }

2
test/unit/fixture/search_linguistic_viewport_min_diagonal.js

@ -7,7 +7,7 @@ module.exports = {
{ {
'match': { 'match': {
'name.default': { 'name.default': {
'analyzer': 'peliasOneEdgeGram', 'analyzer': 'peliasIndexOneEdgeGram',
'boost': 1, 'boost': 1,
'query': 'test' 'query': 'test'
} }

10
test/unit/fixture/search_partial_address.js

@ -10,7 +10,7 @@ module.exports = {
'match': { 'match': {
'name.default': { 'name.default': {
'query': 'soho grand', 'query': 'soho grand',
'analyzer': 'peliasOneEdgeGram', 'analyzer': 'peliasIndexOneEdgeGram',
'boost': 1 'boost': 1
} }
} }
@ -107,6 +107,14 @@ module.exports = {
'analyzer': vs['admin:county:analyzer'] 'analyzer': vs['admin:county:analyzer']
} }
} }
}, {
'match': {
'parent.borough': {
'query': 'new york',
'boost': vs['admin:borough:boost'],
'analyzer': vs['admin:borough:analyzer']
}
}
}, { }, {
'match': { 'match': {
'parent.localadmin': { 'parent.localadmin': {

10
test/unit/fixture/search_regions_address.js

@ -10,7 +10,7 @@ module.exports = {
'match': { 'match': {
'name.default': { 'name.default': {
'query': '1 water st', 'query': '1 water st',
'analyzer': 'peliasOneEdgeGram', 'analyzer': 'peliasIndexOneEdgeGram',
'boost': 1 'boost': 1
} }
} }
@ -123,6 +123,14 @@ module.exports = {
'analyzer': vs['admin:county:analyzer'] 'analyzer': vs['admin:county:analyzer']
} }
} }
}, {
'match': {
'parent.borough': {
'query': 'manhattan',
'boost': vs['admin:borough:boost'],
'analyzer': vs['admin:borough:analyzer']
}
}
}, { }, {
'match': { 'match': {
'parent.localadmin': { 'parent.localadmin': {

2
test/unit/fixture/search_with_source_filtering.js

@ -9,7 +9,7 @@ module.exports = {
'name.default': { 'name.default': {
'query': 'test', 'query': 'test',
'boost': 1, 'boost': 1,
'analyzer': 'peliasOneEdgeGram' 'analyzer': 'peliasIndexOneEdgeGram'
} }
} }
}], }],

11
test/unit/helper/labelGenerator_examples.js

@ -104,6 +104,17 @@ module.exports.tests.france = function(test, common) {
}; };
// A document carrying only a name must yield exactly that name as its label.
module.exports.tests.name_only = function(test, common) {
  test('name-only results (no admin fields) should not include extraneous comma', function(t) {
    var nameOnlyDoc = { 'name': 'Result name' };
    var label = generator(nameOnlyDoc);

    t.equal(label, 'Result name');
    t.end();
  });
};
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {
function test(name, testFunction) { function test(name, testFunction) {

150
test/unit/helper/text_parser.js

@ -1,150 +0,0 @@
// Unit tests for the text parser helper (helper/text_parser).
var parser = require('../../../helper/text_parser');
var type_mapping = require('../../../helper/type_mapping');
// layer lookup table; the short-query tests below compare against its `coarse` entry
var layers_map = type_mapping.layer_mapping;
// registry of test-group functions; module.exports.all iterates over it
module.exports.tests = {};
// Checks that the parser module exposes its two entry points as functions.
module.exports.tests.interface = function(test, common) {
  test('interface', function(t) {
    ['get_parsed_address', 'get_layers'].forEach(function(method) {
      t.equal(typeof parser[method], 'function', 'valid function');
    });
    t.end();
  });
};
// Comma-delimited input: the text before the comma is expected in `name`,
// the text after it in `admin_parts`, with or without a space after the comma.
module.exports.tests.split_on_comma = function(test, common) {
  var queries = [
    { name: 'soho', admin_parts: 'new york' },
    { name: 'chelsea', admin_parts: 'london' },
    { name: '123 main', admin_parts: 'new york' }
  ];

  // Parses `text` and checks both halves against the expected `query` parts.
  function assertSplit(t, text, query) {
    var address = parser.get_parsed_address(text);

    t.equal(typeof address, 'object', 'valid object');
    t.equal(address.name, query.name, 'name set correctly to ' + address.name);
    t.equal(address.admin_parts, query.admin_parts, 'admin_parts set correctly to ' + address.admin_parts);
    t.end();
  }

  queries.forEach(function (query) {
    var spaced = query.name + ', ' + query.admin_parts;
    var unspaced = query.name + ',' + query.admin_parts;

    // Label each case with the actual input text; concatenating the `query`
    // object itself would render every case as "[object Object]".
    test('naive parsing "' + spaced + '"', function(t) {
      assertSplit(t, spaced, query);
    });

    test('naive parsing "' + unspaced + '" without spaces', function(t) {
      assertSplit(t, unspaced, query);
    });
  });
};
// Inputs of one to three characters (letters, digits, or a mix): get_layers
// is expected to return the coarse layer list for each of them.
module.exports.tests.parse_three_chars_or_less = function(test, common) {
  var chars_queries = ['a', 'bb', 'ccc'];
  var num_queries = ['1', '12', '123'];
  var alphanum_q = ['a1', '1a2', '12c'];

  var queries = chars_queries.concat(num_queries).concat(alphanum_q);

  queries.forEach(function(query) {
    // the inputs include 3-character strings, so the label says "<= 3"
    test('query length <= 3 (' + query + ')', function(t) {
      var address = parser.get_parsed_address(query);
      var target_layer = layers_map.coarse;
      var layers = parser.get_layers(query);

      // note: typeof null is also 'object', so this passes for a null result
      t.equal(typeof address, 'object', 'valid object');
      t.deepEqual(layers, target_layer, 'layers set correctly to ' + target_layer.join(', '));
      t.end();
    });
  });
};
// Inputs without a comma for which get_parsed_address is expected to return null.
module.exports.tests.parse_one_token = function(test, common) {
  var cases = [
    { label: 'query with one token', text: 'yugolsavia' },
    { label: 'query with two tokens, no numbers', text: 'small town' },
    { label: 'query with two tokens, number first', text: '123 main' },
    { label: 'query with two tokens, number second', text: 'main 123' },
    { label: 'query with many tokens', text: 'main particle new york' }
  ];

  cases.forEach(function(testCase) {
    test(testCase.label, function(t) {
      var parsed = parser.get_parsed_address(testCase.text);
      t.equal(parsed, null, 'nothing address specific detected');
      t.end();
    });
  });
};
// Full address strings: checks the house number, street, regions, state and
// postalcode fields returned by get_parsed_address.
module.exports.tests.parse_address = function(test, common) {
  test('valid address, house number', function(t) {
    var query_string = '123 main st new york ny';
    var address = parser.get_parsed_address(query_string);

    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.number, '123', 'parsed house number');
    t.equal(address.street, 'main st', 'parsed street');
    t.deepEqual(address.regions, ['new york'], 'parsed city');
    t.equal(address.state , 'NY', 'parsed state');
    t.end();
  });

  test('valid address, zipcode', function(t) {
    var query_string = '123 main st new york ny 10010';
    var address = parser.get_parsed_address(query_string);

    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.number, '123', 'parsed house number');
    t.equal(address.street, 'main st', 'parsed street');
    t.deepEqual(address.regions, ['new york'], 'parsed city');
    t.equal(address.state , 'NY', 'parsed state');
    t.equal(address.postalcode, '10010', 'parsed zip is a string');
    t.end();
  });

  test('valid address with leading 0s in zipcode', function(t) {
    var query_string = '339 W Main St, Cheshire, 06410';
    var address = parser.get_parsed_address(query_string);

    // the postalcode is compared as a string, so the leading zero must survive
    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.street, 'W Main St', 'parsed street');
    t.deepEqual(address.regions, ['Cheshire'], 'parsed city');
    t.equal(address.postalcode, '06410', 'parsed zip');
    t.end();
  });

  test('valid address without spaces after commas', function(t) {
    var query_string = '339 W Main St,Lancaster,PA';
    var address = parser.get_parsed_address(query_string);

    t.equal(typeof address, 'object', 'valid object for the address');
    t.equal(address.number, '339', 'parsed house number');
    t.equal(address.street, 'W Main St', 'parsed street');
    t.deepEqual(address.regions, ['Lancaster'], 'parsed city');
    // state is a plain string: use equal, as the other cases in this group do
    t.equal(address.state, 'PA', 'parsed state');
    t.end();
  });
};
// Runs every registered test group, handing each one a `test` function that
// prefixes its names with 'QUERY PARSING: '.
module.exports.all = function (tape, common) {
  var prefixed = function (name, testFunction) {
    return tape('QUERY PARSING: ' + name, testFunction);
  };

  var groups = module.exports.tests;
  for( var key in groups ){
    groups[key](prefixed, common);
  }
};

2
test/unit/helper/type_mapping.js

@ -14,7 +14,7 @@ module.exports.tests.interfaces = function(test, common) {
t.deepEquals(type_mapping.layer_mapping.coarse, t.deepEquals(type_mapping.layer_mapping.coarse,
[ 'continent', 'country', 'dependency', 'macroregion', [ 'continent', 'country', 'dependency', 'macroregion',
'region', 'locality', 'localadmin', 'macrocounty', 'county', 'macrohood', 'region', 'locality', 'localadmin', 'macrocounty', 'county', 'macrohood',
'neighbourhood', 'microhood', 'disputed' ]); 'borough', 'neighbourhood', 'microhood', 'disputed' ]);
t.end(); t.end();
}); });

74
test/unit/query/autocomplete.js

@ -1,6 +1,5 @@
var generate = require('../../../query/autocomplete'); var generate = require('../../../query/autocomplete');
var parser = require('../../../helper/text_parser');
module.exports.tests = {}; module.exports.tests = {};
@ -14,25 +13,31 @@ module.exports.tests.interface = function(test, common) {
module.exports.tests.query = function(test, common) { module.exports.tests.query = function(test, common) {
test('valid lingustic-only autocomplete', function(t) { test('valid lingustic-only autocomplete', function(t) {
var query = generate({ var query = generate({
text: 'test' text: 'test',
tokens: ['test'],
tokens_complete: [],
tokens_incomplete: ['test']
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/autocomplete_linguistic_only'); var expected = require('../fixture/autocomplete_linguistic_only');
t.deepEqual(compiled, expected, 'valid autocomplete query'); t.deepEqual(compiled, expected, 'autocomplete_linguistic_only');
t.end(); t.end();
}); });
test('valid lingustic autocomplete with 3 tokens', function(t) { test('valid lingustic autocomplete with 3 tokens', function(t) {
var query = generate({ var query = generate({
text: 'one two three' text: 'one two three',
tokens: ['one','two','three'],
tokens_complete: ['one','two'],
tokens_incomplete: ['three']
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/autocomplete_linguistic_multiple_tokens.js'); var expected = require('../fixture/autocomplete_linguistic_multiple_tokens');
t.deepEqual(compiled, expected, 'valid autocomplete query'); t.deepEqual(compiled, expected, 'autocomplete_linguistic_multiple_tokens');
t.end(); t.end();
}); });
@ -43,13 +48,16 @@ module.exports.tests.query = function(test, common) {
name: 'one two', name: 'one two',
regions: [ 'one two', 'three' ], regions: [ 'one two', 'three' ],
admin_parts: 'three' admin_parts: 'three'
} },
tokens: ['one','two'],
tokens_complete: ['one','two'],
tokens_incomplete: []
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/autocomplete_linguistic_with_admin.js'); var expected = require('../fixture/autocomplete_linguistic_with_admin');
t.deepEqual(compiled, expected, 'valid autocomplete query'); t.deepEqual(compiled, expected, 'autocomplete_linguistic_with_admin');
t.end(); t.end();
}); });
@ -58,13 +66,16 @@ module.exports.tests.query = function(test, common) {
// note: if 1 grams are enabled at a later date, remove this behaviour. // note: if 1 grams are enabled at a later date, remove this behaviour.
test('valid lingustic autocomplete final token', function(t) { test('valid lingustic autocomplete final token', function(t) {
var query = generate({ var query = generate({
text: 'one t' text: 'one t',
tokens: ['one','t'],
tokens_complete: ['one'],
tokens_incomplete: []
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/autocomplete_linguistic_final_token.js'); var expected = require('../fixture/autocomplete_linguistic_final_token');
t.deepEqual(compiled, expected, 'valid autocomplete query'); t.deepEqual(compiled, expected, 'autocomplete_linguistic_final_token');
t.end(); t.end();
}); });
@ -72,13 +83,16 @@ module.exports.tests.query = function(test, common) {
var query = generate({ var query = generate({
text: 'test', text: 'test',
'focus.point.lat': 29.49136, 'focus.point.lat': 29.49136,
'focus.point.lon': -82.50622 'focus.point.lon': -82.50622,
tokens: ['test'],
tokens_complete: [],
tokens_incomplete: ['test']
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/autocomplete_linguistic_focus'); var expected = require('../fixture/autocomplete_linguistic_focus');
t.deepEqual(compiled, expected, 'valid autocomplete query'); t.deepEqual(compiled, expected, 'autocomplete_linguistic_focus');
t.end(); t.end();
}); });
@ -86,20 +100,26 @@ module.exports.tests.query = function(test, common) {
var query = generate({ var query = generate({
text: 'test', text: 'test',
'focus.point.lat': 0, 'focus.point.lat': 0,
'focus.point.lon': 0 'focus.point.lon': 0,
tokens: ['test'],
tokens_complete: [],
tokens_incomplete: ['test']
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/autocomplete_linguistic_focus_null_island'); var expected = require('../fixture/autocomplete_linguistic_focus_null_island');
t.deepEqual(compiled, expected, 'valid autocomplete query'); t.deepEqual(compiled, expected, 'autocomplete_linguistic_focus_null_island');
t.end(); t.end();
}); });
test('valid sources filter', function(t) { test('valid sources filter', function(t) {
var query = generate({ var query = generate({
'text': 'test', 'text': 'test',
'sources': ['test_source'] 'sources': ['test_source'],
tokens: ['test'],
tokens_complete: [],
tokens_incomplete: ['test']
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
@ -108,6 +128,26 @@ module.exports.tests.query = function(test, common) {
t.deepEqual(compiled, expected, 'valid autocomplete query with source filtering'); t.deepEqual(compiled, expected, 'valid autocomplete query with source filtering');
t.end(); t.end();
}); });
// A street name containing a one-character token ('k'), with both name tokens
// marked complete, compared against the single-character-street fixture.
test('single character street address', function(t) {
  var clean = {
    text: 'k road, laird',
    parsed_text: {
      name: 'k road',
      street: 'k road',
      regions: [ 'laird' ]
    },
    tokens: ['k', 'road'],
    tokens_complete: ['k', 'road'],
    tokens_incomplete: []
  };

  // round-trip through JSON to get a plain object for comparison
  var actual = JSON.parse( JSON.stringify( generate(clean) ) );
  var fixture = require('../fixture/autocomplete_single_character_street');

  t.deepEqual(actual, fixture, 'autocomplete_single_character_street');
  t.end();
});
}; };
module.exports.all = function (tape, common) { module.exports.all = function (tape, common) {

26
test/unit/query/search.js

@ -1,5 +1,5 @@
var generate = require('../../../query/search'); var generate = require('../../../query/search');
var parser = require('../../../helper/text_parser'); var text_analyzer = require('pelias-text-analyzer');
module.exports.tests = {}; module.exports.tests = {};
@ -25,7 +25,7 @@ module.exports.tests.query = function(test, common) {
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_linguistic_focus_bbox'); var expected = require('../fixture/search_linguistic_focus_bbox');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_linguistic_focus_bbox');
t.end(); t.end();
}); });
@ -42,7 +42,7 @@ module.exports.tests.query = function(test, common) {
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_linguistic_bbox'); var expected = require('../fixture/search_linguistic_bbox');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_linguistic_bbox');
t.end(); t.end();
}); });
@ -55,7 +55,7 @@ module.exports.tests.query = function(test, common) {
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_linguistic_only'); var expected = require('../fixture/search_linguistic_only');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_linguistic_only');
t.end(); t.end();
}); });
@ -69,7 +69,7 @@ module.exports.tests.query = function(test, common) {
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_linguistic_focus'); var expected = require('../fixture/search_linguistic_focus');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_linguistic_focus');
t.end(); t.end();
}); });
@ -86,7 +86,7 @@ module.exports.tests.query = function(test, common) {
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_linguistic_viewport'); var expected = require('../fixture/search_linguistic_viewport');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_linguistic_viewport');
t.end(); t.end();
}); });
@ -119,7 +119,7 @@ module.exports.tests.query = function(test, common) {
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_linguistic_focus_null_island'); var expected = require('../fixture/search_linguistic_focus_null_island');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_linguistic_focus_null_island');
t.end(); t.end();
}); });
@ -128,13 +128,13 @@ module.exports.tests.query = function(test, common) {
var query = generate({ text: address, var query = generate({ text: address,
layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
querySize: 10, querySize: 10,
parsed_text: parser.get_parsed_address(address), parsed_text: text_analyzer.parse(address),
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_full_address'); var expected = require('../fixture/search_full_address');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_full_address');
t.end(); t.end();
}); });
@ -143,13 +143,13 @@ module.exports.tests.query = function(test, common) {
var query = generate({ text: partial_address, var query = generate({ text: partial_address,
layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
querySize: 10, querySize: 10,
parsed_text: parser.get_parsed_address(partial_address), parsed_text: text_analyzer.parse(partial_address),
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_partial_address'); var expected = require('../fixture/search_partial_address');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_partial_address');
t.end(); t.end();
}); });
@ -158,13 +158,13 @@ module.exports.tests.query = function(test, common) {
var query = generate({ text: partial_address, var query = generate({ text: partial_address,
layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ], layers: [ 'address', 'venue', 'country', 'region', 'county', 'neighbourhood', 'locality', 'localadmin' ],
querySize: 10, querySize: 10,
parsed_text: parser.get_parsed_address(partial_address), parsed_text: text_analyzer.parse(partial_address),
}); });
var compiled = JSON.parse( JSON.stringify( query ) ); var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/search_regions_address'); var expected = require('../fixture/search_regions_address');
t.deepEqual(compiled, expected, 'valid search query'); t.deepEqual(compiled, expected, 'search_regions_address');
t.end(); t.end();
}); });

2
test/unit/run.js

@ -19,7 +19,6 @@ var tests = [
require('./helper/labelGenerator_GBR'), require('./helper/labelGenerator_GBR'),
require('./helper/labelGenerator_USA'), require('./helper/labelGenerator_USA'),
require('./helper/labelSchema'), require('./helper/labelSchema'),
require('./helper/text_parser'),
require('./helper/type_mapping'), require('./helper/type_mapping'),
require('./helper/sizeCalculator'), require('./helper/sizeCalculator'),
require('./middleware/confidenceScore'), require('./middleware/confidenceScore'),
@ -47,6 +46,7 @@ var tests = [
require('./sanitiser/_sources'), require('./sanitiser/_sources'),
require('./sanitiser/_sources_and_layers'), require('./sanitiser/_sources_and_layers'),
require('./sanitiser/_text'), require('./sanitiser/_text'),
require('./sanitiser/_tokenizer'),
require('./sanitiser/_deprecate_quattroshapes'), require('./sanitiser/_deprecate_quattroshapes'),
require('./src/backend'), require('./src/backend'),
require('./sanitiser/autocomplete'), require('./sanitiser/autocomplete'),

10
test/unit/sanitiser/_layers.js

@ -42,8 +42,8 @@ module.exports.tests.sanitize_layers = function(test, common) {
sanitize(raw, clean); sanitize(raw, clean);
var admin_layers = [ 'continent', 'country', 'dependency', var admin_layers = [ 'continent', 'country', 'dependency',
'macroregion', 'region', 'locality', 'localadmin', 'macrocounty', 'county', 'macrohood', 'neighbourhood', 'macroregion', 'region', 'locality', 'localadmin', 'macrocounty', 'county',
'microhood', 'disputed' ]; 'macrohood', 'borough', 'neighbourhood', 'microhood', 'disputed' ];
t.deepEqual(clean.layers, admin_layers, 'coarse layers set'); t.deepEqual(clean.layers, admin_layers, 'coarse layers set');
t.end(); t.end();
@ -77,8 +77,8 @@ module.exports.tests.sanitize_layers = function(test, common) {
sanitize(raw, clean); sanitize(raw, clean);
var expected_layers = [ 'continent', 'country', 'dependency', var expected_layers = [ 'continent', 'country', 'dependency',
'macroregion', 'region', 'locality', 'localadmin', 'macrocounty', 'county', 'macrohood', 'neighbourhood', 'macroregion', 'region', 'locality', 'localadmin', 'macrocounty', 'county',
'microhood', 'disputed' ]; 'macrohood', 'borough', 'neighbourhood', 'microhood', 'disputed' ];
t.deepEqual(clean.layers, expected_layers, 'coarse + regular layers set'); t.deepEqual(clean.layers, expected_layers, 'coarse + regular layers set');
t.end(); t.end();
@ -114,7 +114,7 @@ module.exports.tests.sanitize_layers = function(test, common) {
var coarse_layers = [ 'continent', var coarse_layers = [ 'continent',
'country', 'dependency', 'macroregion', 'region', 'locality', 'localadmin', 'country', 'dependency', 'macroregion', 'region', 'locality', 'localadmin',
'macrocounty', 'county', 'macrohood', 'neighbourhood', 'microhood', 'macrocounty', 'county', 'macrohood', 'borough', 'neighbourhood', 'microhood',
'disputed' ]; 'disputed' ];
var venue_layers = [ 'venue' ]; var venue_layers = [ 'venue' ];

457
test/unit/sanitiser/_tokenizer.js

@ -0,0 +1,457 @@
// sanitiser under test; every test below calls it as sanitiser(raw, clean)
var sanitiser = require('../../../sanitiser/_tokenizer');
// registry of test groups; module.exports.all iterates it and runs each group
module.exports.tests = {};
/**
 * Sanity checks for the tokenizer sanitiser.
 *
 * Each test builds a `clean` object, runs `sanitiser({}, clean)` and then
 * asserts on the three arrays written to `clean` (tokens, tokens_complete,
 * tokens_incomplete) and on the returned `{ errors, warnings }` messages.
 *
 * @param {function} test   - tape-style test registrar: test(name, callback)
 * @param {*}        common - shared test context (unused by these tests)
 */
module.exports.tests.sanity_checks = function(test, common) {

  test('clean.text not set', function(t) {

    var clean = {}; // clean.text not set
    var messages = sanitiser({}, clean);

    // no tokens produced
    t.deepEquals(clean.tokens, [], 'no tokens');
    t.deepEquals(clean.tokens_complete, [], 'no tokens');
    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');

    // no errors/warnings produced
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

  test('clean.text not a string', function(t) {

    var clean = { text: {} }; // clean.text not a string
    var messages = sanitiser({}, clean);

    // no tokens produced
    t.deepEquals(clean.tokens, [], 'no tokens');
    t.deepEquals(clean.tokens_complete, [], 'no tokens');
    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');

    // no errors/warnings produced
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

  test('empty string', function(t) {

    var clean = { text: '' };
    var messages = sanitiser({}, clean);

    // no tokens produced
    t.deepEquals(clean.tokens, [], 'no tokens');
    t.deepEquals(clean.tokens_complete, [], 'no tokens');
    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');

    // no errors/warnings produced
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

  test('clean.parsed_text set but clean.parsed_text.name invalid', function(t) {

    // parsed_text.name is present but not a string, which is the case the
    // test name describes; a fixture keyed on 'text' would leave 'name' unset
    // and never exercise it.
    // NOTE(review): assumes a non-string name is rejected the same way as a
    // non-string clean.text - confirm against sanitiser/_tokenizer.js
    var clean = { parsed_text: { name: {} } };
    var messages = sanitiser({}, clean);

    // no tokens produced
    t.deepEquals(clean.tokens, [], 'no tokens');
    t.deepEquals(clean.tokens_complete, [], 'no tokens');
    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');

    // no errors/warnings produced
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

  test('favor clean.parsed_text.name over clean.text', function(t) {

    var clean = { parsed_text: { name: 'foo' }, text: 'bar' };
    var messages = sanitiser({}, clean);

    // favor clean.parsed_text.name over clean.text;
    // every token is expected to be reported as complete
    t.deepEquals(clean.tokens, [ 'foo' ], 'use clean.parsed_text.name');
    t.deepEquals(clean.tokens_complete, [ 'foo' ], 'use clean.parsed_text.name');
    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');

    // no errors/warnings produced
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

  test('favor clean.parsed_text street data over clean.text', function(t) {

    var clean = { parsed_text: { number: '190', street: 'foo st' }, text: 'bar' };
    var messages = sanitiser({}, clean);

    // favor clean.parsed_text street data (number + street) over clean.text
    t.deepEquals(clean.tokens, [ '190', 'foo', 'st' ], 'use street name + number');
    t.deepEquals(clean.tokens_complete, [ '190', 'foo', 'st' ], 'use street name + number');
    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');

    // no errors/warnings produced
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

  test('favor clean.parsed_text.name over clean.parsed_text street data', function(t) {

    var clean = { parsed_text: { number: '190', street: 'foo st', name: 'foo' }, text: 'bar' };
    var messages = sanitiser({}, clean);

    // favor clean.parsed_text.name over all other variables
    t.deepEquals(clean.tokens, [ 'foo' ], 'use clean.parsed_text.name');
    t.deepEquals(clean.tokens_complete, [ 'foo' ], 'use clean.parsed_text.name');
    t.deepEquals(clean.tokens_incomplete, [], 'no tokens');

    // no errors/warnings produced
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

};
/**
 * Whitespace-delimited input: both raw strings below are expected to yield
 * the same token list, with the final token reported as incomplete.
 */
module.exports.tests.space_delimiter = function(test, common) {

  // tape test name -> raw text handed to the sanitiser (registration order matters)
  var inputsByTestName = {
    'space delimiter - simple': '30 west 26th street new york',
    'space delimiter - multiple spaces / other whitespace': ' 30 west \t26th \nstreet new york '
  };

  Object.keys(inputsByTestName).forEach(function(testName) {
    test(testName, function(t) {

      var request = { text: inputsByTestName[testName] };
      var report = sanitiser({}, request);

      var allTokens = [ '30', 'west', '26th', 'street', 'new', 'york' ];

      // tokens produced
      t.deepEquals(request.tokens, allTokens, 'tokens produced');

      // all but last token marked as 'complete'
      t.deepEquals(request.tokens_complete, allTokens.slice(0, -1), 'tokens produced');

      // last token marked as 'incomplete'
      t.deepEquals(request.tokens_incomplete, allTokens.slice(-1), 'tokens produced');

      // no errors/warnings produced
      t.deepEquals(report.errors, [], 'no errors');
      t.deepEquals(report.warnings, [], 'no warnings');

      t.end();
    });
  });

};
/**
 * Comma-delimited input: a single comma and runs of leading, repeated and
 * trailing commas are expected to produce the same tokens.
 */
module.exports.tests.comma_delimiter = function(test, common) {

  // [ tape test name, raw text ] pairs, registered in this order
  var scenarios = [
    [ 'comma delimiter - simple', '30 west 26th street, new york' ],
    [ 'comma delimiter - multiple commas', ',30 west 26th street,,, new york,' ]
  ];

  scenarios.forEach(function(scenario) {
    var title = scenario[0];
    var rawText = scenario[1];

    test(title, function(t) {

      var target = { text: rawText };
      var outcome = sanitiser({}, target);

      var completeTokens = [ '30', 'west', '26th', 'street', 'new' ];
      var incompleteTokens = [ 'york' ];

      // tokens produced (complete tokens followed by the incomplete one)
      t.deepEquals(target.tokens, completeTokens.concat(incompleteTokens), 'tokens produced');

      // all but last token marked as 'complete'
      t.deepEquals(target.tokens_complete, completeTokens, 'tokens produced');

      // last token marked as 'incomplete'
      t.deepEquals(target.tokens_incomplete, incompleteTokens, 'tokens produced');

      // no errors/warnings produced
      t.deepEquals(outcome.errors, [], 'no errors');
      t.deepEquals(outcome.warnings, [], 'no warnings');

      t.end();
    });
  });

};
/**
 * Forward-slash-delimited input: single and repeated/leading/trailing
 * slashes are expected to produce the same tokens.
 */
module.exports.tests.forward_slash_delimiter = function(test, common) {

  // registers one tape test that feeds rawText to the sanitiser and checks
  // the token arrays plus the (empty) error and warning lists
  function expectStreetTokens(testName, rawText) {
    test(testName, function(t) {

      var subject = { text: rawText };
      var feedback = sanitiser({}, subject);

      // tokens produced
      var everything = [ 'Bedell', 'Street', '133rd', 'Avenue' ];
      t.deepEquals(subject.tokens, everything, 'tokens produced');

      // all but last token marked as 'complete'
      var finished = [ 'Bedell', 'Street', '133rd' ];
      t.deepEquals(subject.tokens_complete, finished, 'tokens produced');

      // last token marked as 'incomplete'
      var unfinished = [ 'Avenue' ];
      t.deepEquals(subject.tokens_incomplete, unfinished, 'tokens produced');

      // no errors/warnings produced
      t.deepEquals(feedback.errors, [], 'no errors');
      t.deepEquals(feedback.warnings, [], 'no warnings');

      t.end();
    });
  }

  expectStreetTokens('forward slash delimiter - simple', 'Bedell Street/133rd Avenue');
  expectStreetTokens('forward slash - multiple slashes', '/Bedell Street//133rd Avenue/');

};
/**
 * Single-character final token: the numeric case expects it to be kept as
 * the incomplete token, the non-numeric case expects it to be dropped from
 * tokens_incomplete (while still appearing in tokens).
 */
module.exports.tests.final_token_single_gram = function(test, common) {

  // one entry per tape test; incompleteLabel is the assertion message used
  // for the tokens_incomplete check
  var cases = [
    {
      title: 'final token single gram - numeric',
      text: 'grolmanstrasse 1',
      tokens: [ 'grolmanstrasse', '1' ],
      complete: [ 'grolmanstrasse' ],
      incomplete: [ '1' ],           // last token marked as 'incomplete'
      incompleteLabel: 'tokens produced'
    },
    {
      title: 'final token single gram - non-numeric',
      text: 'grolmanstrasse a',
      tokens: [ 'grolmanstrasse', 'a' ],
      complete: [ 'grolmanstrasse' ],
      incomplete: [],                // last token removed!
      incompleteLabel: 'no tokens'
    }
  ];

  cases.forEach(function(scenario) {
    test(scenario.title, function(t) {

      var payload = { text: scenario.text };
      var notices = sanitiser({}, payload);

      // tokens produced
      t.deepEquals(payload.tokens, scenario.tokens, 'tokens produced');

      // all but last token marked as 'complete'
      t.deepEquals(payload.tokens_complete, scenario.complete, 'tokens produced');

      // final token: kept or discarded depending on the case
      t.deepEquals(payload.tokens_incomplete, scenario.incomplete, scenario.incompleteLabel);

      // no errors/warnings produced
      t.deepEquals(notices.errors, [], 'no errors');
      t.deepEquals(notices.warnings, [], 'no warnings');

      t.end();
    });
  });

};
/**
 * Back-slash-delimited input: only the full token list and the returned
 * messages are asserted here (not the complete/incomplete split).
 */
module.exports.tests.back_slash_delimiter = function(test, common) {

  // tape test names and the raw text each one feeds to the sanitiser
  var titles = [
    'back slash delimiter - simple',
    'back slash - multiple slashes'
  ];
  var texts = [
    'Bedell Street\\133rd Avenue',
    '\\Bedell Street\\\\133rd Avenue\\'
  ];

  for( var i = 0; i < titles.length; i++ ){
    // bind the current text via an IIFE so each test callback sees its own input
    (function(title, rawText) {
      test(title, function(t) {

        var input = { text: rawText };
        var output = sanitiser({}, input);

        // tokens produced
        var wanted = [ 'Bedell', 'Street', '133rd', 'Avenue' ];
        t.deepEquals(input.tokens, wanted, 'tokens produced');

        // no errors/warnings produced
        t.deepEquals(output.errors, [], 'no errors');
        t.deepEquals(output.warnings, [], 'no warnings');

        t.end();
      });
    })(titles[i], texts[i]);
  }

};
/**
 * Mixed delimiters: commas, both kinds of slash and assorted whitespace in
 * one string are expected to yield just the four word tokens.
 */
module.exports.tests.mixed_delimiter = function(test, common) {

  test('mixed delimiters', function(t) {

    var rawText = ',/Bedell Street\\, \n\t ,\\//133rd Avenue, /\n/';
    var expectedTokens = [ 'Bedell', 'Street', '133rd', 'Avenue' ];

    var container = { text: rawText };
    var diagnostics = sanitiser({}, container);

    // tokens produced
    t.deepEquals(container.tokens, expectedTokens, 'tokens produced');

    // no errors/warnings produced
    t.deepEquals(diagnostics.errors, [], 'no errors');
    t.deepEquals(diagnostics.warnings, [], 'no warnings');

    t.end();
  });

};
/**
 * Runs every test group registered on module.exports.tests.
 *
 * @param {function} tape   - test runner invoked as tape(name, testFunction)
 * @param {*}        common - shared context passed through to each group
 */
module.exports.all = function (tape, common) {

  var groups = module.exports.tests;

  // wraps tape so that every test name carries this file's prefix
  var test = function(name, testFunction) {
    var qualifiedName = 'SANITISER _tokenizer: ' + name;
    return tape(qualifiedName, testFunction);
  };

  for( var groupName in groups ){
    var registerGroup = groups[groupName];
    registerGroup(test, common);
  }
};

5
test/unit/sanitiser/autocomplete.js

@ -4,7 +4,10 @@ module.exports.tests = {};
module.exports.tests.sanitisers = function(test, common) { module.exports.tests.sanitisers = function(test, common) {
test('check sanitiser list', function (t) { test('check sanitiser list', function (t) {
var expected = ['singleScalarParameters', 'text', 'size', 'layers', 'sources', 'sources_and_layers', 'private', 'geo_autocomplete' ]; var expected = [
'singleScalarParameters', 'text', 'tokenizer', 'size', 'layers', 'sources',
'sources_and_layers', 'private', 'geo_autocomplete'
];
t.deepEqual(Object.keys(autocomplete.sanitiser_list), expected); t.deepEqual(Object.keys(autocomplete.sanitiser_list), expected);
t.end(); t.end();
}); });

4
test/unit/sanitiser/search.js

@ -1,6 +1,6 @@
var extend = require('extend'), var extend = require('extend'),
search = require('../../../sanitiser/search'), search = require('../../../sanitiser/search'),
parser = require('../../../helper/text_parser'), text_analyzer = require('pelias-text-analyzer'),
sanitize = search.sanitize, sanitize = search.sanitize,
middleware = search.middleware, middleware = search.middleware,
defaultError = 'invalid param \'text\': text length, must be >0'; defaultError = 'invalid param \'text\': text length, must be >0';
@ -80,7 +80,7 @@ module.exports.tests.sanitize_text_with_delim = function(test, common) {
sanitize( req, function( ){ sanitize( req, function( ){
var expected_text = text; var expected_text = text;
var expected_parsed_text = parser.get_parsed_address(text); var expected_parsed_text = text_analyzer.parse(text);
t.equal(req.errors[0], undefined, 'no error'); t.equal(req.errors[0], undefined, 'no error');
t.equal(req.clean.parsed_text.name, expected_parsed_text.name, 'clean name set correctly'); t.equal(req.clean.parsed_text.name, expected_parsed_text.name, 'clean name set correctly');
t.equal(req.clean.text, expected_text, 'text should match'); t.equal(req.clean.text, expected_text, 'text should match');

Loading…
Cancel
Save