Browse Source

fix(autocomplete): detect the case where input text is unsubstantial

It's possible for the `text` input to /v1/autocomplete to be of non-zero
length after trimming whitespace and quotes, but still be insufficient
to use for geocoding.

One common case is that it contains only commas, slashes, or other
delimiters.

Our query logic currently does not handle this case: it generates
Elasticsearch queries that lack a primary `must` clause and therefore
end up searching every document in the index. These queries are slow,
consume cluster resources, and are not useful.

By detecting unsubstantial inputs in the tokenizer sanitizer and reporting an error, we can prevent this.
pull/1214/head
Julian Simioni 6 years ago
parent
commit
b1107a0c8f
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 6
      sanitizer/_tokenizer.js
  2. 35
      test/unit/sanitizer/_tokenizer.js

6
sanitizer/_tokenizer.js

@ -62,6 +62,9 @@ function _sanitize( raw, clean ){
clean.tokens = text clean.tokens = text
.split(/[\s,\\\/]+/) // split on delimeters .split(/[\s,\\\/]+/) // split on delimeters
.filter(function(el){return el;}); // remove empty elements .filter(function(el){return el;}); // remove empty elements
} else {
// text is empty, this sanitizer should be a no-op
return messages;
} }
/** /**
@ -96,6 +99,9 @@ function _sanitize( raw, clean ){
} }
} }
} else {
// set error if no substantial tokens were found
messages.errors.push('invalid `text` input: must contain more than just delimiters');
} }
return messages; return messages;

35
test/unit/sanitizer/_tokenizer.js

@ -51,6 +51,41 @@ module.exports.tests.sanity_checks = function(test, common) {
t.end(); t.end();
}); });
// Input made of a single comma: it is non-empty, but a comma is one of the
// characters the tokenizer splits on, so no usable token remains. The
// sanitizer is expected to leave all token arrays empty and report one error.
test('just a comma - should error', function(t) {
var clean = { text: ',' };
// the first argument is passed as an empty object; only `clean` carries input here
var messages = sanitizer.sanitize({}, clean);
// no tokens produced (all three token arrays must be empty)
t.deepEquals(clean.tokens, [], 'no tokens');
t.deepEquals(clean.tokens_complete, [], 'no tokens');
t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
// helpful error message, and nothing reported as a warning
t.deepEquals(messages.errors, ['invalid `text` input: must contain more than just delimiters'], 'error produced');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
// Input made only of delimiter characters of several kinds. Despite the test
// name, the text is not just commas: after JavaScript string escaping the
// runtime value is `,,,\/ ,,` (commas, one backslash, one forward slash and a
// space). The expected outcome is the same as for a lone comma: empty token
// arrays and a single error.
test('several commas - should error', function(t) {
var clean = { text: ',,,\\\/ ,,' };
// the first argument is passed as an empty object; only `clean` carries input here
var messages = sanitizer.sanitize({}, clean);
// no tokens produced (all three token arrays must be empty)
t.deepEquals(clean.tokens, [], 'no tokens');
t.deepEquals(clean.tokens_complete, [], 'no tokens');
t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
// helpful error message, and nothing reported as a warning
t.deepEquals(messages.errors, ['invalid `text` input: must contain more than just delimiters'], 'error produced');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('clean.parsed_text set but clean.parsed_text.name invalid', function(t) { test('clean.parsed_text set but clean.parsed_text.name invalid', function(t) {
var clean = { parsed_text: { text: {} } }; var clean = { parsed_text: { text: {} } };

Loading…
Cancel
Save