Browse Source

Merge pull request #1214 from pelias/fix-autocomplete-tokenization

fix(autocomplete): detect the case where input text is unsubstantial
libpostal_aus_unit_numbers
Julian Simioni 6 years ago committed by GitHub
parent
commit
a982eab215
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 6
      sanitizer/_tokenizer.js
  2. 35
      test/unit/sanitizer/_tokenizer.js

6
sanitizer/_tokenizer.js

@ -62,6 +62,9 @@ function _sanitize( raw, clean ){
clean.tokens = text clean.tokens = text
.split(/[\s,\\\/]+/) // split on delimiters .split(/[\s,\\\/]+/) // split on delimiters
.filter(function(el){return el;}); // remove empty elements .filter(function(el){return el;}); // remove empty elements
} else {
// text is empty, this sanitizer should be a no-op
return messages;
} }
/** /**
@ -96,6 +99,9 @@ function _sanitize( raw, clean ){
} }
} }
} else {
// set error if no substantial tokens were found
messages.errors.push('invalid `text` input: must contain more than just delimiters');
} }
return messages; return messages;

35
test/unit/sanitizer/_tokenizer.js

@ -51,6 +51,41 @@ module.exports.tests.sanity_checks = function(test, common) {
t.end(); t.end();
}); });
test('just a comma - should error', function(t) {
var clean = { text: ',' };
var messages = sanitizer.sanitize({}, clean);
// no tokens produced
t.deepEquals(clean.tokens, [], 'no tokens');
t.deepEquals(clean.tokens_complete, [], 'no tokens');
t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
// helpful error message
t.deepEquals(messages.errors, ['invalid `text` input: must contain more than just delimiters'], 'error produced');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('several commas - should error', function(t) {
var clean = { text: ',,,\\\/ ,,' };
var messages = sanitizer.sanitize({}, clean);
// no tokens produced
t.deepEquals(clean.tokens, [], 'no tokens');
t.deepEquals(clean.tokens_complete, [], 'no tokens');
t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
// helpful error message
t.deepEquals(messages.errors, ['invalid `text` input: must contain more than just delimiters'], 'error produced');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('clean.parsed_text set but clean.parsed_text.name invalid', function(t) { test('clean.parsed_text set but clean.parsed_text.name invalid', function(t) {
var clean = { parsed_text: { text: {} } }; var clean = { parsed_text: { text: {} } };

Loading…
Cancel
Save