Browse Source

fix(sanitizer): Trim whitespace in addressit queries

This is a follow-up PR to https://github.com/pelias/api/pull/1171 and
https://github.com/pelias/api/pull/1170.

Apparently we have two different `text` sanitizers, and autocomplete
queries were treating a single space as valid input. This had a
particularly bad outcome: it generated queries (see an
[example](https://gist.github.com/orangejulius/2cc26c7eed39311b6eaf1fb0175c13e6)) that had no main query clause.

Without a main query clause, these queries matched basically every
document in the index. According to the geocode.earth slowlog, they took
**__8 seconds per shard__**.
pull/1211/head
Julian Simioni 6 years ago
parent
commit
7e4559fdc2
No known key found for this signature in database
GPG Key ID: B9EEB0C6EE0910A1
  1. 8
      sanitizer/_text_addressit.js
  2. 14
      test/unit/sanitizer/_text_addressit.js

8
sanitizer/_text_addressit.js

@ -3,6 +3,9 @@ var parser = require('addressit');
var _ = require('lodash'); var _ = require('lodash');
var logger = require('pelias-logger').get('api'); var logger = require('pelias-logger').get('api');
// ref: https://en.wikipedia.org/wiki/Quotation_mark
const QUOTES = `"'«»‘’‚‛“”„‟‹›⹂「」『』〝〞〟﹁﹂﹃﹄"'「」`;
// validate texts, convert types and apply defaults // validate texts, convert types and apply defaults
function _sanitize( raw, clean ){ function _sanitize( raw, clean ){
@ -10,7 +13,8 @@ function _sanitize( raw, clean ){
var messages = { errors: [], warnings: [] }; var messages = { errors: [], warnings: [] };
// invalid input 'text' // invalid input 'text'
if( !check.nonEmptyString( raw.text ) ){ const text = _.trim( _.trim( raw.text ), QUOTES );
if( !check.nonEmptyString( text ) ){
messages.errors.push('invalid param \'text\': text length, must be >0'); messages.errors.push('invalid param \'text\': text length, must be >0');
} }
@ -18,7 +22,7 @@ function _sanitize( raw, clean ){
else { else {
// valid text // valid text
clean.text = raw.text; clean.text = text;
clean.parser = 'addressit'; clean.parser = 'addressit';
// remove anything that may have been parsed before // remove anything that may have been parsed before

14
test/unit/sanitizer/_text_addressit.js

@ -339,6 +339,20 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('whitespace-only input counts as empty', (t) => {
const raw = { text: ' ' };
const clean = {};
const expected_clean = {};
const messages = sanitizer.sanitize(raw, clean);
t.deepEquals(clean, expected_clean);
t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0']);
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('return an array of expected parameters in object form for validation', (t) => { test('return an array of expected parameters in object form for validation', (t) => {
const expected = [{ name: 'text' }]; const expected = [{ name: 'text' }];
const validParameters = sanitizer.expected(); const validParameters = sanitizer.expected();

Loading…
Cancel
Save