From d38d4b1fa85a8c876415e08b57626bd2b9656973 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Tue, 26 Jun 2018 13:48:50 +0200 Subject: [PATCH] feat(text_sanitizer): trim whitespace and quotation marks from a range of natural languages --- sanitizer/_text.js | 6 ++- test/unit/sanitizer/_text.js | 74 +++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/sanitizer/_text.js b/sanitizer/_text.js index 1df63411..cb4dc737 100644 --- a/sanitizer/_text.js +++ b/sanitizer/_text.js @@ -1,6 +1,9 @@ const check = require('check-types'); const _ = require('lodash'); +// ref: https://en.wikipedia.org/wiki/Quotation_mark +const QUOTES = `"'«»‘’‚‛“”„‟‹›⹂「」『』〝〞〟﹁﹂﹃﹄"'「」`; + // validate texts, convert types and apply defaults function _sanitize( raw, clean ){ @@ -14,8 +17,7 @@ function _sanitize( raw, clean ){ messages.errors.push('invalid param \'text\': text length, must be >0'); } else { - clean.text = raw.text; - + clean.text = _.trim( _.trim( raw.text ), QUOTES ); } return messages; diff --git a/test/unit/sanitizer/_text.js b/test/unit/sanitizer/_text.js index 1fde352b..7a35fbbe 100644 --- a/test/unit/sanitizer/_text.js +++ b/test/unit/sanitizer/_text.js @@ -37,7 +37,7 @@ module.exports.tests.text_parser = function(test, common) { const messages = sanitizer.sanitize(raw, clean); t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0'], 'no errors'); + t.deepEquals(messages.errors, ['invalid param \'text\': text length, must be >0']); t.deepEquals(messages.warnings, [], 'no warnings'); }); @@ -46,6 +46,78 @@ module.exports.tests.text_parser = function(test, common) { }); + test('should trim whitespace', t => { + var clean = {}; + var raw = { text: ` test \n ` }; + const messages = sanitizer.sanitize(raw, clean); + + t.equals(clean.text, 'test'); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + t.end(); + }); + + test('should trim double quotes', t => { + var clean = {}; + var raw = { text: ` "test" \n ` }; + const messages = sanitizer.sanitize(raw, clean); + + t.equals(clean.text, 'test'); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + t.end(); + }); + + test('should trim single quotes', t => { + var clean = {}; + var raw = { text: ` 'test' \n ` }; + const messages = sanitizer.sanitize(raw, clean); + + t.equals(clean.text, 'test'); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + t.end(); + }); + + test('should trim German quotes', t => { + var clean = {}; + var raw = { text: ` „test“ \n ` }; + const messages = sanitizer.sanitize(raw, clean); + + t.equals(clean.text, 'test'); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + t.end(); + }); + + test('should trim guillemets', t => { + var clean = {}; + var raw = { text: ` »test« \n ` }; + const messages = sanitizer.sanitize(raw, clean); + + t.equals(clean.text, 'test'); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + t.end(); + }); + + test('should trim Chinese quotes', t => { + var clean = {}; + var raw = { text: ` ﹁「test」﹂ \n ` }; + const messages = sanitizer.sanitize(raw, clean); + + t.equals(clean.text, 'test'); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + + t.end(); + }); + test('return an array of expected parameters in object form for validation', (t) => { const expected = [{ name: 'text' }]; const validParameters = sanitizer.expected();