From 30cd30236a804ce706c87ff83554c39817b2f36f Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 5 Jul 2017 08:36:43 -0400 Subject: [PATCH] don't call libpostal if sources=whosonfirst --- sanitizer/_text.js | 26 ++++++++------- test/unit/sanitizer/_text.js | 61 ++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 12 deletions(-) diff --git a/sanitizer/_text.js b/sanitizer/_text.js index 874a9b17..d7d1227b 100644 --- a/sanitizer/_text.js +++ b/sanitizer/_text.js @@ -1,29 +1,31 @@ -var check = require('check-types'), - text_analyzer = require('pelias-text-analyzer'); +const check = require('check-types'); +const text_analyzer = require('pelias-text-analyzer'); +const _ = require('lodash'); // validate texts, convert types and apply defaults function sanitize( raw, clean ){ - // error & warning messages - var messages = { errors: [], warnings: [] }; + const messages = { errors: [], warnings: [] }; // invalid input 'text' // must call `!check.nonEmptyString` since `check.emptyString` returns // `false` for `undefined` and `null` if( !check.nonEmptyString( raw.text ) ){ messages.errors.push('invalid param \'text\': text length, must be >0'); - } - // valid input 'text' - else { - // valid text + } else { clean.text = raw.text; - // parse text with query parser - var parsed_text = text_analyzer.parse(clean.text); - if (check.assigned(parsed_text)) { - clean.parsed_text = parsed_text; + // only call libpostal if there are other sources besides whosonfirst + // since placeholder will take care of it later + if (!_.isEqual(clean.sources, ['whosonfirst'])) { + // parse text with query parser + const parsed_text = text_analyzer.parse(clean.text); + if (check.assigned(parsed_text)) { + clean.parsed_text = parsed_text; + } } + } return messages; diff --git a/test/unit/sanitizer/_text.js b/test/unit/sanitizer/_text.js index c29f0a1d..a8a46137 100644 --- a/test/unit/sanitizer/_text.js +++ b/test/unit/sanitizer/_text.js @@ -142,6 +142,67 @@ module.exports.tests.text_parser = function(test, common) { }); + test('sources=whosonfirst should not call text_analyzer and set clean.text from raw.text', (t) => { + const sanitizer = proxyquire('../../../sanitizer/_text', { + 'pelias-text-analyzer': { parse: query => t.fail('should not have been called') } + }); + + const raw = { + text: 'raw clean.text' + }; + const clean = { + sources: ['whosonfirst'], + text: 'original clean.text' + }; + + const expected_clean = { + sources: ['whosonfirst'], + text: 'raw clean.text' + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages, { errors: [], warnings: [] }); + t.end(); + + }); + + test('sources with whosonfirst + others should call analyzer', (t) => { + const sanitizer = proxyquire('../../../sanitizer/_text', { + 'pelias-text-analyzer': { parse: function(query) { + return { + key1: 'value 1', + key2: 'value 2' + }; + } + }}); + + const raw = { + text: 'raw text' + }; + const clean = { + sources: ['whosonfirst', 'another source'], + text: 'clean text' + }; + + const expected_clean = { + sources: ['whosonfirst', 'another source'], + text: 'raw text', + parsed_text: { + key1: 'value 1', + key2: 'value 2' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages, { errors: [], warnings: [] }); + t.end(); + + }); + }; module.exports.all = function (tape, common) {