From 35ab0503414e89bf03d2e9af6a0453f2b7fe6466 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Mon, 12 Dec 2016 17:03:19 -0500 Subject: [PATCH 1/5] transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively --- sanitizer/_mount_saint_fort_standardizer.js | 31 +++ sanitizer/search.js | 1 + sanitizer/structured_geocoding.js | 1 + test/unit/run.js | 1 + .../_mount_saint_fort_standardizer.js | 237 ++++++++++++++++++ test/unit/sanitizer/search.js | 5 + test/unit/sanitizer/structured_geocoding.js | 5 + 7 files changed, 281 insertions(+) create mode 100644 sanitizer/_mount_saint_fort_standardizer.js create mode 100644 test/unit/sanitizer/_mount_saint_fort_standardizer.js diff --git a/sanitizer/_mount_saint_fort_standardizer.js b/sanitizer/_mount_saint_fort_standardizer.js new file mode 100644 index 00000000..1ee6c59a --- /dev/null +++ b/sanitizer/_mount_saint_fort_standardizer.js @@ -0,0 +1,31 @@ +const _ = require('lodash'); + +// matches 'ft', 'mt', 'saint', and 'sainte' on word boundary +const mountSaintFort = /\b([fm]t|sainte?)\b/g; + +const translations = { + 'mt': 'mount', + 'ft': 'fort', + 'saint': 'st', + 'sainte': 'ste' +}; + +function translate(match) { + return _.get(translations, match); +} + +function sanitize(raw, clean) { + // error & warning messages + // this function doesn't add any error or warning messages + const messages = { errors: [], warnings: [] }; + + if (!_.isEmpty(_.get(clean, 'parsed_text.city'))) { + // replace ft/mt/saint/sainte with fort/mount/st/ste, respectively + clean.parsed_text.city = _.toLower(clean.parsed_text.city.replace(/\./g, '')).replace(mountSaintFort, translate); + } + + return messages; + +} + +module.exports = sanitize; diff --git a/sanitizer/search.js b/sanitizer/search.js index d99a926e..48418b7f 100644 --- a/sanitizer/search.js +++ b/sanitizer/search.js @@ -6,6 +6,7 @@ var sanitizeAll = require('../sanitizer/sanitizeAll'), quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'), text: require('../sanitizer/_text'), iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'), + mount_saint_fort_standardizer: require('../sanitizer/_mount_saint_fort_standardizer'), size: require('../sanitizer/_size')(/* use defaults*/), layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping), sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping), diff --git a/sanitizer/structured_geocoding.js b/sanitizer/structured_geocoding.js index ebd55a56..0edc6b11 100644 --- a/sanitizer/structured_geocoding.js +++ b/sanitizer/structured_geocoding.js @@ -6,6 +6,7 @@ var sanitizeAll = require('../sanitizer/sanitizeAll'), quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'), synthesize_analysis: require('../sanitizer/_synthesize_analysis'), iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'), + mount_saint_fort_standardizer: require('../sanitizer/_mount_saint_fort_standardizer'), size: require('../sanitizer/_size')(/* use defaults*/), layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping), sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping), diff --git a/test/unit/run.js b/test/unit/run.js index a3109585..5ca4c9c0 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -48,6 +48,7 @@ var tests = [ require('./sanitizer/_ids'), require('./sanitizer/_iso2_to_iso3'), require('./sanitizer/_layers'), + require('./sanitizer/_mount_saint_fort_standardizer'), require('./sanitizer/_single_scalar_parameters'), require('./sanitizer/_size'), require('./sanitizer/_sources'), diff --git a/test/unit/sanitizer/_mount_saint_fort_standardizer.js b/test/unit/sanitizer/_mount_saint_fort_standardizer.js new file mode 100644 index 00000000..7d069257 --- /dev/null +++ b/test/unit/sanitizer/_mount_saint_fort_standardizer.js @@ -0,0 +1,237 @@ +const _ = require('lodash'); +const sanitizer = require('../../../sanitizer/_mount_saint_fort_standardizer'); + +module.exports.tests = {}; + +module.exports.tests.text_parser = function(test, common) { + test('clean without parsed_text should not throw exception', function(t) { + const raw = {}; + + const clean = { + }; + + const expected_clean = { + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('undefined parsed_text.city should be unchanged', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + address: 'address value', + city: undefined + } + }; + + const expected_clean = { + parsed_text: { + address: 'address value', + city: undefined + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('\'saint\' should be abbreviated to \'st\' wherever it appears in the city', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + query: 'saint query value', + neighbourhood: 'saint neighbourhood value', + borough: 'saint borough value', + city: 'saint city saint value saint', + county: 'saint county value', + state: 'saint state value', + postalcode: 'saint postalcode value', + country: 'saint country value' + } + }; + + const expected_clean = { + parsed_text: { + query: 'saint query value', + neighbourhood: 'saint neighbourhood value', + borough: 'saint borough value', + city: 'st city st value st', + county: 'saint county value', + state: 'saint state value', + postalcode: 'saint postalcode value', + country: 'saint country value' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('\'sainte\' should be abbreviated to \'ste\' wherever it appears in the city', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + query: 'sainte query value', + neighbourhood: 'sainte neighbourhood value', + borough: 'sainte borough value', + city: 'sainte city sainte value sainte', + county: 'sainte county value', + state: 'sainte state value', + postalcode: 'sainte postalcode value', + country: 'sainte country value' + } + }; + + const expected_clean = { + parsed_text: { + query: 'sainte query value', + neighbourhood: 'sainte neighbourhood value', + borough: 'sainte borough value', + city: 'ste city ste value ste', + county: 'sainte county value', + state: 'sainte state value', + postalcode: 'sainte postalcode value', + country: 'sainte country value' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('\'ft\' should be expanded to \'fort\' wherever it appears in the city', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + query: 'ft query value', + neighbourhood: 'ft neighbourhood value', + borough: 'ft borough value', + city: 'ft city ft value ft', + county: 'ft county value', + state: 'ft state value', + postalcode: 'ft postalcode value', + country: 'ft country value' + } + }; + + const expected_clean = { + parsed_text: { + query: 'ft query value', + neighbourhood: 'ft neighbourhood value', + borough: 'ft borough value', + city: 'fort city fort value fort', + county: 'ft county value', + state: 'ft state value', + postalcode: 'ft postalcode value', + country: 'ft country value' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('\'mt\' should be expanded to \'mount\' wherever it appears in the city', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + query: 'mt query value', + neighbourhood: 'mt neighbourhood value', + borough: 'mt borough value', + city: 'mt city mt value mt', + county: 'mt county value', + state: 'mt state value', + postalcode: 'mt postalcode value', + country: 'mt country value' + } + }; + + const expected_clean = { + parsed_text: { + query: 'mt query value', + neighbourhood: 'mt neighbourhood value', + borough: 'mt borough value', + city: 'mount city mount value mount', + county: 'mt county value', + state: 'mt state value', + postalcode: 'mt postalcode value', + country: 'mt country value' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('mixture of \'mt\', \'ft\', \'saint\', and \'sainte\' should be expanded/abbreviated', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + city: 'mt. ft. saint sainte' + } + }; + + const expected_clean = { + parsed_text: { + city: 'mount fort st ste' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('sanitizer _mount_saint_fort_standardizer: ' + name, testFunction); + } + + for( const testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/sanitizer/search.js b/test/unit/sanitizer/search.js index e2c01f8d..5844947f 100644 --- a/test/unit/sanitizer/search.js +++ b/test/unit/sanitizer/search.js @@ -25,6 +25,10 @@ module.exports.tests.sanitize = function(test, common) { called_sanitizers.push('_iso2_to_iso3'); return { errors: [], warnings: [] }; }, + '../sanitizer/_mount_saint_fort_standardizer': function() { + called_sanitizers.push('_mount_saint_fort_standardizer'); + return { errors: [], warnings: [] }; + }, '../sanitizer/_size': function() { if (arguments.length === 0) { return function() { @@ -86,6 +90,7 @@ module.exports.tests.sanitize = function(test, common) { '_deprecate_quattroshapes', '_text', '_iso2_to_iso3', + '_mount_saint_fort_standardizer', '_size', '_targets/layers', '_targets/sources', diff --git a/test/unit/sanitizer/structured_geocoding.js b/test/unit/sanitizer/structured_geocoding.js index ef9711b3..1a2295c0 100644 --- a/test/unit/sanitizer/structured_geocoding.js +++ b/test/unit/sanitizer/structured_geocoding.js @@ -25,6 +25,10 @@ module.exports.tests.sanitize = function(test, common) { called_sanitizers.push('_iso2_to_iso3'); return { errors: [], warnings: [] }; }, + '../sanitizer/_mount_saint_fort_standardizer': function() { + called_sanitizers.push('_mount_saint_fort_standardizer'); + return { errors: [], warnings: [] }; + }, '../sanitizer/_size': function() { if (arguments.length === 0) { return function() { @@ -86,6 +90,7 @@ module.exports.tests.sanitize = function(test, common) { '_deprecate_quattroshapes', '_synthesize_analysis', '_iso2_to_iso3', + '_mount_saint_fort_standardizer', '_size', '_targets/layers', '_targets/sources', From 8f6d820b430d8b42493a7201afe4c2e7d9163711 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Tue, 13 Dec 2016 00:29:37 -0500 Subject: [PATCH 2/5] streamlined standardizer more accurate since it's smarter about where to replace periods --- sanitizer/_mount_saint_fort_standardizer.js | 33 +++++++--- .../_mount_saint_fort_standardizer.js | 60 +++++++++++++++++-- 2 files changed, 79 insertions(+), 14 deletions(-) diff --git a/sanitizer/_mount_saint_fort_standardizer.js b/sanitizer/_mount_saint_fort_standardizer.js index 1ee6c59a..72e8aa10 100644 --- a/sanitizer/_mount_saint_fort_standardizer.js +++ b/sanitizer/_mount_saint_fort_standardizer.js @@ -3,25 +3,42 @@ const _ = require('lodash'); // matches 'ft', 'mt', 'saint', and 'sainte' on word boundary const mountSaintFort = /\b([fm]t|sainte?)\b/g; -const translations = { - 'mt': 'mount', - 'ft': 'fort', - 'saint': 'st', +const transliterations = { + 'mt': 'mount', + 'ft': 'fort', + 'saint': 'st', 'sainte': 'ste' }; -function translate(match) { - return _.get(translations, match); +function transliterate(match) { + return _.get(transliterations, match); } +// transliterate ft/mt/saint/sainte to fort/mount/st/ste, respectively function sanitize(raw, clean) { // error & warning messages // this function doesn't add any error or warning messages const messages = { errors: [], warnings: [] }; + // only try to transliterate if there is a city in parsed_text if (!_.isEmpty(_.get(clean, 'parsed_text.city'))) { - // replace ft/mt/saint/sainte with fort/mount/st/ste, respectively - clean.parsed_text.city = _.toLower(clean.parsed_text.city.replace(/\./g, '')).replace(mountSaintFort, translate); + // eg input: Ft. Saint Louis + // after 1. ft saint louis + // after 2. fort st louis + // after 3. fort st louis + + // 1. remove '.' that could abbreviate ft and mt (makes transliteration regex easier) + const periods_removed = _.toLower(clean.parsed_text.city).replace(/\b(mt|ft)\./g, '$1 '); + + // 2. transliterate 'saint'->'st', etc + const transliterated = periods_removed.replace(mountSaintFort, transliterate); + + // 3. whitespace-normalize by replacing much whitespace with a space and trimming + // duplicate whitespace can be introduced when removing periods + const whitespace_normalized = _.trimEnd(transliterated.replace(/\s+/, ' ')); + + clean.parsed_text.city = whitespace_normalized; + } return messages; diff --git a/test/unit/sanitizer/_mount_saint_fort_standardizer.js b/test/unit/sanitizer/_mount_saint_fort_standardizer.js index 7d069257..54e68fea 100644 --- a/test/unit/sanitizer/_mount_saint_fort_standardizer.js +++ b/test/unit/sanitizer/_mount_saint_fort_standardizer.js @@ -56,7 +56,7 @@ module.exports.tests.text_parser = function(test, common) { query: 'saint query value', neighbourhood: 'saint neighbourhood value', borough: 'saint borough value', - city: 'saint city saint value saint', + city: 'SainT city sAiNt value saInt', county: 'saint county value', state: 'saint state value', postalcode: 'saint postalcode value', @@ -94,7 +94,7 @@ module.exports.tests.text_parser = function(test, common) { query: 'sainte query value', neighbourhood: 'sainte neighbourhood value', borough: 'sainte borough value', - city: 'sainte city sainte value sainte', + city: 'SaintE city sAinTe value saINte', county: 'sainte county value', state: 'sainte state value', postalcode: 'sainte postalcode value', @@ -132,7 +132,7 @@ module.exports.tests.text_parser = function(test, common) { query: 'ft query value', neighbourhood: 'ft neighbourhood value', borough: 'ft borough value', - city: 'ft city ft value ft', + city: 'Ft city ft value fT', county: 'ft county value', state: 'ft state value', postalcode: 'ft postalcode value', @@ -170,7 +170,7 @@ module.exports.tests.text_parser = function(test, common) { query: 'mt query value', neighbourhood: 'mt neighbourhood value', borough: 'mt borough value', - city: 'mt city mt value mt', + city: 'Mt city mt value mT', county: 'mt county value', state: 'mt state value', postalcode: 'mt postalcode value', @@ -205,13 +205,61 @@ module.exports.tests.text_parser = function(test, common) { const clean = { parsed_text: { - city: 'mt. ft. saint sainte' + city: 'mt. ft saint sainte mt ft.' } }; const expected_clean = { parsed_text: { - city: 'mount fort st ste' + city: 'mount fort st ste mount fort' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('period word boundary on \'mt.\' should replace with a space', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + city: 'mt.city' + } + }; + + const expected_clean = { + parsed_text: { + city: 'mount city' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('period word boundary on \'ft.\' should replace with a space', function(t) { + const raw = {}; + + const clean = { + parsed_text: { + city: 'ft.city' + } + }; + + const expected_clean = { + parsed_text: { + city: 'fort city' } }; From 183f824577100f535f445045fbb978e2d4dd47ce Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 14 Dec 2016 14:51:32 -0500 Subject: [PATCH 3/5] fix comments grammar --- sanitizer/_mount_saint_fort_standardizer.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sanitizer/_mount_saint_fort_standardizer.js b/sanitizer/_mount_saint_fort_standardizer.js index 72e8aa10..3bcf817d 100644 --- a/sanitizer/_mount_saint_fort_standardizer.js +++ b/sanitizer/_mount_saint_fort_standardizer.js @@ -33,8 +33,7 @@ function sanitize(raw, clean) { // 2. transliterate 'saint'->'st', etc const transliterated = periods_removed.replace(mountSaintFort, transliterate); - // 3. whitespace-normalize by replacing much whitespace with a space and trimming - // duplicate whitespace can be introduced when removing periods + // 3. reduce whitespace sequences that can occur when removing periods down to a single space const whitespace_normalized = _.trimEnd(transliterated.replace(/\s+/, ' ')); clean.parsed_text.city = whitespace_normalized; From 9e569bae17cf3c2114dc64f02fc6ee4c1f043ca1 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 14 Dec 2016 14:57:09 -0500 Subject: [PATCH 4/5] renamed mt/st/ft sanitizer to something more generic --- ...aint_fort_standardizer.js => _city_name_standardizer.js} | 0 sanitizer/search.js | 2 +- sanitizer/structured_geocoding.js | 2 +- test/unit/run.js | 2 +- ...aint_fort_standardizer.js => _city_name_standardizer.js} | 4 ++-- test/unit/sanitizer/search.js | 6 +++--- test/unit/sanitizer/structured_geocoding.js | 6 +++--- 7 files changed, 11 insertions(+), 11 deletions(-) rename sanitizer/{_mount_saint_fort_standardizer.js => _city_name_standardizer.js} (100%) rename test/unit/sanitizer/{_mount_saint_fort_standardizer.js => _city_name_standardizer.js} (97%) diff --git a/sanitizer/_mount_saint_fort_standardizer.js b/sanitizer/_city_name_standardizer.js similarity index 100% rename from sanitizer/_mount_saint_fort_standardizer.js rename to sanitizer/_city_name_standardizer.js diff --git a/sanitizer/search.js b/sanitizer/search.js index 48418b7f..5694c9eb 100644 --- a/sanitizer/search.js +++ b/sanitizer/search.js @@ -6,7 +6,7 @@ var sanitizeAll = require('../sanitizer/sanitizeAll'), quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'), text: require('../sanitizer/_text'), iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'), - mount_saint_fort_standardizer: require('../sanitizer/_mount_saint_fort_standardizer'), + city_name_standardizer: require('../sanitizer/_city_name_standardizer'), size: require('../sanitizer/_size')(/* use defaults*/), layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping), sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping), diff --git a/sanitizer/structured_geocoding.js b/sanitizer/structured_geocoding.js index 0edc6b11..29edce41 100644 --- a/sanitizer/structured_geocoding.js +++ b/sanitizer/structured_geocoding.js @@ -6,7 +6,7 @@ var sanitizeAll = require('../sanitizer/sanitizeAll'), quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'), synthesize_analysis: require('../sanitizer/_synthesize_analysis'), iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'), - mount_saint_fort_standardizer: require('../sanitizer/_mount_saint_fort_standardizer'), + city_name_standardizer: require('../sanitizer/_city_name_standardizer'), size: require('../sanitizer/_size')(/* use defaults*/), layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping), sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping), diff --git a/test/unit/run.js b/test/unit/run.js index 5ca4c9c0..d21890e7 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -48,7 +48,7 @@ var tests = [ require('./sanitizer/_ids'), require('./sanitizer/_iso2_to_iso3'), require('./sanitizer/_layers'), - require('./sanitizer/_mount_saint_fort_standardizer'), + require('./sanitizer/_city_name_standardizer'), require('./sanitizer/_single_scalar_parameters'), require('./sanitizer/_size'), require('./sanitizer/_sources'), diff --git a/test/unit/sanitizer/_mount_saint_fort_standardizer.js b/test/unit/sanitizer/_city_name_standardizer.js similarity index 97% rename from test/unit/sanitizer/_mount_saint_fort_standardizer.js rename to test/unit/sanitizer/_city_name_standardizer.js index 54e68fea..da362868 100644 --- a/test/unit/sanitizer/_mount_saint_fort_standardizer.js +++ b/test/unit/sanitizer/_city_name_standardizer.js @@ -1,5 +1,5 @@ const _ = require('lodash'); -const sanitizer = require('../../../sanitizer/_mount_saint_fort_standardizer'); +const sanitizer = require('../../../sanitizer/_city_name_standardizer'); module.exports.tests = {}; @@ -276,7 +276,7 @@ module.exports.tests.text_parser = function(test, common) { module.exports.all = function (tape, common) { function test(name, testFunction) { - return tape('sanitizer _mount_saint_fort_standardizer: ' + name, testFunction); + return tape('sanitizer _city_name_standardizer: ' + name, testFunction); } for( const testCase in module.exports.tests ){ diff --git a/test/unit/sanitizer/search.js b/test/unit/sanitizer/search.js index 5844947f..0d44103d 100644 --- a/test/unit/sanitizer/search.js +++ b/test/unit/sanitizer/search.js @@ -25,8 +25,8 @@ module.exports.tests.sanitize = function(test, common) { called_sanitizers.push('_iso2_to_iso3'); return { errors: [], warnings: [] }; }, - '../sanitizer/_mount_saint_fort_standardizer': function() { - called_sanitizers.push('_mount_saint_fort_standardizer'); + '../sanitizer/_city_name_standardizer': function() { + called_sanitizers.push('_city_name_standardizer'); return { errors: [], warnings: [] }; }, '../sanitizer/_size': function() { @@ -90,7 +90,7 @@ module.exports.tests.sanitize = function(test, common) { '_deprecate_quattroshapes', '_text', '_iso2_to_iso3', - '_mount_saint_fort_standardizer', + '_city_name_standardizer', '_size', '_targets/layers', '_targets/sources', diff --git a/test/unit/sanitizer/structured_geocoding.js b/test/unit/sanitizer/structured_geocoding.js index 1a2295c0..133be462 100644 --- a/test/unit/sanitizer/structured_geocoding.js +++ b/test/unit/sanitizer/structured_geocoding.js @@ -25,8 +25,8 @@ module.exports.tests.sanitize = function(test, common) { called_sanitizers.push('_iso2_to_iso3'); return { errors: [], warnings: [] }; }, - '../sanitizer/_mount_saint_fort_standardizer': function() { - called_sanitizers.push('_mount_saint_fort_standardizer'); + '../sanitizer/_city_name_standardizer': function() { + called_sanitizers.push('_city_name_standardizer'); return { errors: [], warnings: [] }; }, '../sanitizer/_size': function() { @@ -90,7 +90,7 @@ module.exports.tests.sanitize = function(test, common) { '_deprecate_quattroshapes', '_synthesize_analysis', '_iso2_to_iso3', - '_mount_saint_fort_standardizer', + '_city_name_standardizer', '_size', '_targets/layers', '_targets/sources', From 4f999adce1b36061cb3a4565e1497b1409fe341c Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Tue, 27 Dec 2016 12:32:03 -0500 Subject: [PATCH 5/5] fix: empty values in parsed text caused exceptions --- query/text_parser.js | 25 ++++++++++---------- test/unit/query/text_parser.js | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/query/text_parser.js b/query/text_parser.js index 4ccc8661..fd07cd0d 100644 --- a/query/text_parser.js +++ b/query/text_parser.js @@ -1,67 +1,68 @@ var logger = require('pelias-logger').get('api'); +var _ = require('lodash'); // all the address parsing logic function addParsedVariablesToQueryVariables( parsed_text, vs ){ // ==== add parsed matches [address components] ==== // query - Mexitaly, Sunoco, Lowes - if (parsed_text.hasOwnProperty('query')) { + if ( ! _.isEmpty(parsed_text.query) ) { vs.var('input:query', parsed_text.query); } // categories - restaurants, hotels, bars - if (parsed_text.hasOwnProperty('category')) { + if ( ! _.isEmpty(parsed_text.category) ) { vs.var('input:category', parsed_text.category); } - if (parsed_text.hasOwnProperty('address')) { + if ( ! _.isEmpty(parsed_text.address) ) { vs.var( 'input:address', parsed_text.address ); } // house number - if( parsed_text.hasOwnProperty('number') ){ + if( ! _.isEmpty(parsed_text.number) ){ vs.var( 'input:housenumber', parsed_text.number ); } // street name - if( parsed_text.hasOwnProperty('street') ){ + if( ! _.isEmpty(parsed_text.street) ){ vs.var( 'input:street', parsed_text.street ); } // neighbourhood - if (parsed_text.hasOwnProperty('neighbourhood')) { + if ( ! _.isEmpty(parsed_text.neighbourhood) ) { vs.var( 'input:neighbourhood', parsed_text.neighbourhood); } // borough - if (parsed_text.hasOwnProperty('borough')) { + if ( ! _.isEmpty(parsed_text.borough) ) { vs.var( 'input:borough', parsed_text.borough); } // postal code - if( parsed_text.hasOwnProperty('postalcode') ){ + if( ! _.isEmpty(parsed_text.postalcode) ){ vs.var( 'input:postcode', parsed_text.postalcode ); } // ==== add parsed matches [admin components] ==== // city - if( parsed_text.hasOwnProperty('city') ){ + if( ! _.isEmpty(parsed_text.city) ){ vs.var( 'input:locality', parsed_text.city ); } // county - if( parsed_text.hasOwnProperty('county') ){ + if( ! _.isEmpty(parsed_text.county) ){ vs.var( 'input:county', parsed_text.county ); } // state - if( parsed_text.hasOwnProperty('state') ){ + if( ! _.isEmpty(parsed_text.state) ){ vs.var( 'input:region', parsed_text.state ); } // country - if( parsed_text.hasOwnProperty('country') ){ + if( ! _.isEmpty(parsed_text.country) ){ vs.var( 'input:country', parsed_text.country ); } diff --git a/test/unit/query/text_parser.js b/test/unit/query/text_parser.js index 839ddfbd..86d443fc 100644 --- a/test/unit/query/text_parser.js +++ b/test/unit/query/text_parser.js @@ -157,6 +157,48 @@ module.exports.tests.housenumber_special_cases = function(test, common) { }; +module.exports.tests.empty_values = function(test, common) { + test('empty string values not set', function (t) { + var parsed_text = { + query: '', + category: '', + number: '', + street: '', + address: '', + neighbourhood: '', + borough: '', + postalcode: '', + city: '', + county: '', + state: '', + country: '' + }; + var vs = new VariableStore(); + + function testIt() { + text_parser(parsed_text, vs); + } + + t.doesNotThrow(testIt, 'exception should not be thrown'); + + t.false(vs.isset('input:query')); + t.false(vs.isset('input:category')); + t.false(vs.isset('input:housenumber')); + t.false(vs.isset('input:street')); + t.false(vs.isset('input:address')); + t.false(vs.isset('input:neighbourhood')); + t.false(vs.isset('input:borough')); + t.false(vs.isset('input:postcode')); + t.false(vs.isset('input:locality')); + t.false(vs.isset('input:county')); + t.false(vs.isset('input:region')); + t.false(vs.isset('input:country')); + t.end(); + + }); + +}; + module.exports.all = function (tape, common) { function test(name, testFunction) { return tape('text_parser ' + name, testFunction);