Browse Source

transliterate mt/ft/saint/sainte into mount/fort/st/ste, respectively
pull/767/head
Stephen Hess 8 years ago
parent
commit
35ab050341
  1. 31
      sanitizer/_mount_saint_fort_standardizer.js
  2. 1
      sanitizer/search.js
  3. 1
      sanitizer/structured_geocoding.js
  4. 1
      test/unit/run.js
  5. 237
      test/unit/sanitizer/_mount_saint_fort_standardizer.js
  6. 5
      test/unit/sanitizer/search.js
  7. 5
      test/unit/sanitizer/structured_geocoding.js

31
sanitizer/_mount_saint_fort_standardizer.js

@ -0,0 +1,31 @@
const _ = require('lodash');
// Lookup table: token matched in the city name -> its standardized form.
const translations = {
  // abbreviations that are spelled out
  ft: 'fort',
  mt: 'mount',
  // full words that are abbreviated
  saint: 'st',
  sainte: 'ste'
};

// Matches 'ft', 'mt', 'saint' and 'sainte' only as whole words (word boundary
// on both sides). The 'g' flag makes replace() visit every occurrence.
const mountSaintFort = /\b([fm]t|sainte?)\b/g;
/**
 * Replacement callback: looks up the standardized form of a matched token
 * in the `translations` table.
 *
 * @param {string} match - the token that was matched
 * @returns {string|undefined} the table entry for `match`, or undefined when there is none
 */
function translate(match) {
  const replacement = _.get(translations, match);
  return replacement;
}
/**
 * Standardizes common place-name tokens in `clean.parsed_text.city`.
 *
 * When the city is a non-empty string, periods are removed, the value is
 * lowercased, and every whole-word 'mt'/'ft'/'saint'/'sainte' is replaced by
 * 'mount'/'fort'/'st'/'ste', respectively. `clean` is modified in place and
 * no other field is read or written.
 *
 * @param {object} raw - not used by this sanitizer
 * @param {object} clean - object whose `parsed_text.city` is rewritten in place
 * @returns {{errors: Array, warnings: Array}} always empty; this sanitizer
 *   never adds error or warning messages
 */
function sanitize(raw, clean) {
  // this function doesn't add any error or warning messages
  const messages = { errors: [], warnings: [] };

  const city = _.get(clean, 'parsed_text.city');

  // only a non-empty string can be standardized; anything else (missing,
  // array, object, ...) is left untouched instead of throwing on `.replace`
  if (!_.isString(city) || _.isEmpty(city)) {
    return messages;
  }

  // strip periods so that abbreviations such as 'mt.' don't leave a stray '.'
  // behind, and lowercase so the pattern matches regardless of input casing
  const normalized = _.toLower(city.replace(/\./g, ''));

  // replace ft/mt/saint/sainte with fort/mount/st/ste, respectively
  clean.parsed_text.city = normalized.replace(mountSaintFort, translate);

  return messages;
}
module.exports = sanitize;

1
sanitizer/search.js

@ -6,6 +6,7 @@ var sanitizeAll = require('../sanitizer/sanitizeAll'),
quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'), quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'),
text: require('../sanitizer/_text'), text: require('../sanitizer/_text'),
iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'), iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'),
mount_saint_fort_standardizer: require('../sanitizer/_mount_saint_fort_standardizer'),
size: require('../sanitizer/_size')(/* use defaults*/), size: require('../sanitizer/_size')(/* use defaults*/),
layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping), layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping),
sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping), sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping),

1
sanitizer/structured_geocoding.js

@ -6,6 +6,7 @@ var sanitizeAll = require('../sanitizer/sanitizeAll'),
quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'), quattroshapes_deprecation: require('../sanitizer/_deprecate_quattroshapes'),
synthesize_analysis: require('../sanitizer/_synthesize_analysis'), synthesize_analysis: require('../sanitizer/_synthesize_analysis'),
iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'), iso2_to_iso3: require('../sanitizer/_iso2_to_iso3'),
mount_saint_fort_standardizer: require('../sanitizer/_mount_saint_fort_standardizer'),
size: require('../sanitizer/_size')(/* use defaults*/), size: require('../sanitizer/_size')(/* use defaults*/),
layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping), layers: require('../sanitizer/_targets')('layers', type_mapping.layer_mapping),
sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping), sources: require('../sanitizer/_targets')('sources', type_mapping.source_mapping),

1
test/unit/run.js

@ -48,6 +48,7 @@ var tests = [
require('./sanitizer/_ids'), require('./sanitizer/_ids'),
require('./sanitizer/_iso2_to_iso3'), require('./sanitizer/_iso2_to_iso3'),
require('./sanitizer/_layers'), require('./sanitizer/_layers'),
require('./sanitizer/_mount_saint_fort_standardizer'),
require('./sanitizer/_single_scalar_parameters'), require('./sanitizer/_single_scalar_parameters'),
require('./sanitizer/_size'), require('./sanitizer/_size'),
require('./sanitizer/_sources'), require('./sanitizer/_sources'),

237
test/unit/sanitizer/_mount_saint_fort_standardizer.js

@ -0,0 +1,237 @@
const _ = require('lodash');
const sanitizer = require('../../../sanitizer/_mount_saint_fort_standardizer');
module.exports.tests = {};
module.exports.tests.text_parser = function(test, common) {
  // Runs the sanitizer on `clean` (which it may modify in place), compares the
  // result with `expected_clean`, and checks that no messages were returned.
  function verify(t, clean, expected_clean) {
    const messages = sanitizer({}, clean);

    t.deepEquals(clean, expected_clean);
    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');
    t.end();
  }

  // parsed_text fields populated by the single-token tests
  const fields = ['query', 'neighbourhood', 'borough', 'city', 'county', 'state', 'postalcode', 'country'];

  // Builds a `clean` object in which every field holds '<token> <field> value',
  // except city, which carries `cityToken` at its start, middle and end.
  function buildClean(token, cityToken) {
    const parsed_text = {};

    fields.forEach(function(field) {
      if (field === 'city') {
        parsed_text.city = [cityToken, 'city', cityToken, 'value', cityToken].join(' ');
      } else {
        parsed_text[field] = [token, field, 'value'].join(' ');
      }
    });

    return { parsed_text: parsed_text };
  }

  test('clean without parsed_text should not throw exception', function(t) {
    verify(t, {}, {});
  });

  test('undefined parsed_text.city should be unchanged', function(t) {
    const clean = {
      parsed_text: { address: 'address value', city: undefined }
    };
    const expected_clean = {
      parsed_text: { address: 'address value', city: undefined }
    };

    verify(t, clean, expected_clean);
  });

  // one test per token: only the city field may change, every other field
  // must keep the original token
  const singleTokenCases = [
    { token: 'saint', replacement: 'st', verb: 'abbreviated' },
    { token: 'sainte', replacement: 'ste', verb: 'abbreviated' },
    { token: 'ft', replacement: 'fort', verb: 'expanded' },
    { token: 'mt', replacement: 'mount', verb: 'expanded' }
  ];

  singleTokenCases.forEach(function(testCase) {
    const name = `'${testCase.token}' should be ${testCase.verb} to '${testCase.replacement}' wherever it appears in the city`;

    test(name, function(t) {
      const clean = buildClean(testCase.token, testCase.token);
      const expected_clean = buildClean(testCase.token, testCase.replacement);

      verify(t, clean, expected_clean);
    });
  });

  test('mixture of \'mt\', \'ft\', \'saint\', and \'sainte\' should be expanded/abbreviated', function(t) {
    const clean = {
      parsed_text: { city: 'mt. ft. saint sainte' }
    };
    const expected_clean = {
      parsed_text: { city: 'mount fort st ste' }
    };

    verify(t, clean, expected_clean);
  });

};
// Registers every test group in `module.exports.tests` with the given tape
// instance, prefixing each test name with this suite's label.
module.exports.all = function (tape, common) {
  const suites = module.exports.tests;

  // wraps tape so that each test name identifies the sanitizer under test
  const test = function(name, testFunction) {
    return tape('sanitizer _mount_saint_fort_standardizer: ' + name, testFunction);
  };

  Object.keys(suites).forEach(function(testCase) {
    suites[testCase](test, common);
  });
};

5
test/unit/sanitizer/search.js

@ -25,6 +25,10 @@ module.exports.tests.sanitize = function(test, common) {
called_sanitizers.push('_iso2_to_iso3'); called_sanitizers.push('_iso2_to_iso3');
return { errors: [], warnings: [] }; return { errors: [], warnings: [] };
}, },
'../sanitizer/_mount_saint_fort_standardizer': function() {
called_sanitizers.push('_mount_saint_fort_standardizer');
return { errors: [], warnings: [] };
},
'../sanitizer/_size': function() { '../sanitizer/_size': function() {
if (arguments.length === 0) { if (arguments.length === 0) {
return function() { return function() {
@ -86,6 +90,7 @@ module.exports.tests.sanitize = function(test, common) {
'_deprecate_quattroshapes', '_deprecate_quattroshapes',
'_text', '_text',
'_iso2_to_iso3', '_iso2_to_iso3',
'_mount_saint_fort_standardizer',
'_size', '_size',
'_targets/layers', '_targets/layers',
'_targets/sources', '_targets/sources',

5
test/unit/sanitizer/structured_geocoding.js

@ -25,6 +25,10 @@ module.exports.tests.sanitize = function(test, common) {
called_sanitizers.push('_iso2_to_iso3'); called_sanitizers.push('_iso2_to_iso3');
return { errors: [], warnings: [] }; return { errors: [], warnings: [] };
}, },
'../sanitizer/_mount_saint_fort_standardizer': function() {
called_sanitizers.push('_mount_saint_fort_standardizer');
return { errors: [], warnings: [] };
},
'../sanitizer/_size': function() { '../sanitizer/_size': function() {
if (arguments.length === 0) { if (arguments.length === 0) {
return function() { return function() {
@ -86,6 +90,7 @@ module.exports.tests.sanitize = function(test, common) {
'_deprecate_quattroshapes', '_deprecate_quattroshapes',
'_synthesize_analysis', '_synthesize_analysis',
'_iso2_to_iso3', '_iso2_to_iso3',
'_mount_saint_fort_standardizer',
'_size', '_size',
'_targets/layers', '_targets/layers',
'_targets/sources', '_targets/sources',

Loading…
Cancel
Save