// api/sanitizer/_city_name_standardizer.js

const _ = require('lodash');

// matches the abbreviations 'ft', 'mt', 'st', and 'ste' on word boundaries;
// the global flag lets a single replace() call rewrite every occurrence
const mountSaintFort = /\b([fm]t|ste?)\b/g;

// lookup table: lowercase abbreviation -> expanded word
const transliterations = {
  mt: 'mount',
  ft: 'fort',
  st: 'saint',
  ste: 'sainte',
};

// replacement callback: looks up the expansion for a matched abbreviation
// in the transliterations table (undefined when the key is not present)
function transliterate(match) {
  const expansion = _.get(transliterations, match);
  return expansion;
}

// transliterate ft/mt/st/ste to fort/mount/saint/sainte, respectively
//
// raw:     not read by this function
// clean:   object whose parsed_text.city, when non-empty, is rewritten in place
//          (the city is also lowercased as part of the rewrite)
// returns: { errors: [], warnings: [] } -- this sanitizer never adds messages
function _sanitize(raw, clean) {
  // error & warning messages
  // this function doesn't add any error or warning messages
  const messages = { errors: [], warnings: [] };

  // only try to transliterate if there is a city in parsed_text
  if (!_.isEmpty(_.get(clean, 'parsed_text.city'))) {
    // eg input: Ft. st Louis
    // after 1.  ft  st louis
    // after 2.  fort  saint louis
    // after 3.  fort saint louis

    // 1.  remove '.' that could abbreviate ft and mt (makes transliteration regex easier)
    //     the period is swapped for a space so 'mt.vernon' still splits into two words
    const periods_removed = _.toLower(clean.parsed_text.city).replace(/\b(mt|ft)\./g, '$1 ');

    // 2.  transliterate 'st'->'saint', etc
    const transliterated = periods_removed.replace(mountSaintFort, transliterate);

    // 3.  reduce whitespace sequences that can occur when removing periods down to a single space
    //     the global flag is required: without it only the FIRST whitespace run is
    //     collapsed, so 'mt. ft. foo' would come out as 'mount fort  foo'
    const whitespace_normalized = _.trimEnd(transliterated.replace(/\s+/g, ' '));

    clean.parsed_text.city = whitespace_normalized;

  }

  return messages;

}

module.exports = () => ({
  sanitize: _sanitize
});
transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively 8 years ago			`const _ = require('lodash');`

			`// matches 'ft', 'mt', 'saint', and 'sainte' on word boundary`
Fix for #1077, fail to search `Saint..` cities in structured queries 7 years ago			`const mountSaintFort = /\b([fm]t\|ste?)\b/g;`
transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively 8 years ago
streamlined standardizer more accurate since it's smarter about where to replace periods 8 years ago			`const transliterations = {`
			`'mt': 'mount',`
			`'ft': 'fort',`
Fix for #1077, fail to search `Saint..` cities in structured queries 7 years ago			`'st': 'saint',`
			`'ste': 'sainte'`
transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively 8 years ago			`};`

streamlined standardizer more accurate since it's smarter about where to replace periods 8 years ago			`function transliterate(match) {`
			`return _.get(transliterations, match);`
transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively 8 years ago			`}`

streamlined standardizer more accurate since it's smarter about where to replace periods 8 years ago			`// transliterate ft/mt/saint/sainte to fort/mount/st/ste, respectively`
Standardize sanitizers to export setup function that returns sanitize and expected function 7 years ago			`function _sanitize(raw, clean) {`
transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively 8 years ago			`// error & warning messages`
			`// this function doesn't add any error or warning messages`
			`const messages = { errors: [], warnings: [] };`

streamlined standardizer more accurate since it's smarter about where to replace periods 8 years ago			`// only try to transliterate if there is a city in parsed_text`
transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively 8 years ago			`if (!_.isEmpty(_.get(clean, 'parsed_text.city'))) {`
Fix for #1077, fail to search `Saint..` cities in structured queries 7 years ago			`// eg input: Ft. st Louis`
			`// after 1. ft st louis`
			`// after 2. fort saint louis`
			`// after 3. fort saint louis`
streamlined standardizer more accurate since it's smarter about where to replace periods 8 years ago
			`// 1. remove '.' that could abbreviate ft and mt (makes transliteration regex easier)`
			`const periods_removed = _.toLower(clean.parsed_text.city).replace(/\b(mt\|ft)\./g, '$1 ');`

Fix for #1077, fail to search `Saint..` cities in structured queries 7 years ago			`// 2. transliterate 'st'->'saint', etc`
streamlined standardizer more accurate since it's smarter about where to replace periods 8 years ago			`const transliterated = periods_removed.replace(mountSaintFort, transliterate);`

fix comments grammar 8 years ago			`// 3. reduce whitespace sequences that can occur when removing periods down to a single space`
streamlined standardizer more accurate since it's smarter about where to replace periods 8 years ago			`const whitespace_normalized = _.trimEnd(transliterated.replace(/\s+/, ' '));`

			`clean.parsed_text.city = whitespace_normalized;`

transliterate mt/ft/saint/sainte into mount/fort/st/ste respectively 8 years ago			`}`

			`return messages;`

			`}`

Standardize sanitizers to export setup function that returns sanitize and expected function 7 years ago			`module.exports = () => ({`
			`sanitize: _sanitize`
			`});`