diff --git a/sanitizer/_synthesize_analysis.js b/sanitizer/_synthesize_analysis.js index 3822d9d4..da67c752 100644 --- a/sanitizer/_synthesize_analysis.js +++ b/sanitizer/_synthesize_analysis.js @@ -1,5 +1,4 @@ const _ = require('lodash'); -const text_analyzer = require('pelias-text-analyzer'); const fields = { 'venue': 'query', @@ -17,20 +16,6 @@ function normalizeWhitespaceToSingleSpace(val) { return _.replace(_.trim(val), /\s+/g, ' '); } -function isPostalCodeOnly(parsed_text) { - return Object.keys(parsed_text).length === 1 && - parsed_text.hasOwnProperty('postalcode'); -} - -// figure out which field contains the probable house number, prefer number -// libpostal parses some inputs, like `3370 cobbe ave`, as a postcode+street -// so because we're treating the entire field as a street address, it's safe -// to assume that an identified postcode is actually a house number. -function getHouseNumberField(analyzed_address) { - // return the first field available in the libpostal response, undefined if none - return _.find(['number', 'postalcode'], _.partial(_.has, analyzed_address)); -} - function _sanitize( raw, clean ){ // error & warning messages @@ -51,35 +36,8 @@ function _sanitize( raw, clean ){ `at least one of the following fields is required: ${Object.keys(fields).join(', ')}`); } - if (clean.parsed_text.hasOwnProperty('address')) { - const analyzed_address = text_analyzer.parse(clean.parsed_text.address); - - const house_number_field = getHouseNumberField(analyzed_address); - - // if we're fairly certain that libpostal identified a house number - // (from either the house_number or postcode field), place it into the - // number field and remove the first instance of that value from address - // and assign to street - // eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St - if (house_number_field) { - clean.parsed_text.number = analyzed_address[house_number_field]; - - // remove the first instance of the number and trim whitespace - clean.parsed_text.street = _.trim(_.replace(clean.parsed_text.address, clean.parsed_text.number, '')); - - } else { - // otherwise no house number was identifiable, so treat the entire input - // as a street - clean.parsed_text.street = clean.parsed_text.address; - - } - - // the address field no longer means anything since it's been parsed, so remove it - delete clean.parsed_text.address; - - } - return messages; + } function _expected() { diff --git a/test/unit/sanitizer/_synthesize_analysis.js b/test/unit/sanitizer/_synthesize_analysis.js index 64243dc9..19ff57bd 100644 --- a/test/unit/sanitizer/_synthesize_analysis.js +++ b/test/unit/sanitizer/_synthesize_analysis.js @@ -1,18 +1,14 @@ const _ = require('lodash'); const proxyquire = require('proxyquire').noCallThru(); +const sanitizer = require('../../../sanitizer/_synthesize_analysis'); module.exports.tests = {}; module.exports.tests.text_parser = function(test, common) { test('all variables should be parsed', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - const raw = { venue: ' \t venue \t value \t ', + address: ' \t address \t value \t ', neighbourhood: ' \t neighbourhood \t value \t ', borough: ' \t borough \t value \t ', locality: ' \t locality \t value \t ', @@ -27,6 +23,7 @@ module.exports.tests.text_parser = function(test, common) { const expected_clean = { parsed_text: { query: 'venue value', + address: 'address value', neighbourhood: 'neighbourhood value', borough: 'borough value', city: 'locality value', @@ -47,12 +44,6 @@ module.exports.tests.text_parser = function(test, common) { }); test('non-string and blank string values should be treated as not supplied', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - // helper to return a random value that's considered invalid function getInvalidValue() { return _.sample([{}, [], false, '', ' \t ', 17, undefined]); @@ -87,12 +78,6 @@ module.exports.tests.text_parser = function(test, common) { }); test('no supplied fields should return error', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - const raw = {}; const clean = {}; @@ -110,12 +95,6 @@ module.exports.tests.text_parser = function(test, common) { }); test('postalcode-only parsed_text should return error', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.fail('parse should not have been called'); - } - }}); - const raw = { postalcode: 'postalcode value' }; @@ -137,132 +116,6 @@ module.exports.tests.text_parser = function(test, common) { }); - test('text_analyzer identifying house number should extract it and street', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Number Value Street Value Number Value'); - - return { - number: 'Number Value' - }; - } - }}); - - const raw = { - address: 'Number Value Street Value Number Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - number: 'Number Value', - street: 'Street Value Number Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer identifying postalcode but not house number should assign to number and remove from address', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Number Value Street Value Number Value'); - - return { - postalcode: 'Number Value' - }; - } - }}); - - const raw = { - address: 'Number Value Street Value Number Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - number: 'Number Value', - street: 'Street Value Number Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer not revealing possible number should move address to street', function(t) { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Street Value'); - - return {}; - } - }}); - - const raw = { - address: 'Street Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - street: 'Street Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - - test('text_analyzer returning undefined on address resolution should treat as if no house number field was found', t => { - var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { - 'pelias-text-analyzer': { parse: function(query) { - t.equals(query, 'Street Value'); - - return undefined; - } - }}); - - const raw = { - address: 'Street Value' - }; - - const clean = {}; - - const expected_clean = { - parsed_text: { - street: 'Street Value' - } - }; - - const messages = sanitizer().sanitize(raw, clean); - - t.deepEquals(clean, expected_clean); - t.deepEquals(messages.errors, [], 'no errors'); - t.deepEquals(messages.warnings, [], 'no warnings'); - t.end(); - - }); - test('return an array of expected parameters in object form for validation', function (t) { const sanitizer = require('../../../sanitizer/_synthesize_analysis'); const expected = [