From 75366f98c420c8b4fd3079bf35ff3aa24837e139 Mon Sep 17 00:00:00 2001 From: Stephen Hess Date: Wed, 23 Nov 2016 00:14:25 -0500 Subject: [PATCH] added text-analyzer call to parse supplied `address` field --- sanitizer/_synthesize_analysis.js | 28 +++++ test/unit/sanitizer/_synthesize_analysis.js | 124 +++++++++++++++++++- 2 files changed, 149 insertions(+), 3 deletions(-) diff --git a/sanitizer/_synthesize_analysis.js b/sanitizer/_synthesize_analysis.js index 04ffeb34..79c53187 100644 --- a/sanitizer/_synthesize_analysis.js +++ b/sanitizer/_synthesize_analysis.js @@ -1,4 +1,5 @@ const _ = require('lodash'); +const text_analyzer = require('pelias-text-analyzer'); const fields = { 'address': 'address', @@ -20,6 +21,15 @@ function isPostalCodeOnly(parsed_text) { parsed_text.hasOwnProperty('postalcode'); } +function getHouseNumberField(analyzed_address) { + for (var field of ['number', 'postalcode']) { + if (analyzed_address.hasOwnProperty(field)) { + return field; + } + } + +} + function sanitize( raw, clean ){ // error & warning messages @@ -43,6 +53,24 @@ function sanitize( raw, clean ){ `at least one of the following fields is required: ${Object.keys(fields).join(', ')}`); } + if (clean.parsed_text.hasOwnProperty('address')) { + var analyzed_address = text_analyzer.parse(clean.parsed_text.address); + + const house_number_field = getHouseNumberField(analyzed_address); + + if (house_number_field) { + clean.parsed_text.number = analyzed_address[house_number_field]; + + clean.parsed_text.street = _.trim(_.replace(clean.parsed_text.address, clean.parsed_text.number, '')); + delete clean.parsed_text.address; + + } else { + clean.parsed_text.street = clean.parsed_text.address; + delete clean.parsed_text.address; + } + + } + return messages; } diff --git a/test/unit/sanitizer/_synthesize_analysis.js b/test/unit/sanitizer/_synthesize_analysis.js index 20f05fc0..dabacef8 100644 --- a/test/unit/sanitizer/_synthesize_analysis.js +++ b/test/unit/sanitizer/_synthesize_analysis.js @@ -1,13 +1,18 @@ -const sanitizer = require('../../../sanitizer/_synthesize_analysis'); const _ = require('lodash'); +const proxyquire = require('proxyquire').noCallThru(); module.exports.tests = {}; module.exports.tests.text_parser = function(test, common) { test('all variables should be parsed', function(t) { + var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { + 'pelias-text-analyzer': { parse: function(query) { + t.fail('parse should not have been called'); + } + }}); + const raw = { query: ' \t query \t value \t ', - address: ' \t address \t value \t ', neighbourhood: ' \t neighbourhood \t value \t ', borough: ' \t borough \t value \t ', locality: ' \t locality \t value \t ', @@ -21,7 +26,6 @@ module.exports.tests.text_parser = function(test, common) { const expected_clean = { parsed_text: { - address: 'address value', neighbourhood: 'neighbourhood value', borough: 'borough value', city: 'locality value', @@ -42,6 +46,12 @@ module.exports.tests.text_parser = function(test, common) { }); test('non-string and blank string values should be treated as not supplied', function(t) { + var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { + 'pelias-text-analyzer': { parse: function(query) { + t.fail('parse should not have been called'); + } + }}); + // helper to return a random value that's considered invalid function getInvalidValue() { return _.sample([{}, [], false, '', ' \t ', 17, undefined]); @@ -75,6 +85,12 @@ module.exports.tests.text_parser = function(test, common) { }); test('no supplied fields should return error', function(t) { + var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { + 'pelias-text-analyzer': { parse: function(query) { + t.fail('parse should not have been called'); + } + }}); + const raw = {}; const clean = {}; @@ -92,6 +108,12 @@ module.exports.tests.text_parser = function(test, common) { }); test('postalcode-only parsed_text should return error', function(t) { + var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { + 'pelias-text-analyzer': { parse: function(query) { + t.fail('parse should not have been called'); + } + }}); + const raw = { postalcode: 'postalcode value' }; @@ -113,6 +135,102 @@ module.exports.tests.text_parser = function(test, common) { }); + test('text_analyzer identifying house number should extract it and street', function(t) { + var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { + 'pelias-text-analyzer': { parse: function(query) { + t.equals(query, 'Number Value Street Value Number Value'); + + return { + number: 'Number Value' + }; + } + }}); + + const raw = { + address: 'Number Value Street Value Number Value' + }; + + const clean = {}; + + const expected_clean = { + parsed_text: { + number: 'Number Value', + street: 'Street Value Number Value' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('text_analyzer identifying postalcode but not house number should assign to number and remove from address', function(t) { + var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { + 'pelias-text-analyzer': { parse: function(query) { + t.equals(query, 'Number Value Street Value Number Value'); + + return { + postalcode: 'Number Value' + }; + } + }}); + + const raw = { + address: 'Number Value Street Value Number Value' + }; + + const clean = {}; + + const expected_clean = { + parsed_text: { + number: 'Number Value', + street: 'Street Value Number Value' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + + test('text_analyzer not revealing possible number should move address to street', function(t) { + var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', { + 'pelias-text-analyzer': { parse: function(query) { + t.equals(query, 'Street Value'); + + return {}; + } + }}); + + const raw = { + address: 'Street Value' + }; + + const clean = {}; + + const expected_clean = { + parsed_text: { + street: 'Street Value' + } + }; + + const messages = sanitizer(raw, clean); + + t.deepEquals(clean, expected_clean); + t.deepEquals(messages.errors, [], 'no errors'); + t.deepEquals(messages.warnings, [], 'no warnings'); + t.end(); + + }); + }; module.exports.all = function (tape, common) {