Browse Source

remove text-analyzer address operation

pull/1061/head
Stephen Hess 7 years ago
parent
commit
b5e48afb35
  1. 44
      sanitizer/_synthesize_analysis.js
  2. 153
      test/unit/sanitizer/_synthesize_analysis.js

44
sanitizer/_synthesize_analysis.js

@ -1,5 +1,4 @@
const _ = require('lodash'); const _ = require('lodash');
const text_analyzer = require('pelias-text-analyzer');
const fields = { const fields = {
'venue': 'query', 'venue': 'query',
@ -17,20 +16,6 @@ function normalizeWhitespaceToSingleSpace(val) {
return _.replace(_.trim(val), /\s+/g, ' '); return _.replace(_.trim(val), /\s+/g, ' ');
} }
/**
 * Reports whether the parsed text consists solely of a postal code.
 *
 * @param {Object} parsed_text - object whose own enumerable keys are inspected
 * @returns {boolean} true when `postalcode` is the one and only own key
 */
function isPostalCodeOnly(parsed_text) {
  // Borrow hasOwnProperty from Object.prototype so the check also works on
  // prototype-less objects and on objects that carry their own
  // `hasOwnProperty` key (calling the method directly would throw there).
  return Object.keys(parsed_text).length === 1 &&
    Object.prototype.hasOwnProperty.call(parsed_text, 'postalcode');
}
// Decide which field of the libpostal response most likely holds the house
// number, favouring `number` over `postalcode`.
// Some inputs, like `3370 cobbe ave`, are parsed by libpostal as
// postcode+street; since the whole field is being treated as a street address,
// an identified postcode can safely be assumed to be a house number.
function getHouseNumberField(analyzed_address) {
  const candidate_fields = ['number', 'postalcode'];

  // the first candidate present in the response wins; undefined when neither is
  return _.find(candidate_fields, (field) => _.has(analyzed_address, field));
}
function _sanitize( raw, clean ){ function _sanitize( raw, clean ){
// error & warning messages // error & warning messages
@ -51,35 +36,8 @@ function _sanitize( raw, clean ){
`at least one of the following fields is required: ${Object.keys(fields).join(', ')}`); `at least one of the following fields is required: ${Object.keys(fields).join(', ')}`);
} }
// When an address was supplied, run it through the text analyzer and split it
// into `number` and `street`; the `address` key itself is dropped afterwards.
if (clean.parsed_text.hasOwnProperty('address')) {
  const raw_address = clean.parsed_text.address;
  const analyzed_address = text_analyzer.parse(raw_address);
  const house_number_field = getHouseNumberField(analyzed_address);

  if (!house_number_field) {
    // no house number could be identified, so the whole input is the street
    clean.parsed_text.street = raw_address;
  } else {
    // libpostal reported a probable house number (via either the number or
    // the postcode field): store it as the number, then build the street by
    // removing only the first occurrence of that value and trimming
    // eg - '1090 N Charlotte St' becomes number=1090 and street=N Charlotte St
    clean.parsed_text.number = analyzed_address[house_number_field];
    clean.parsed_text.street = _.trim(
      _.replace(raw_address, clean.parsed_text.number, '')
    );
  }

  // once parsed, the address field carries no further meaning
  delete clean.parsed_text.address;
}
return messages; return messages;
} }
function _expected() { function _expected() {

153
test/unit/sanitizer/_synthesize_analysis.js

@ -1,18 +1,14 @@
const _ = require('lodash'); const _ = require('lodash');
const proxyquire = require('proxyquire').noCallThru(); const proxyquire = require('proxyquire').noCallThru();
const sanitizer = require('../../../sanitizer/_synthesize_analysis');
module.exports.tests = {}; module.exports.tests = {};
module.exports.tests.text_parser = function(test, common) { module.exports.tests.text_parser = function(test, common) {
test('all variables should be parsed', function(t) { test('all variables should be parsed', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
const raw = { const raw = {
venue: ' \t venue \t value \t ', venue: ' \t venue \t value \t ',
address: ' \t address \t value \t ',
neighbourhood: ' \t neighbourhood \t value \t ', neighbourhood: ' \t neighbourhood \t value \t ',
borough: ' \t borough \t value \t ', borough: ' \t borough \t value \t ',
locality: ' \t locality \t value \t ', locality: ' \t locality \t value \t ',
@ -27,6 +23,7 @@ module.exports.tests.text_parser = function(test, common) {
const expected_clean = { const expected_clean = {
parsed_text: { parsed_text: {
query: 'venue value', query: 'venue value',
address: 'address value',
neighbourhood: 'neighbourhood value', neighbourhood: 'neighbourhood value',
borough: 'borough value', borough: 'borough value',
city: 'locality value', city: 'locality value',
@ -47,12 +44,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('non-string and blank string values should be treated as not supplied', function(t) { test('non-string and blank string values should be treated as not supplied', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
// helper to return a random value that's considered invalid // helper to return a random value that's considered invalid
function getInvalidValue() { function getInvalidValue() {
return _.sample([{}, [], false, '', ' \t ', 17, undefined]); return _.sample([{}, [], false, '', ' \t ', 17, undefined]);
@ -87,12 +78,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('no supplied fields should return error', function(t) { test('no supplied fields should return error', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
const raw = {}; const raw = {};
const clean = {}; const clean = {};
@ -110,12 +95,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
test('postalcode-only parsed_text should return error', function(t) { test('postalcode-only parsed_text should return error', function(t) {
var sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
'pelias-text-analyzer': { parse: function(query) {
t.fail('parse should not have been called');
}
}});
const raw = { const raw = {
postalcode: 'postalcode value' postalcode: 'postalcode value'
}; };
@ -137,132 +116,6 @@ module.exports.tests.text_parser = function(test, common) {
}); });
// The stubbed analyzer reports a `number`; only the first occurrence of that
// value is expected to be stripped from the address to form the street.
test('text_analyzer identifying house number should extract it and street', (t) => {
  const address = 'Number Value Street Value Number Value';

  // stand-in for pelias-text-analyzer that also verifies the text it receives
  const analyzer_stub = {
    parse(query) {
      t.equals(query, address);
      return { number: 'Number Value' };
    }
  };
  const sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
    'pelias-text-analyzer': analyzer_stub
  });

  const clean = {};
  const messages = sanitizer().sanitize({ address }, clean);

  t.deepEquals(clean, {
    parsed_text: {
      number: 'Number Value',
      street: 'Street Value Number Value'
    }
  });
  t.deepEquals(messages.errors, [], 'no errors');
  t.deepEquals(messages.warnings, [], 'no warnings');
  t.end();
});
// The stubbed analyzer reports only a `postalcode`; the sanitizer is expected
// to treat that value as the house number and strip its first occurrence.
test('text_analyzer identifying postalcode but not house number should assign to number and remove from address', (t) => {
  const address = 'Number Value Street Value Number Value';

  // stand-in for pelias-text-analyzer that also verifies the text it receives
  const analyzer_stub = {
    parse(query) {
      t.equals(query, address);
      return { postalcode: 'Number Value' };
    }
  };
  const sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
    'pelias-text-analyzer': analyzer_stub
  });

  const clean = {};
  const messages = sanitizer().sanitize({ address }, clean);

  t.deepEquals(clean, {
    parsed_text: {
      number: 'Number Value',
      street: 'Street Value Number Value'
    }
  });
  t.deepEquals(messages.errors, [], 'no errors');
  t.deepEquals(messages.warnings, [], 'no warnings');
  t.end();
});
// The stubbed analyzer returns an empty result, so the whole address is
// expected to end up in `street` with no `number` set.
test('text_analyzer not revealing possible number should move address to street', (t) => {
  const address = 'Street Value';

  // stand-in for pelias-text-analyzer that also verifies the text it receives
  const analyzer_stub = {
    parse(query) {
      t.equals(query, address);
      return {};
    }
  };
  const sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
    'pelias-text-analyzer': analyzer_stub
  });

  const clean = {};
  const messages = sanitizer().sanitize({ address }, clean);

  t.deepEquals(clean, {
    parsed_text: {
      street: 'Street Value'
    }
  });
  t.deepEquals(messages.errors, [], 'no errors');
  t.deepEquals(messages.warnings, [], 'no warnings');
  t.end();
});
// The stubbed analyzer returns undefined; the expected outcome matches the
// "no house number found" case: the whole address becomes the street.
test('text_analyzer returning undefined on address resolution should treat as if no house number field was found', function(t) {
  const address = 'Street Value';

  // stand-in for pelias-text-analyzer that also verifies the text it receives
  const analyzer_stub = {
    parse: function(query) {
      t.equals(query, address);
      return undefined;
    }
  };
  const sanitizer = proxyquire('../../../sanitizer/_synthesize_analysis', {
    'pelias-text-analyzer': analyzer_stub
  });

  const clean = {};
  const messages = sanitizer().sanitize({ address: address }, clean);

  const expected_clean = {
    parsed_text: {
      street: 'Street Value'
    }
  };
  t.deepEquals(clean, expected_clean);
  t.deepEquals(messages.errors, [], 'no errors');
  t.deepEquals(messages.warnings, [], 'no warnings');
  t.end();
});
test('return an array of expected parameters in object form for validation', function (t) { test('return an array of expected parameters in object form for validation', function (t) {
const sanitizer = require('../../../sanitizer/_synthesize_analysis'); const sanitizer = require('../../../sanitizer/_synthesize_analysis');
const expected = [ const expected = [

Loading…
Cancel
Save