Browse Source

handle addressit case where parsed_text.street is produced and parsed_text.name is not

pull/526/head
missinglink 9 years ago
parent
commit
05240626fd
  1. 19
      sanitiser/_tokenizer.js
  2. 32
      test/unit/sanitiser/_tokenizer.js

19
sanitiser/_tokenizer.js

@ -27,11 +27,28 @@ function sanitize( raw, clean ){
// if the text parser has run then we only tokenize the 'name' section // if the text parser has run then we only tokenize the 'name' section
// of the 'parsed_text' object, ignoring the 'admin' parts. // of the 'parsed_text' object, ignoring the 'admin' parts.
if( clean.hasOwnProperty('parsed_text') && clean.parsed_text.hasOwnProperty('name') ){ if( clean.hasOwnProperty('parsed_text') ) {
inputParserRanSuccessfully = true; inputParserRanSuccessfully = true;
// parsed_text.name is set, this is the highest priority, use this string
if( clean.parsed_text.hasOwnProperty('name') ){
text = clean.parsed_text.name; // use this string instead text = clean.parsed_text.name; // use this string instead
} }
// else handle the case where parsed_text.street was produced but
// no parsed_text.name is produced.
// additionally, handle the case where parsed_text.number is present
// note: the addressit module may also produce parsed_text.unit info
// for now, we discard that information as we don't have an appropriate
// way to use it.
else if( clean.parsed_text.hasOwnProperty('street') ){
text = [
clean.parsed_text.number,
clean.parsed_text.street
].filter(function(el){return el;})
.join(' '); // remove empty elements
}
}
// always set 'clean.tokens*' arrays for consistency and to avoid upstream errors. // always set 'clean.tokens*' arrays for consistency and to avoid upstream errors.
clean.tokens = []; clean.tokens = [];
clean.tokens_complete = []; clean.tokens_complete = [];

32
test/unit/sanitiser/_tokenizer.js

@ -81,6 +81,38 @@ module.exports.tests.sanity_checks = function(test, common) {
t.deepEquals(messages.errors, [], 'no errors'); t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings'); t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
// Case: parsed_text contains a house number and a street but no name.
// The expected tokens come from "number street"; clean.text ('bar') is ignored.
test('favor clean.parsed_text street data over clean.text', function(t) {
var clean = { parsed_text: { number: '190', street: 'foo st' }, text: 'bar' };
var messages = sanitiser({}, clean);
// favor clean.parsed_text number + street over clean.text
t.deepEquals(clean.tokens, [ '190', 'foo', 'st' ], 'use street name + number');
t.deepEquals(clean.tokens_complete, [ '190', 'foo', 'st' ], 'use street name + number');
t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
// no errors/warnings produced
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end();
});
test('favor clean.parsed_text.name over clean.parsed_text street data', function(t) {
var clean = { parsed_text: { number: '190', street: 'foo st', name: 'foo' }, text: 'bar' };
var messages = sanitiser({}, clean);
// favor clean.parsed_text.name over all other variables
t.deepEquals(clean.tokens, [ 'foo' ], 'use clean.parsed_text.name');
t.deepEquals(clean.tokens_complete, [ 'foo' ], 'use clean.parsed_text.name');
t.deepEquals(clean.tokens_incomplete, [], 'no tokens');
// no errors/warnings produced
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');
t.end(); t.end();
}); });
}; };

Loading…
Cancel
Save