Browse Source

avoid reducing input:name to only stop words

temp_ngrams_strip_housenumbers
Peter Johnson 9 years ago
parent
commit
f2a45a3c19
  1. 18
      query/view/temp_ngrams_strip_housenumbers.js
  2. 7
      test/unit/query/view/temp_ngrams_strip_housenumbers.js

18
query/view/temp_ngrams_strip_housenumbers.js

@ -14,7 +14,9 @@
service has been fully decommissioned.
**/
var peliasQuery = require('pelias-query');
var _ = require('lodash'),
peliasQuery = require('pelias-query'),
stopWords = require('pelias-schema/street_suffix').terms;
module.exports = function( vs ){
@ -36,7 +38,19 @@ module.exports = function( vs ){
// remove the housenumber
// be careful of numeric street names such as '1st street'
/**
 * Strip house numbers (and any other all-digit tokens that are followed by
 * whitespace, such as postcodes) from a name.
 *
 * Ordinal street names such as '10th avenue' are left untouched because their
 * digits are not directly followed by whitespace.
 *
 * @param {string} name - the input text to clean
 * @returns {string} the trimmed text with the numeric tokens removed, or the
 *   original `name` unchanged when stripping would leave an empty string or
 *   exactly one stop word.
 */
function removeHouseNumber( name ){

  // most of the time this is sufficient: drop every run of digits which is
  // immediately followed by a whitespace character
  var stripped = name.replace(/(\d+\s)/g, '').trim();

  // we need to avoid stripping ALL the numbers and leaving only a stop word,
  // because in that case the analyser will return a blank input string.
  // eg. '1359 54 street' would otherwise be reduced to 'street'.
  // the same issue exists for inputs such as 'avenue street' (not covered here).
  // if this happens we simply return the original name.
  // NOTE(review): `_.contains` was removed in lodash v4 in favour of
  // `_.includes` — confirm against the installed lodash version.
  if( !stripped || _.contains( stopWords, stripped.toLowerCase() ) ){
    return name;
  }

  return stripped;
}
// export for testing

7
test/unit/query/view/temp_ngrams_strip_housenumbers.js

@ -41,9 +41,14 @@ module.exports.tests.removeHouseNumber = function(test, common) {
t.equal(rm('101 west 26th street'), 'west 26th street', 'house number removed');
t.equal(rm('10th avenue'), '10th avenue', 'don\'t remove ordinal numbers');
t.equal(rm('123 main st new york ny 10010 US'), 'main st new york ny US', 'also removes postcodes');
// in this case we need to avoid stripping ALL the numbers and leaving only stop words
// because in this case the analyser will return a blank input string.
// eg. the same issue exists for 'avenue street' (not covered here).
t.equal(rm('1359 54 street'), '1359 54 street', 'avoid stripping ALL valid tokens');
t.equal(rm('310 7 street'), '310 7 street', 'avoid stripping ALL valid tokens');
t.end();
});
};

Loading…
Cancel
Save