Browse Source

Temporary hack to remove numbers without a suffix (e.g. house numbers, postcodes) from the ngrams analysis only, while leaving those numbers in place for the other types of analysis.

temp_ngrams_strip_housenumbers
Peter Johnson 9 years ago
parent
commit
f0bc07d807
  1. 3
      query/autocomplete.js
  2. 3
      query/search.js
  3. 43
      query/view/temp_ngrams_strip_housenumbers.js
  4. 76
      test/unit/fixture/autocomplete_full_address.js
  5. 2
      test/unit/fixture/search_full_address.js
  6. 2
      test/unit/fixture/search_regions_address.js
  7. 11
      test/unit/query/autocomplete.js
  8. 60
      test/unit/query/view/temp_ngrams_strip_housenumbers.js
  9. 1
      test/unit/run.js

3
query/autocomplete.js

@ -1,5 +1,6 @@
var peliasQuery = require('pelias-query'),
ngramsStripHouseNumbersView = require('./view/temp_ngrams_strip_housenumbers'),
defaults = require('./defaults'),
check = require('check-types');
@ -9,7 +10,7 @@ var peliasQuery = require('pelias-query'),
var query = new peliasQuery.layout.FilteredBooleanQuery();
// mandatory matches
query.score( peliasQuery.view.ngrams, 'must' );
query.score( ngramsStripHouseNumbersView, 'must' );
// scoring boost
query.score( peliasQuery.view.phrase );

3
query/search.js

@ -1,4 +1,5 @@
var peliasQuery = require('pelias-query'),
ngramsStripHouseNumbersView = require('./view/temp_ngrams_strip_housenumbers'),
defaults = require('./defaults'),
textParser = require('./text_parser'),
check = require('check-types');
@ -10,7 +11,7 @@ var query = new peliasQuery.layout.FilteredBooleanQuery();
// mandatory matches
query.score( peliasQuery.view.boundary_country, 'must' );
query.score( peliasQuery.view.ngrams, 'must' );
query.score( ngramsStripHouseNumbersView, 'must' );
// scoring boost
query.score( peliasQuery.view.phrase );

43
query/view/temp_ngrams_strip_housenumbers.js

@ -0,0 +1,43 @@
/**
This is (should be!) only a temporary solution.
It is intended to strip housenumbers from input text BUT
should only apply to the ngrams analysis and not affect
the other textual analysis.
eg: 'phrase' matching should still include the housenumber
This file can go away once the peliasOneEdgeGram and peliasTwoEdgeGram
analysers have been modified in pelias/schema, but as that would require
a full re-index and (potentially) break backwards compatibility with the
v0 legacy codebase it, unfortunately, has to wait until that legacy
service has been fully decommissioned.
**/
var peliasQuery = require('pelias-query');
module.exports = function( vs ){
// clone the $vs so we can modify this copy without
// mutating the 'actual' query variables which get shared
// with the other views.
var vsClone = new peliasQuery.Vars( vs.export() );
// set 'input:name' to the result of removeHouseNumber($name);
if( vsClone.isset('input:name') ){
var nameVar = vsClone.var('input:name');
nameVar.set( removeHouseNumber( nameVar.get() ) );
}
// run the original ngram view but with the modified input:name' var
return peliasQuery.view.ngrams( vsClone );
};
/**
 * Remove standalone numeric tokens (house numbers, postcodes) from a name.
 *
 * A token is removed only when it consists purely of digits and is followed
 * by a whitespace character; that whitespace character is removed with it.
 * Numeric street names carrying a suffix (eg. '1st street', '26th street')
 * are left alone, as is a number at the very end of the text with no
 * whitespace after it.
 *
 * @param {string} name - the input text
 * @returns {string} the text with the matching numeric tokens removed
 */
function removeHouseNumber( name ){
  // the leading \b anchors the match to the start of a token, so the digits
  // at the end of an alphanumeric token are not stripped and the token is
  // not fused with the next word (eg. 'route66 diner' stays as it is rather
  // than becoming 'routediner').
  return name.replace(/\b\d+\s/g, '');
}
// export for testing
module.exports.removeHouseNumber = removeHouseNumber;

76
test/unit/fixture/autocomplete_full_address.js

@ -0,0 +1,76 @@
module.exports = {
'query': {
'filtered': {
'query': {
'bool': {
'must': [{
'match': {
'name.default': {
'query': 'main st new york ny US',
'boost': 1,
'analyzer': 'peliasOneEdgeGram'
}
}
}],
'should': [{
'match': {
'phrase.default': {
'query': '123 main st new york ny 10010 US',
'analyzer': 'peliasPhrase',
'type': 'phrase',
'boost': 1,
'slop': 2
}
}
},
{
'function_score': {
'query': {
'filtered': {
'filter': {
'exists': {
'field': 'popularity'
}
}
}
},
'max_boost': 2,
'score_mode': 'first',
'boost_mode': 'replace',
'filter': {
'or': [
{
'type': {
'value': 'admin0'
}
},
{
'type': {
'value': 'admin1'
}
},
{
'type': {
'value': 'admin2'
}
}
]
},
'functions': [{
'field_value_factor': {
'modifier': 'sqrt',
'field': 'popularity'
},
'weight': 1
}]
}
}]
}
}
}
},
'sort': [ '_score' ],
'size': 10,
'track_scores': true
};

2
test/unit/fixture/search_full_address.js

@ -10,7 +10,7 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'query': '123 main st',
'query': 'main st',
'analyzer': 'peliasOneEdgeGram',
'boost': 1
}

2
test/unit/fixture/search_regions_address.js

@ -10,7 +10,7 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'query': '1 water st',
'query': 'water st',
'analyzer': 'peliasOneEdgeGram',
'boost': 1
}

11
test/unit/query/autocomplete.js

@ -51,6 +51,17 @@ module.exports.tests.query = function(test, common) {
t.deepEqual(compiled, expected, 'valid autocomplete query');
t.end();
});
test('valid autocomplete with a full valid address', function(t) {
var address = '123 main st new york ny 10010 US';
var query = generate({ text: address });
var compiled = JSON.parse( JSON.stringify( query ) );
var expected = require('../fixture/autocomplete_full_address');
t.deepEqual(compiled, expected, 'valid autocomplete query');
t.end();
});
};
module.exports.all = function (tape, common) {

60
test/unit/query/view/temp_ngrams_strip_housenumbers.js

@ -0,0 +1,60 @@
var peliasQuery = require('pelias-query');
var ngramsStripHouseNumbersView = require('../../../../query/view/temp_ngrams_strip_housenumbers');
module.exports.tests = {};
// The module must export the view as a function, with the
// removeHouseNumber helper attached to it as a function too.
module.exports.tests.interface = function(test, common) {
  test('valid interface', function(t) {
    var exported = ngramsStripHouseNumbersView;
    var helper = exported.removeHouseNumber;

    t.equal(typeof exported, 'function', 'valid function');
    t.equal(typeof helper, 'function', 'valid function');
    t.end();
  });
};
// Exercise the view itself: compiled output with and without 'input:name'.
module.exports.tests.view = function(test, common) {

  // with a name set: the compiled query omits the house number while the
  // variables passed in by the caller stay exactly as they were.
  test('input:name set', function(t) {
    var vars = new peliasQuery.Vars( peliasQuery.defaults );
    vars.var('input:name').set('101 west 26th street');

    var rendered = ngramsStripHouseNumbersView( vars );
    var actualJson = JSON.stringify( rendered );
    var expectedJson = '{"match":{"name.default":{"analyzer":"peliasOneEdgeGram","boost":1,"query":"west 26th street"}}}';

    t.equal(actualJson, expectedJson, 'view compiled correctly');
    t.equal(vars.var('input:name').get(), '101 west 26th street', 'original var not mutated');
    t.end();
  });

  // without a name set: the view is expected to yield null
  test('input:name not set', function(t) {
    var vars = new peliasQuery.Vars( peliasQuery.defaults );
    var rendered = ngramsStripHouseNumbersView( vars );

    t.equal(rendered, null, 'view failed compilation due to missing var');
    t.end();
  });
};
// Unit tests for the removeHouseNumber helper exported alongside the view.
module.exports.tests.removeHouseNumber = function(test, common) {
  var rm = ngramsStripHouseNumbersView.removeHouseNumber;
  test('removeHouseNumber', function(t) {
    // a leading bare number is stripped
    t.equal(rm('101 west 26th street'), 'west 26th street', 'house number removed');
    // a number with a suffix is a street name and must survive untouched
    // (the message previously read 'house number removed', which described
    // the opposite of what this case checks)
    t.equal(rm('10th avenue'), '10th avenue', 'numeric street name preserved');
    // bare numbers further along the text (eg. postcodes) are stripped too
    t.equal(rm('123 main st new york ny 10010 US'), 'main st new york ny US', 'also removes postcodes');
    t.end();
  });
};
// Register every test group in module.exports.tests with the tape runner,
// prefixing each test name so the output identifies this suite.
module.exports.all = function (tape, common) {
  var groups = module.exports.tests;

  var prefixedTest = function (name, testFunction) {
    return tape('ngrams strip housenumber view: ' + name, testFunction);
  };

  for( var groupName in groups ){
    groups[groupName](prefixedTest, common);
  }
};

1
test/unit/run.js

@ -21,6 +21,7 @@ var tests = [
require('./query/autocomplete'),
require('./query/reverse'),
require('./query/defaults'),
require('./query/view/temp_ngrams_strip_housenumbers'),
require('./helper/query_parser'),
require('./helper/geojsonify'),
require('./helper/outputSchema'),

Loading…
Cancel
Save