From f0bc07d8073b3105bfddd624fdf52f71d2bfbf61 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Wed, 23 Sep 2015 15:40:09 +0200 Subject: [PATCH] temporary hack to remove numbers (without a suffix) eg. housenumber, postcode from the ngrams analysis only; while not removing the numbers from other types of analysis --- query/autocomplete.js | 3 +- query/search.js | 3 +- query/view/temp_ngrams_strip_housenumbers.js | 43 +++++++++++ .../unit/fixture/autocomplete_full_address.js | 76 +++++++++++++++++++ test/unit/fixture/search_full_address.js | 2 +- test/unit/fixture/search_regions_address.js | 2 +- test/unit/query/autocomplete.js | 11 +++ .../view/temp_ngrams_strip_housenumbers.js | 60 +++++++++++++++ test/unit/run.js | 1 + 9 files changed, 197 insertions(+), 4 deletions(-) create mode 100644 query/view/temp_ngrams_strip_housenumbers.js create mode 100644 test/unit/fixture/autocomplete_full_address.js create mode 100644 test/unit/query/view/temp_ngrams_strip_housenumbers.js diff --git a/query/autocomplete.js b/query/autocomplete.js index 62fb775e..e76e62b0 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -1,5 +1,6 @@ var peliasQuery = require('pelias-query'), + ngramsStripHouseNumbersView = require('./view/temp_ngrams_strip_housenumbers'), defaults = require('./defaults'), check = require('check-types'); @@ -9,7 +10,7 @@ var peliasQuery = require('pelias-query'), var query = new peliasQuery.layout.FilteredBooleanQuery(); // mandatory matches -query.score( peliasQuery.view.ngrams, 'must' ); +query.score( ngramsStripHouseNumbersView, 'must' ); // scoring boost query.score( peliasQuery.view.phrase ); diff --git a/query/search.js b/query/search.js index bf95b76b..bfd31152 100644 --- a/query/search.js +++ b/query/search.js @@ -1,4 +1,5 @@ var peliasQuery = require('pelias-query'), + ngramsStripHouseNumbersView = require('./view/temp_ngrams_strip_housenumbers'), defaults = require('./defaults'), textParser = require('./text_parser'), check = 
require('check-types'); @@ -10,7 +11,7 @@ var query = new peliasQuery.layout.FilteredBooleanQuery(); // mandatory matches query.score( peliasQuery.view.boundary_country, 'must' ); -query.score( peliasQuery.view.ngrams, 'must' ); +query.score( ngramsStripHouseNumbersView, 'must' ); // scoring boost query.score( peliasQuery.view.phrase ); diff --git a/query/view/temp_ngrams_strip_housenumbers.js b/query/view/temp_ngrams_strip_housenumbers.js new file mode 100644 index 00000000..dd1e8ddf --- /dev/null +++ b/query/view/temp_ngrams_strip_housenumbers.js @@ -0,0 +1,43 @@ + +/** + This is (should be!) only a temporary solution. + + It is intended to strip housenumbers from input text BUT + should only apply to the ngrams analysis and not affect + the other textual analysis. + eg: 'phrase' matching should still include the housenumber + + This file can go away once the peliasOneEdgeGram and peliasTwoEdgeGram + analysers have been modified in pelias/schema, but as that would require + a full re-index and (potentially) break backwards compatibility with the + v0 legacy codebase it, unfortunately, has to wait until that legacy + service has been fully decommissioned. +**/ + +var peliasQuery = require('pelias-query'); + +module.exports = function( vs ){ + + // clone the $vs so we can modify this copy without + // mutating the 'actual' query variables which get shared + // with the other views. 
+ var vsClone = new peliasQuery.Vars( vs.export() ); + + // set 'input:name' to the result of removeHouseNumber($name); + if( vsClone.isset('input:name') ){ + var nameVar = vsClone.var('input:name'); + nameVar.set( removeHouseNumber( nameVar.get() ) ); + } + + // run the original ngram view but with the modified 'input:name' var + return peliasQuery.view.ngrams( vsClone ); +}; + +// remove the housenumber +// be careful of numeric street names such as '1st street' +function removeHouseNumber( name ){ + return name.replace(/(\d+\s)/g, ''); +} + +// export for testing +module.exports.removeHouseNumber = removeHouseNumber; \ No newline at end of file diff --git a/test/unit/fixture/autocomplete_full_address.js b/test/unit/fixture/autocomplete_full_address.js new file mode 100644 index 00000000..466eebe2 --- /dev/null +++ b/test/unit/fixture/autocomplete_full_address.js @@ -0,0 +1,76 @@ + +module.exports = { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'must': [{ + 'match': { + 'name.default': { + 'query': 'main st new york ny US', + 'boost': 1, + 'analyzer': 'peliasOneEdgeGram' + } + } + }], + 'should': [{ + 'match': { + 'phrase.default': { + 'query': '123 main st new york ny 10010 US', + 'analyzer': 'peliasPhrase', + 'type': 'phrase', + 'boost': 1, + 'slop': 2 + } + } + }, + { + 'function_score': { + 'query': { + 'filtered': { + 'filter': { + 'exists': { + 'field': 'popularity' + } + } + } + }, + 'max_boost': 2, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'filter': { + 'or': [ + { + 'type': { + 'value': 'admin0' + } + }, + { + 'type': { + 'value': 'admin1' + } + }, + { + 'type': { + 'value': 'admin2' + } + } + ] + }, + 'functions': [{ + 'field_value_factor': { + 'modifier': 'sqrt', + 'field': 'popularity' + }, + 'weight': 1 + }] + } + }] + } + } + } + }, + 'sort': [ '_score' ], + 'size': 10, + 'track_scores': true +}; \ No newline at end of file diff --git a/test/unit/fixture/search_full_address.js b/test/unit/fixture/search_full_address.js index 
aff242b8..a93a495e 100644 --- a/test/unit/fixture/search_full_address.js +++ b/test/unit/fixture/search_full_address.js @@ -10,7 +10,7 @@ module.exports = { 'must': [{ 'match': { 'name.default': { - 'query': '123 main st', + 'query': 'main st', 'analyzer': 'peliasOneEdgeGram', 'boost': 1 } diff --git a/test/unit/fixture/search_regions_address.js b/test/unit/fixture/search_regions_address.js index 05d4ffe5..fcdb6b65 100644 --- a/test/unit/fixture/search_regions_address.js +++ b/test/unit/fixture/search_regions_address.js @@ -10,7 +10,7 @@ module.exports = { 'must': [{ 'match': { 'name.default': { - 'query': '1 water st', + 'query': 'water st', 'analyzer': 'peliasOneEdgeGram', 'boost': 1 } diff --git a/test/unit/query/autocomplete.js b/test/unit/query/autocomplete.js index ecdf9553..a53ed0b9 100644 --- a/test/unit/query/autocomplete.js +++ b/test/unit/query/autocomplete.js @@ -51,6 +51,17 @@ module.exports.tests.query = function(test, common) { t.deepEqual(compiled, expected, 'valid autocomplete query'); t.end(); }); + + test('valid autocomplete with a full valid address', function(t) { + var address = '123 main st new york ny 10010 US'; + var query = generate({ text: address }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_full_address'); + + t.deepEqual(compiled, expected, 'valid autocomplete query'); + t.end(); + }); }; module.exports.all = function (tape, common) { diff --git a/test/unit/query/view/temp_ngrams_strip_housenumbers.js b/test/unit/query/view/temp_ngrams_strip_housenumbers.js new file mode 100644 index 00000000..3d71ee90 --- /dev/null +++ b/test/unit/query/view/temp_ngrams_strip_housenumbers.js @@ -0,0 +1,60 @@ +var peliasQuery = require('pelias-query'); +var ngramsStripHouseNumbersView = require('../../../../query/view/temp_ngrams_strip_housenumbers'); + +module.exports.tests = {}; + +module.exports.tests.interface = function(test, common) { + test('valid interface', function(t) { + 
t.equal(typeof ngramsStripHouseNumbersView, 'function', 'valid function'); + t.equal(typeof ngramsStripHouseNumbersView.removeHouseNumber, 'function', 'valid function'); + t.end(); + }); +}; + +module.exports.tests.view = function(test, common) { + var view = ngramsStripHouseNumbersView; + test('input:name set', function(t) { + + var vs1 = new peliasQuery.Vars( peliasQuery.defaults ); + vs1.var('input:name').set('101 west 26th street'); + + var compiled = JSON.stringify( view( vs1 ) ); + var expected = '{"match":{"name.default":{"analyzer":"peliasOneEdgeGram","boost":1,"query":"west 26th street"}}}'; + + t.equal(compiled, expected, 'view compiled correctly'); + t.equal(vs1.var('input:name').get(), '101 west 26th street', 'original var not mutated'); + + t.end(); + }); + test('input:name not set', function(t) { + + var vs1 = new peliasQuery.Vars( peliasQuery.defaults ); + t.equal(view(vs1), null, 'view failed compilation due to missing var'); + + t.end(); + }); +}; + +module.exports.tests.removeHouseNumber = function(test, common) { + var rm = ngramsStripHouseNumbersView.removeHouseNumber; + test('removeHouseNumber', function(t) { + + t.equal(rm('101 west 26th street'), 'west 26th street', 'house number removed'); + t.equal(rm('10th avenue'), '10th avenue', 'house number removed'); + + t.equal(rm('123 main st new york ny 10010 US'), 'main st new york ny US', 'also removes postcodes'); + + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('ngrams strip housenumber view: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 74202992..fe7a4b3e 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -21,6 +21,7 @@ var tests = [ require('./query/autocomplete'), require('./query/reverse'), require('./query/defaults'), + 
require('./query/view/temp_ngrams_strip_housenumbers'), require('./helper/query_parser'), require('./helper/geojsonify'), require('./helper/outputSchema'),