mirror of https://github.com/pelias/api.git
Peter Johnson
9 years ago
9 changed files with 197 additions and 4 deletions
@ -0,0 +1,43 @@
|
||||
|
||||
/** |
||||
This is (should be!) only a temporary solution. |
||||
|
||||
It is intended to strip housenumbers from input text BUT |
||||
should only apply to the ngrams analysis and not affect |
||||
the other textual analysis. |
||||
eg: 'phrase' matching should still include the housenumber |
||||
|
||||
This file can go away once the peliasOneEdgeGram and peliasTwoEdgeGram |
||||
analysers have been modified in pelias/schema, but as that would require |
||||
a full re-index and (potentially) break backwards compatibility with the |
||||
v0 legacy codebase it, unfortunately, has to wait until that legacy |
||||
service has been fully decommissioned. |
||||
**/ |
||||
|
||||
var peliasQuery = require('pelias-query'); |
||||
|
||||
module.exports = function( vs ){ |
||||
|
||||
// clone the $vs so we can modify this copy without
|
||||
// mutating the 'actual' query variables which get shared
|
||||
// with the other views.
|
||||
var vsClone = new peliasQuery.Vars( vs.export() ); |
||||
|
||||
// set 'input:name' to the result of removeHouseNumber($name);
|
||||
if( vsClone.isset('input:name') ){ |
||||
var nameVar = vsClone.var('input:name'); |
||||
nameVar.set( removeHouseNumber( nameVar.get() ) ); |
||||
} |
||||
|
||||
// run the original ngram view but with the modified input:name' var
|
||||
return peliasQuery.view.ngrams( vsClone ); |
||||
}; |
||||
|
||||
/**
 * Remove house numbers from $name.
 *
 * A run of digits is removed (together with the single whitespace
 * character that follows it) only when it forms a token of its own:
 *  - '101 west 26th street' -> 'west 26th street'
 *  - numeric street names such as '10th avenue' are kept, because the
 *    digits are not directly followed by whitespace
 *  - digits which end an alphanumeric token ('route66 motel') are kept,
 *    so neighbouring tokens are never fused together
 *  - a number at the very end of the string is kept (no whitespace follows)
 *
 * Note: standalone numeric postcodes are removed as well.
 *
 * @param {string} name - raw input text
 * @returns {string} the text with standalone numeric tokens removed
 */
function removeHouseNumber( name ){
  // \b prevents the match from starting in the middle of a token such as
  // 'route66', which previously produced 'routemotel' for 'route66 motel'.
  return name.replace(/\b\d+\s/g, '');
}
||||
|
||||
// export for testing
|
||||
module.exports.removeHouseNumber = removeHouseNumber; |
@ -0,0 +1,76 @@
|
||||
|
||||
module.exports = { |
||||
'query': { |
||||
'filtered': { |
||||
'query': { |
||||
'bool': { |
||||
'must': [{ |
||||
'match': { |
||||
'name.default': { |
||||
'query': 'main st new york ny US', |
||||
'boost': 1, |
||||
'analyzer': 'peliasOneEdgeGram' |
||||
} |
||||
} |
||||
}], |
||||
'should': [{ |
||||
'match': { |
||||
'phrase.default': { |
||||
'query': '123 main st new york ny 10010 US', |
||||
'analyzer': 'peliasPhrase', |
||||
'type': 'phrase', |
||||
'boost': 1, |
||||
'slop': 2 |
||||
} |
||||
} |
||||
}, |
||||
{ |
||||
'function_score': { |
||||
'query': { |
||||
'filtered': { |
||||
'filter': { |
||||
'exists': { |
||||
'field': 'popularity' |
||||
} |
||||
} |
||||
} |
||||
}, |
||||
'max_boost': 2, |
||||
'score_mode': 'first', |
||||
'boost_mode': 'replace', |
||||
'filter': { |
||||
'or': [ |
||||
{ |
||||
'type': { |
||||
'value': 'admin0' |
||||
} |
||||
}, |
||||
{ |
||||
'type': { |
||||
'value': 'admin1' |
||||
} |
||||
}, |
||||
{ |
||||
'type': { |
||||
'value': 'admin2' |
||||
} |
||||
} |
||||
] |
||||
}, |
||||
'functions': [{ |
||||
'field_value_factor': { |
||||
'modifier': 'sqrt', |
||||
'field': 'popularity' |
||||
}, |
||||
'weight': 1 |
||||
}] |
||||
} |
||||
}] |
||||
} |
||||
} |
||||
} |
||||
}, |
||||
'sort': [ '_score' ], |
||||
'size': 10, |
||||
'track_scores': true |
||||
}; |
@ -0,0 +1,60 @@
|
||||
var peliasQuery = require('pelias-query'); |
||||
var ngramsStripHouseNumbersView = require('../../../../query/view/temp_ngrams_strip_housenumbers'); |
||||
|
||||
// registry of test groups; each entry is invoked by module.exports.all
module.exports.tests = {};

// the module must export the view as a function and expose the
// removeHouseNumber helper as a function property on it
module.exports.tests.interface = function(test, common) {
  test('valid interface', function(t) {
    var exported = ngramsStripHouseNumbersView;
    var helper = exported.removeHouseNumber;

    t.equal(typeof exported, 'function', 'valid function');
    t.equal(typeof helper, 'function', 'valid function');
    t.end();
  });
};
||||
|
||||
// exercises the view itself: compiled output and non-mutation of the input
module.exports.tests.view = function(test, common) {
  var RAW_NAME = '101 west 26th street';

  test('input:name set', function(t) {
    var vars = new peliasQuery.Vars( peliasQuery.defaults );
    vars.var('input:name').set( RAW_NAME );

    // the house number must be absent from the compiled ngram query
    var expected = '{"match":{"name.default":{"analyzer":"peliasOneEdgeGram","boost":1,"query":"west 26th street"}}}';
    var actual = JSON.stringify( ngramsStripHouseNumbersView( vars ) );

    t.equal(actual, expected, 'view compiled correctly');

    // the variables passed in must still hold the full text afterwards
    t.equal(vars.var('input:name').get(), RAW_NAME, 'original var not mutated');

    t.end();
  });

  test('input:name not set', function(t) {
    var vars = new peliasQuery.Vars( peliasQuery.defaults );
    var result = ngramsStripHouseNumbersView( vars );

    t.equal(result, null, 'view failed compilation due to missing var');

    t.end();
  });
};
||||
|
||||
// unit tests for the removeHouseNumber helper exported alongside the view
module.exports.tests.removeHouseNumber = function(test, common) {
  var rm = ngramsStripHouseNumbersView.removeHouseNumber;
  test('removeHouseNumber', function(t) {

    t.equal(rm('101 west 26th street'), 'west 26th street', 'house number removed');

    // '10th' is a street name, not a house number, so it must survive
    t.equal(rm('10th avenue'), '10th avenue', 'numeric street name preserved');

    t.equal(rm('123 main st new york ny 10010 US'), 'main st new york ny US', 'also removes postcodes');

    t.end();
  });
};
||||
|
||||
// run every registered test group, prefixing each test name so the
// output identifies this suite
module.exports.all = function (tape, common) {

  var prefix = 'ngrams strip housenumber view: ';

  var test = function(name, testFunction) {
    return tape(prefix + name, testFunction);
  };

  Object.keys( module.exports.tests ).forEach(function( groupName ){
    module.exports.tests[groupName](test, common);
  });
};
Loading…
Reference in new issue