|
|
|
var data = require('../fixture/dedupe_elasticsearch_results');
|
|
|
|
var nonAsciiData = require('../fixture/dedupe_elasticsearch_nonascii_results');
|
|
|
|
var dedupe = require('../../../middleware/dedupe')();
|
|
|
|
|
|
|
|
// Registry of test groups. Each property added below is a function that
// module.exports.all invokes with (test, common).
module.exports.tests = {};
|
|
|
|
|
|
|
|
/**
 * Test group: basic result-count behaviour of the dedupe middleware.
 *
 * Every case hands a request/response pair to dedupe() and, inside the
 * callback passed as the third argument, checks how many items are left
 * in the response's `data` array.
 *
 * @param {function} test   registers a named test case: test(name, callback)
 * @param {*}        common shared value handed to every group; unused here
 */
module.exports.tests.dedupe = function(test, common) {

  test('filter out duplicates', function(t) {
    var request = {
      clean: { text: 'lampeter strasburg high school', size: 100 }
    };
    var response = { data: data };

    dedupe(request, response, function () {
      t.equal(response.data.length, 8, 'results have fewer items than before');
      t.end();
    });
  });

  test('handle non-ascii gracefully', function(t) {
    // note: no `text` on the cleaned request in this case, only `size`
    var request = { clean: { size: 100 } };
    var response = { data: nonAsciiData };

    dedupe(request, response, function () {
      t.equal(response.data.length, 4, 'none were removed');
      t.end();
    });
  });

  test('truncate results based on specified size', function(t) {
    var request = {
      clean: { text: 'lampeter strasburg high school', size: 3 }
    };
    var response = { data: data };

    dedupe(request, response, function () {
      // the expected length is read back from the request after the
      // middleware has run, exactly as the requested size
      t.equal(response.data.length, request.clean.size, 'results have fewer items than before');
      t.end();
    });
  });
};
|
|
|
|
|
|
|
|
/**
 * Test group: which record is kept when two candidate records share the
 * same default name and layer but differ in source (or in address parts).
 *
 * Each case passes exactly two records through dedupe() and asserts that a
 * single record remains and that one identifying field of that record has
 * the expected value.
 *
 * @param {function} test   registers a named test case: test(name, callback)
 * @param {*}        common shared value handed to every group; unused here
 */
module.exports.tests.trump = function(test, common) {

  // Build one candidate record. `address_parts` is only attached when a
  // fifth argument is supplied, so records without it keep the same shape
  // (and key order) as a literal written without that property.
  function hit(name, source, sourceId, layer, addressParts) {
    var record = {
      'name': { 'default': name },
      'source': source,
      'source_id': sourceId,
      'layer': layer
    };
    if (arguments.length > 4) {
      record['address_parts'] = addressParts;
    }
    return record;
  }

  // Run dedupe over `candidates` for the given query text (size 100) and
  // assert that one record is left whose `field` equals `expectedValue`.
  function expectSoleSurvivor(t, text, candidates, field, expectedValue, message) {
    var request = { clean: { text: text, size: 100 } };
    var response = { data: candidates };

    dedupe(request, response, function () {
      t.equal(response.data.length, 1, 'results have fewer items than before');
      t.deepEqual(response.data[0][field], expectedValue, message);
      t.end();
    });
  }

  test('whosonfirst trumps geonames, replace', function (t) {
    // the preferred source arrives second
    expectSoleSurvivor(t, 'Lancaster', [
      hit('Lancaster', 'geonames', '123456', 'locality'),
      hit('Lancaster', 'whosonfirst', '654321', 'locality')
    ], 'source', 'whosonfirst', 'whosonfirst result won');
  });

  test('whosonfirst trumps geonames, no replace', function (t) {
    // the preferred source arrives first
    expectSoleSurvivor(t, 'Lancaster', [
      hit('Lancaster', 'whosonfirst', '123456', 'locality'),
      hit('Lancaster', 'geonames', '654321', 'locality')
    ], 'source', 'whosonfirst', 'whosonfirst result won');
  });

  test('openstreetmap trumps whosonfirst venues', function (t) {
    expectSoleSurvivor(t, 'Lancaster Dairy Farm', [
      hit('Lancaster Dairy Farm', 'openstreetmap', '123456', 'venue'),
      hit('Lancaster Dairy Farm', 'whosonfirst', '654321', 'venue')
    ], 'source', 'openstreetmap', 'openstreetmap result won');
  });

  test('openaddresses trumps openstreetmap', function (t) {
    expectSoleSurvivor(t, '100 Main St', [
      hit('100 Main St', 'openstreetmap', '123456', 'address'),
      hit('100 Main St', 'openaddresses', '654321', 'address')
    ], 'source', 'openaddresses', 'openaddresses result won');
  });

  test('openaddresses with zip trumps openaddresses without zip', function (t) {
    // same source on both records; they differ only in address_parts,
    // so the survivor is identified by source_id instead of source
    expectSoleSurvivor(t, '100 Main St', [
      hit('100 Main St', 'openaddresses', '123456', 'address', {}),
      hit('100 Main St', 'openaddresses', '654321', 'address', { 'zip': '54321' })
    ], 'source_id', '654321', 'openaddresses result with zip won');
  });
};
|
|
|
|
|
|
|
|
/**
 * Run every test group registered on module.exports.tests.
 *
 * @param {function} tape   test harness function, called as tape(name, fn)
 * @param {*}        common shared value forwarded unchanged to each group
 */
module.exports.all = function (tape, common) {

  // Wrap the harness so every test name carries this suite's prefix.
  function test(name, testFunction) {
    return tape('[middleware] dedupe: ' + name, testFunction);
  }

  // Iterate own enumerable keys only. A bare for...in would also visit
  // enumerable properties inherited through the prototype chain and try to
  // call them as test groups.
  Object.keys(module.exports.tests).forEach(function (testCase) {
    module.exports.tests[testCase](test, common);
  });
};
|