You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

339 lines
7.6 KiB

// Sanitiser under test; the cases in this file call it as sanitiser(raw, clean).
var sanitiser = require('../../../sanitiser/_text_addressit');
// NOTE(review): type_mapping is not referenced in the visible part of this file — confirm before removing.
var type_mapping = require('../../../helper/type_mapping');
// Registry of named test groups; each entry is a function taking (test, common).
module.exports.tests = {};
/**
 * Registers the text-parsing test cases for the sanitiser.
 *
 * Every case builds a `raw` request object, calls `sanitiser(raw, clean)` and
 * then asserts on the returned messages and, in all but the first case, on
 * the contents of `clean` after the call.
 *
 * @param {Function} test - called as test(name, callback); the callback
 *   receives the assertion object `t`.
 * @param {*} common - accepted for signature parity with the other test
 *   groups; not used by any case in this group.
 */
module.exports.tests.text_parser = function(test, common) {

  // Only the returned messages are checked here; `clean` is not inspected.
  test('short input text has admin layers set ', function(t) {
    var raw = {
      text: 'emp' //start of empire state building
    };
    var clean = {
    };

    var messages = sanitiser(raw, clean);

    t.deepEquals(messages.errors, [], 'no errors');
    t.deepEquals(messages.warnings, [], 'no warnings');

    t.end();
  });

  // Queries whose expected parse carries a `state` value.
  var usQueries = [
    { name: 'soho', admin_parts: 'new york', state: 'NY' },
    { name: '123 main', admin_parts: 'new york', state: 'NY' }
  ];

  usQueries.forEach(function (query) {
    // The title uses query.name: concatenating the object itself would
    // render every title as 'naive parsing [object Object]'.
    test('naive parsing ' + query.name, function(t) {
      var raw = {
        text: query.name + ', ' + query.admin_parts
      };
      var clean = {};

      var expected_clean = {
        text: query.name + ', ' + query.admin_parts,
        parsed_text: {
          name: query.name,
          regions: [ query.name ],
          admin_parts: query.admin_parts,
          state: query.state
        }
      };

      var messages = sanitiser(raw, clean);

      t.deepEqual(messages, { errors: [], warnings: [] } );
      t.deepEqual(clean, expected_clean);
      t.end();
    });

    // Same query, but with no space after the comma separator.
    test('naive parsing ' + query.name + ' without spaces', function(t) {
      var raw = {
        text: query.name + ',' + query.admin_parts
      };
      var clean = {};

      var expected_clean = {
        text: query.name + ',' + query.admin_parts,
        parsed_text: {
          name: query.name,
          regions: [ query.name ],
          admin_parts: query.admin_parts,
          state: query.state
        }
      };

      var messages = sanitiser(raw, clean);

      t.deepEqual(messages, { errors: [], warnings: [] } );
      t.deepEqual(clean, expected_clean);
      t.end();
    });
  });

  // Queries whose expected parse has no `state`; both parts appear in `regions`.
  var nonUSQueries = [
    { name: 'chelsea', admin_parts: 'london' },
  ];

  nonUSQueries.forEach(function (query) {
    test('naive parsing ' + query.name, function(t) {
      var raw = {
        text: query.name + ', ' + query.admin_parts
      };
      var clean = {};

      var expected_clean = {
        text: query.name + ', ' + query.admin_parts,
        parsed_text: {
          name: query.name,
          regions: [ query.name, query.admin_parts ],
          admin_parts: query.admin_parts
        }
      };

      var messages = sanitiser(raw, clean);

      t.deepEqual(messages, { errors: [], warnings: [] } );
      t.deepEqual(clean, expected_clean);
      t.end();
    });

    // Same query, but with no space after the comma separator.
    test('naive parsing ' + query.name + ' without spaces', function(t) {
      var raw = {
        text: query.name + ',' + query.admin_parts
      };
      var clean = {};

      var expected_clean = {
        text: query.name + ',' + query.admin_parts,
        parsed_text: {
          name: query.name,
          regions: [ query.name, query.admin_parts ],
          admin_parts: query.admin_parts
        }
      };

      var messages = sanitiser(raw, clean);

      t.deepEqual(messages, { errors: [], warnings: [] } );
      t.deepEqual(clean, expected_clean);
      t.end();
    });
  });

  // In the next five cases `clean.parsed_text` is pre-seeded with a sentinel
  // string; the expectations require that the sentinel does not survive.

  test('query with one token', function (t) {
    var raw = {
      text: 'yugolsavia'
    };
    var clean = {};
    clean.parsed_text = 'this should be removed';

    var expected_clean = {
      text: 'yugolsavia'
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  test('query with two tokens, no numbers', function (t) {
    var raw = {
      text: 'small town'
    };
    var clean = {};
    clean.parsed_text = 'this should be removed';

    var expected_clean = {
      text: 'small town'
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  test('query with two tokens, number first', function (t) {
    var raw = {
      text: '123 main'
    };
    var clean = {};
    clean.parsed_text = 'this should be removed';

    var expected_clean = {
      text: '123 main'
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  test('query with two tokens, number second', function (t) {
    var raw = {
      text: 'main 123'
    };
    var clean = {};
    clean.parsed_text = 'this should be removed';

    var expected_clean = {
      text: 'main 123'
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  // Here the sentinel is expected to be replaced by a parsed object.
  test('query with many tokens', function(t) {
    var raw = {
      text: 'main particle new york'
    };
    var clean = {};
    clean.parsed_text = 'this should be removed';

    var expected_clean = {
      text: 'main particle new york',
      parsed_text: {
        regions: [ 'main particle' ],
        state: 'NY'
      }
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  test('valid address, house number', function(t) {
    var raw = {
      text: '123 main st new york ny'
    };
    var clean = {};

    var expected_clean = {
      text: '123 main st new york ny',
      parsed_text: {
        number: '123',
        street: 'main st',
        state: 'NY',
        regions: [ 'new york' ]
      }
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  test('valid address, zipcode', function(t) {
    var raw = {
      text: '123 main st new york ny 10010'
    };
    var clean = {};

    var expected_clean = {
      text: '123 main st new york ny 10010',
      parsed_text: {
        number: '123',
        street: 'main st',
        state: 'NY',
        postalcode: '10010',
        regions: [ 'new york' ]
      }
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  // The postalcode is compared as a string, so the leading zero must be kept.
  test('valid address with leading 0s in zipcode', function(t) {
    var raw = {
      text: '339 W Main St, Cheshire, 06410'
    };
    var clean = {};

    var expected_clean = {
      text: '339 W Main St, Cheshire, 06410',
      parsed_text: {
        name: '339 W Main St',
        number: '339',
        street: 'W Main St',
        postalcode: '06410',
        regions: [ 'Cheshire' ],
        admin_parts: 'Cheshire, 06410'
      }
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });

  // Input has no space after the commas, yet the expected admin_parts
  // contains ', ' between its parts.
  test('valid address without spaces after commas', function(t) {
    var raw = {
      text: '339 W Main St,Lancaster,PA'
    };
    var clean = {};

    var expected_clean = {
      text: '339 W Main St,Lancaster,PA',
      parsed_text: {
        name: '339 W Main St',
        number: '339',
        street: 'W Main St',
        state: 'PA',
        regions: [ 'Lancaster' ],
        admin_parts: 'Lancaster, PA'
      }
    };

    var messages = sanitiser(raw, clean);

    t.deepEqual(messages, { errors: [], warnings: [] } );
    t.deepEqual(clean, expected_clean);
    t.end();
  });
};
/**
 * Runs every test group registered on `module.exports.tests`.
 *
 * Each group is invoked with a wrapped `test` function that prepends
 * 'SANITISER _text: ' to the case name before delegating to `tape`.
 *
 * @param {Function} tape - called as tape(name, testFunction).
 * @param {*} common - forwarded unchanged to every test group.
 */
module.exports.all = function (tape, common) {
  var groups = module.exports.tests;
  var prefix = 'SANITISER _text: ';

  // Wrapper handed to each group in place of the raw tape function.
  var test = function (name, testFunction) {
    return tape(prefix + name, testFunction);
  };

  for (var groupName in groups) {
    groups[groupName](test, common);
  }
};