From 562338635c826596e95e50fc6959445b3a229fa0 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Fri, 4 Sep 2015 11:28:19 -0400 Subject: [PATCH 01/23] Confidence score calculation Fixes pelias/pelias#120 --- controller/search.js | 5 +- helper/geojsonify.js | 3 +- middleware/confidenceScore.js | 255 ++++++++++++++++++++++++++++++++++ package.json | 1 + routes/v1.js | 3 + service/search.js | 10 +- test/unit/mock/backend.js | 4 + test/unit/service/search.js | 2 + 8 files changed, 279 insertions(+), 4 deletions(-) create mode 100644 middleware/confidenceScore.js diff --git a/controller/search.js b/controller/search.js index f38091cd..8af6bec4 100644 --- a/controller/search.js +++ b/controller/search.js @@ -26,13 +26,14 @@ function setup( backend, query ){ } // query backend - service.search( backend, cmd, function( err, docs ){ + service.search( backend, cmd, function( err, docs, meta ){ // error handler if( err ){ return next( err ); } req.results = { - data: docs + data: docs, + meta: meta }; next(); diff --git a/helper/geojsonify.js b/helper/geojsonify.js index 5392e957..aa88471e 100644 --- a/helper/geojsonify.js +++ b/helper/geojsonify.js @@ -16,7 +16,8 @@ var DETAILS_PROPS = [ 'county', 'localadmin', 'locality', - 'neighbourhood' + 'neighbourhood', + 'confidence' ]; var META_MAP = { diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js new file mode 100644 index 00000000..ade6f4fe --- /dev/null +++ b/middleware/confidenceScore.js @@ -0,0 +1,255 @@ +/** + * + *Basic confidence score should be computed and returned for each item in the results. + * The score should range between 0-1, and take into consideration as many factors as possible. + * + * Some factors to consider: + * + * - number of results from ES + * - score of item within the range of highest-lowest scores from ES (within the returned set) + * - linguistic match of query + * - detection (or specification) of query type. i.e. an address shouldn't match an admin address. + */ + +var stats = require('stats-lite'); +var logger = require('pelias-logger').get('api'); + +var RELATIVE_SCORES = true; + +function setup(peliasConfig) { + RELATIVE_SCORES = peliasConfig.relativeScores || true; + return computeScores; +} + +function computeScores(req, res, next) { + // do nothing if no result data set + if (!req.results || !req.results.data || !req.results.meta) { + return next(); + } + + // compute standard deviation and mean from all scores + var scores = req.results.meta.scores; + var stdev = computeStandardDeviation(scores); + var mean = stats.mean(scores); + + // loop through data items and determine confidence scores + req.results.data = req.results.data.map(computeConfidenceScore.bind(null, req, mean, stdev)); + + next(); +} + +/** + * Check all types of things to determine how confident we are that this result + * is correct. Score is based on overall score distribution in the result set + * as well as how closely the result matches the input parameters. + * + * @param {object} req + * @param {number} mean + * @param {number} stdev + * @param {object} hit + * @returns {object} + */ +function computeConfidenceScore(req, mean, stdev, hit) { + var dealBreakers = checkForDealBreakers(req, hit); + if (dealBreakers) { + hit.confidence = 0; + return hit; + } + + var checkCount = 3; + hit.confidence = 0; + + if (RELATIVE_SCORES) { + checkCount += 2; + hit.confidence += checkDistanceFromMean(hit._score, mean, stdev); + hit.confidence += computeZScore(hit._score, mean, stdev); + } + hit.confidence += checkName(req.clean.input, req.clean.parsed_input, hit); + hit.confidence += checkQueryType(req.clean.parsed_input, hit); + hit.confidence += checkAddress(req.clean.parsed_input, hit); + + // TODO: look at categories and location + + hit.confidence /= checkCount; + + logger.debug('[confidence]:', hit.confidence, hit.name.default); + + return hit; +} + +function checkForDealBreakers(req, hit) { + if (!req.clean.parsed_input) { + return false; + } + + if (req.clean.parsed_input.state && req.clean.parsed_input.state !== hit.admin1_abbr) { + logger.debug('[confidence][deal-breaker]: state !== admin1_abbr'); + return true; + } + + if (req.clean.parsed_input.postalcode && req.clean.parsed_input.postalcode !== hit.zip) { + logger.debug('[confidence][deal-breaker]: postalcode !== zip'); + return true; + } +} + +/** + * Check how statistically significant the score of this result is + * given mean and standard deviation + * + * @param {number} score + * @param {number} mean + * @param {number} stdev + * @returns {number} + */ +function checkDistanceFromMean(score, mean, stdev) { + return (score - mean) > stdev ? 1 : 0; +} + +/** + * Compare input string or name component of parsed_input against + * default name in result + * + * @param {string} input + * @param {object|undefined} parsed_input + * @param {object} hit + * @returns {number} + */ +function checkName(input, parsed_input, hit) { + // parsed_input name should take precedence if available since it's the cleaner name property + if (parsed_input && parsed_input.name && hit.name.default.toLowerCase() === parsed_input.name.toLowerCase()) { + return 1; + } + + // if no parsed_input check the input value as provided against result's default name + if (hit.name.default.toLowerCase() === input.toLowerCase()) { + return 1; + } + + // if no matches detected, don't judge too harshly since it was a longshot anyway + return 0.7; +} + +/** + * Input being set indicates the query was for an address + * check if house number was specified and found in result + * + * @param {object|undefined} input + * @param {object} hit + * @returns {number} + */ +function checkQueryType(input, hit) { + if (!!input.number && (!hit.address || (hit.address && !hit.address.number))) { + return 0; + } + return 1; +} + +/** + * Determine the quality of the property match + * + * @param {string|number|undefined|null} inputProp + * @param {string|number|undefined|null} hitProp + * @param {boolean} expectEnriched + * @returns {number} + */ +function propMatch(inputProp, hitProp, expectEnriched) { + + // both missing, but expect to have enriched value in result => BAD + if (!inputProp && !hitProp && expectEnriched) { return 0; } + + // both missing, and no enrichment expected => GOOD + if (!inputProp && !hitProp) { return 1; } + + // input has it, result doesn't => BAD + if (inputProp && !hitProp) { return 0; } + + // input missing, result has it, and enrichment is expected => GOOD + if (!inputProp && hitProp && expectEnriched) { return 1; } + + // input missing, result has it, enrichment not desired => 50/50 + if (!inputProp && hitProp) { return 0.5; } + + // both present, values match => GREAT + if (inputProp && hitProp && inputProp.toString().toLowerCase() === hitProp.toString().toLowerCase()) { return 1; } + + // ¯\_(ツ)_/¯ + return 0.7; +} + +/** + * Check various parts of the parsed input address + * against the results + * + * @param {object} input + * @param {string|number} [input.number] + * @param {string} [input.street] + * @param {string} [input.postalcode] + * @param {string} [input.state] + * @param {string} [input.country] + * @param {object} hit + * @param {object} [hit.address] + * @param {string|number} [hit.address.number] + * @param {string} [hit.address.street] + * @param {string|number} [hit.zip] + * @param {string} [hit.admin1_abbr] + * @param {string} [hit.alpha3] + * @returns {number} + */ +function checkAddress(input, hit) { + var checkCount = 5; + var res = 0; + + if (input && input.number && input.street) { + res += propMatch(input.number, (hit.address ? hit.address.number : null), false); + res += propMatch(input.street, (hit.address ? hit.address.street : null), false); + res += propMatch(input.postalcode, (hit.address ? hit.address.zip: null), true); + res += propMatch(input.state, hit.admin1_abbr, true); + res += propMatch(input.country, hit.alpha3, true); + + res /= checkCount; + } + else { + res = 1; + } + + return res; +} + +/** + * z-scores have an effective range of -3.00 to +3.00. + * An average z-score is ZERO. + * A negative z-score indicates that the item/element is below + * average and a positive z-score means that the item/element + * in above average. When teachers say they are going to "curve" + * the test, they do this by computing z-scores for the students' test scores. + * + * @param {number} score + * @param {number} mean + * @param {number} stdev + * @returns {number} + */ +function computeZScore(score, mean, stdev) { + if (stdev < 0.01) { + return 0; + } + // because the effective range of z-scores is -3.00 to +3.00 + // add 10 to ensure a positive value, and then divide by 10+3+3 + // to further normalize to %-like result + return (((score - mean) / (stdev)) + 10) / 16; +} + +/** + * Computes standard deviation given an array of values + * + * @param {Array} scores + * @returns {number} + */ +function computeStandardDeviation(scores) { + var stdev = stats.stdev(scores); + // if stdev is low, just consider it 0 + return (stdev < 0.01) ? 0 : stdev; +} + + +module.exports = setup; \ No newline at end of file diff --git a/package.json b/package.json index 78304ebb..3917f1ad 100644 --- a/package.json +++ b/package.json @@ -51,6 +51,7 @@ "pelias-logger": "^0.0.8", "pelias-schema": "1.0.0", "pelias-suggester-pipeline": "2.0.2", + "stats-lite": "^1.0.3", "through2": "0.6.5" }, "devDependencies": { diff --git a/routes/v1.js b/routes/v1.js index cb3b53e2..f17a9cd1 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -19,6 +19,7 @@ var controllers = { /** ----------------------- controllers ----------------------- **/ var postProc = { + confidenceScores: require('../middleware/confidenceScore'), renamePlacenames: require('../middleware/renamePlacenames'), geocodeJSON: require('../middleware/geocodeJSON'), sendJSON: require('../middleware/sendJSON') @@ -41,6 +42,7 @@ function addRoutes(app, peliasConfig) { search: createRouter([ sanitisers.search.middleware, controllers.search(), + postProc.confidenceScores(peliasConfig), postProc.renamePlacenames(), postProc.geocodeJSON(peliasConfig), postProc.sendJSON @@ -48,6 +50,7 @@ function addRoutes(app, peliasConfig) { reverse: createRouter([ sanitisers.reverse.middleware, controllers.search(undefined, reverseQuery), + // TODO: add confidence scores postProc.renamePlacenames(), postProc.geocodeJSON(peliasConfig), postProc.sendJSON diff --git a/service/search.js b/service/search.js index 15b5cd14..1e77f69f 100644 --- a/service/search.js +++ b/service/search.js @@ -23,20 +23,28 @@ function service( backend, cmd, cb ){ // map returned documents var docs = []; + var meta = { + scores: [] + }; + if( data && data.hits && data.hits.total && Array.isArray(data.hits.hits)){ + docs = data.hits.hits.map( function( hit ){ + meta.scores.push(hit._score); + // map metadata in to _source so we // can serve it up to the consumer hit._source._id = hit._id; hit._source._type = hit._type; + hit._source._score = hit._score; return hit._source; }); } // fire callback - return cb( null, docs ); + return cb( null, docs, meta ); }); } diff --git a/test/unit/mock/backend.js b/test/unit/mock/backend.js index fe617fe7..201ab7b5 100644 --- a/test/unit/mock/backend.js +++ b/test/unit/mock/backend.js @@ -10,6 +10,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){ return cb( undefined, searchEnvelope([{ _id: 'myid1', _type: 'mytype1', + _score: 10, _source: { value: 1, center_point: { lat: 100.1, lon: -50.5 }, @@ -19,6 +20,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){ }, { _id: 'myid2', _type: 'mytype2', + _score: 20, _source: { value: 2, center_point: { lat: 100.2, lon: -51.5 }, @@ -35,6 +37,7 @@ responses['client/mget/ok/1'] = function( cmd, cb ){ return cb( undefined, mgetEnvelope([{ _id: 'myid1', _type: 'mytype1', + _score: 10, found: true, _source: { value: 1, @@ -45,6 +48,7 @@ responses['client/mget/ok/1'] = function( cmd, cb ){ }, { _id: 'myid2', _type: 'mytype2', + _score: 20, found: true, _source: { value: 2, diff --git a/test/unit/service/search.js b/test/unit/service/search.js index e1f97541..a7212775 100644 --- a/test/unit/service/search.js +++ b/test/unit/service/search.js @@ -19,6 +19,7 @@ module.exports.tests.functional_success = function(test, common) { var expected = [ { _id: 'myid1', _type: 'mytype1', + _score: 10, value: 1, center_point: { lat: 100.1, lon: -50.5 }, name: { default: 'test name1' }, @@ -26,6 +27,7 @@ module.exports.tests.functional_success = function(test, common) { }, { _id: 'myid2', _type: 'mytype2', + _score: 20, value: 2, center_point: { lat: 100.2, lon: -51.5 }, name: { default: 'test name2' }, From d9cb96dad22e7df0764b71992b95bd16d0ecf20c Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Fri, 4 Sep 2015 11:39:40 -0400 Subject: [PATCH 02/23] Fix relativeScore config value setting --- middleware/confidenceScore.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js index ade6f4fe..ef27b1e5 100644 --- a/middleware/confidenceScore.js +++ b/middleware/confidenceScore.js @@ -17,7 +17,7 @@ var logger = require('pelias-logger').get('api'); var RELATIVE_SCORES = true; function setup(peliasConfig) { - RELATIVE_SCORES = peliasConfig.relativeScores || true; + RELATIVE_SCORES = peliasConfig.hasOwnProperty('relativeScores') ? peliasConfig.relativeScores : true; return computeScores; } From f7044d2435b5baa343176661a6cdd8238aff152c Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Mon, 7 Sep 2015 22:41:20 -0400 Subject: [PATCH 03/23] Add content for `attribution` static endpoint Fixes #212 --- controller/index.js | 31 ------------------------------- controller/markdownToHtml.js | 28 ++++++++++++++++++++++++++++ middleware/geocodeJSON.js | 8 +------- DOCS.md => public/apiDoc.md | 0 public/attribution.md | 7 +++++++ routes/v1.js | 9 +++++++-- test/unit/controller/index.js | 20 +++++++++++--------- 7 files changed, 54 insertions(+), 49 deletions(-) delete mode 100644 controller/index.js create mode 100644 controller/markdownToHtml.js rename DOCS.md => public/apiDoc.md (100%) create mode 100644 public/attribution.md diff --git a/controller/index.js b/controller/index.js deleted file mode 100644 index d900793f..00000000 --- a/controller/index.js +++ /dev/null @@ -1,31 +0,0 @@ - -var pkg = require('../package'); -var markdown = require('markdown').markdown; -var fs = require('fs'); - -function setup(){ - - var styleString = ''; - var text = '# Pelias API\n'; - text += '### Version: ['+ pkg.version+ '](https://github.com/pelias/api/releases)\n'; - text += fs.readFileSync( './DOCS.md', 'utf8'); - var indexHtml = styleString + markdown.toHTML(text); - - function controller( req, res, next ) { - if (req.accepts('html')) { - res.send(indexHtml); - return; - } - // default behaviour - res.json({ - name: pkg.name, - version: { - number: pkg.version - } - }); - } - - return controller; -} - -module.exports = setup; diff --git a/controller/markdownToHtml.js b/controller/markdownToHtml.js new file mode 100644 index 00000000..656afe01 --- /dev/null +++ b/controller/markdownToHtml.js @@ -0,0 +1,28 @@ + +var markdown = require('markdown').markdown; +var fs = require('fs'); + +function setup(peliasConfig, markdownFile){ + + var styleString = ''; + var text = '# Pelias API\n'; + text += '### Version: [' + peliasConfig.version + '](https://github.com/pelias/api/releases)\n'; + text += fs.readFileSync( markdownFile, 'utf8'); + var html = styleString + markdown.toHTML(text); + + function controller( req, res ) { + if (req.accepts('html')) { + res.send(html); + return; + } + // default behaviour + res.json({ + markdown: text, + html: html + }); + } + + return controller; +} + +module.exports = setup; diff --git a/middleware/geocodeJSON.js b/middleware/geocodeJSON.js index da6cfdf6..d2f58b46 100644 --- a/middleware/geocodeJSON.js +++ b/middleware/geocodeJSON.js @@ -25,16 +25,10 @@ function convertToGeocodeJSON(peliasConfig, req, next) { // the GeocodeJSON spec that is implemented by this instance. req.results.geojson.geocoding.version = '0.1'; - // OPTIONAL. Default: null. The licence of the data. In case of multiple sources, - // and then multiple licences, can be an object with one key by source. - // Can be a freeform text property describing the licensing details. - // Can be a URI on the server, which outlines licensing details. - req.results.geojson.geocoding.license = peliasConfig.host + '/license'; - // OPTIONAL. Default: null. The attribution of the data. In case of multiple sources, // and then multiple attributions, can be an object with one key by source. // Can be a URI on the server, which outlines attribution details. - req.results.geojson.geocoding.attribution = peliasConfig.host + '/attribution'; + req.results.geojson.geocoding.attribution = peliasConfig.host + 'attribution'; // OPTIONAL. Default: null. The query that has been issued to trigger the // search. diff --git a/DOCS.md b/public/apiDoc.md similarity index 100% rename from DOCS.md rename to public/apiDoc.md diff --git a/public/attribution.md b/public/attribution.md new file mode 100644 index 00000000..db9c392e --- /dev/null +++ b/public/attribution.md @@ -0,0 +1,7 @@ +## Attribution +* Geocoding by [Pelias](https://mapzen.com/pelias) from [Mapzen](https://mapzen.com) +* Data from + * [OpenStreetMap](http://www.openstreetmap.org/copyright) © OpenStreetMap contributors under [ODbL](http://opendatacommons.org/licenses/odbl/) + * [Quattroshapes](https://github.com/foursquare/quattroshapes/blob/master/LICENSE.md) under [CC-BY-2.0](https://creativecommons.org/licenses/by/2.0/) + * [GeoNames](http://www.geonames.org/) under [CC-BY-3.0](https://creativecommons.org/licenses/by/2.0/) + * and other sources diff --git a/routes/v1.js b/routes/v1.js index cb3b53e2..37512008 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -1,3 +1,4 @@ +var express = require('express'); var Router = require('express').Router; var reverseQuery = require('../query/reverse'); @@ -11,7 +12,7 @@ var sanitisers = { /** ----------------------- controllers ----------------------- **/ var controllers = { - index: require('../controller/index'), + mdToHTML: require('../controller/markdownToHtml'), place: require('../controller/place'), search: require('../controller/search') }; @@ -36,7 +37,10 @@ function addRoutes(app, peliasConfig) { var routers = { index: createRouter([ - controllers.index() + controllers.mdToHTML(peliasConfig, './public/apiDoc.md') + ]), + attribution: createRouter([ + controllers.mdToHTML(peliasConfig, './public/attribution.md') ]), search: createRouter([ sanitisers.search.middleware, @@ -66,6 +70,7 @@ function addRoutes(app, peliasConfig) { // api root app.get ( base, routers.index ); + app.get ( base + 'attribution', routers.attribution ); app.get ( base + 'place', routers.place ); app.get ( base + 'autocomplete', routers.search ); app.get ( base + 'search', routers.search ); diff --git a/test/unit/controller/index.js b/test/unit/controller/index.js index f59dacd4..c2f10aba 100644 --- a/test/unit/controller/index.js +++ b/test/unit/controller/index.js @@ -1,19 +1,19 @@ -var setup = require('../../../controller/index'); +var setup = require('../../../controller/markdownToHtml'); module.exports.tests = {}; module.exports.tests.interface = function(test, common) { test('valid interface', function(t) { t.equal(typeof setup, 'function', 'setup is a function'); - t.equal(typeof setup(), 'function', 'setup returns a controller'); + t.equal(typeof setup({}, './public/apiDoc.md'), 'function', 'setup returns a controller'); t.end(); }); }; module.exports.tests.info_json = function(test, common) { test('returns server info in json', function(t) { - var controller = setup(); + var controller = setup({}, './public/attribution.md'); var req = { accepts: function (format) { t.equal(format, 'html', 'check for Accepts:html'); @@ -22,9 +22,8 @@ module.exports.tests.info_json = function(test, common) { }; var res = { json: function( json ){ t.equal(typeof json, 'object', 'returns json'); - t.equal(typeof json.name, 'string', 'name'); - t.equal(typeof json.version, 'object', 'version'); - t.equal(typeof json.version.number, 'string', 'version number'); + t.assert(json.hasOwnProperty('markdown'), 'return object contains markdown property'); + t.assert(json.hasOwnProperty('html'), 'return object contains html property'); t.end(); }}; controller( req, res ); @@ -33,21 +32,24 @@ module.exports.tests.info_json = function(test, common) { module.exports.tests.info_html = function(test, common) { test('returns server info in html', function(t) { + var filePath = './foo.md'; var style = ''; var mockText = 'this text should show up in the html content'; var fsMock = { readFileSync: function (path, format) { - t.equal(path, './DOCS.md', 'open DOCS.md file'); + t.equal(path, filePath, 'open specified file'); t.equal(format, 'utf8', 'file format'); return mockText; } }; var proxyquire = require('proxyquire'); - var setup = proxyquire('../../../controller/index', { 'fs': fsMock }); + var setup = proxyquire('../../../controller/markdownToHTML', { 'fs': fsMock }); - var controller = setup(); + var config = { version: '1.1.1' }; + + var controller = setup(config, filePath); var req = { accepts: function () { return true; From 2eccfc5c9a3a474b3df6bef3543194901f15e43b Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Mon, 7 Sep 2015 22:53:16 -0400 Subject: [PATCH 04/23] Fix unit test --- test/unit/controller/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/controller/index.js b/test/unit/controller/index.js index c2f10aba..cffe1030 100644 --- a/test/unit/controller/index.js +++ b/test/unit/controller/index.js @@ -45,7 +45,7 @@ module.exports.tests.info_html = function(test, common) { }; var proxyquire = require('proxyquire'); - var setup = proxyquire('../../../controller/markdownToHTML', { 'fs': fsMock }); + var setup = proxyquire('../../../controller/markdownToHtml', { 'fs': fsMock }); var config = { version: '1.1.1' }; From 66710f0c88c94d0d793f1ceb930d4de0b576c806 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Tue, 8 Sep 2015 11:56:10 -0400 Subject: [PATCH 05/23] Change magic number for deal breakers to 0.5 --- middleware/confidenceScore.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js index ef27b1e5..6c55bc27 100644 --- a/middleware/confidenceScore.js +++ b/middleware/confidenceScore.js @@ -52,7 +52,7 @@ function computeScores(req, res, next) { function computeConfidenceScore(req, mean, stdev, hit) { var dealBreakers = checkForDealBreakers(req, hit); if (dealBreakers) { - hit.confidence = 0; + hit.confidence = 0.5; return hit; } @@ -252,4 +252,4 @@ function computeStandardDeviation(scores) { } -module.exports = setup; \ No newline at end of file +module.exports = setup; From e71fb7cfd416c3332a4af1dd69d9ef3282b5883f Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 14:34:01 -0400 Subject: [PATCH 06/23] Whitespace --- controller/search.js | 1 - helper/layers.js | 2 +- helper/query_parser.js | 18 +++++++++--------- sanitiser/_id.js | 10 ++++------ sanitiser/_input.js | 11 ++++------- sanitiser/_layers.js | 3 +-- test/unit/helper/query_parser.js | 32 ++++++++++++++++---------------- 7 files changed, 35 insertions(+), 42 deletions(-) diff --git a/controller/search.js b/controller/search.js index 8af6bec4..7e17b0bd 100644 --- a/controller/search.js +++ b/controller/search.js @@ -1,4 +1,3 @@ - var service = { search: require('../service/search') }; function setup( backend, query ){ diff --git a/helper/layers.js b/helper/layers.js index aa625a6e..a5a57d7f 100644 --- a/helper/layers.js +++ b/helper/layers.js @@ -1,6 +1,6 @@ module.exports = function(alias_layers) { // make a copy of the array so, you are not modifying original ref - var layers = alias_layers.slice(0); + var layers = alias_layers.slice(0); // expand aliases var expand_aliases = function(alias, layers, layer_indeces) { diff --git a/helper/query_parser.js b/helper/query_parser.js index 88729c86..7c5ba4b0 100644 --- a/helper/query_parser.js +++ b/helper/query_parser.js @@ -5,7 +5,7 @@ var get_layers = require('../helper/layers'); var delim = ','; module.exports = function(query) { - + var tokenized = query.split(/[ ,]+/); var hasNumber = /\d/.test(query); @@ -17,11 +17,11 @@ module.exports = function(query) { if ( delimIndex !== -1 ) { address.name = query.substring(0, delimIndex); address.admin_parts = query.substring(delimIndex + 1).trim(); - } + } return address; }; - + var getTargetLayersWhenAddressParsingIsNotNecessary = function(query) { var address = {}; // set target_layer if input length <= 3 characters @@ -32,7 +32,7 @@ module.exports = function(query) { // no need to hit address layers if there's only one (or two) token(s) address.target_layer = get_layers(['admin', 'poi']); } - + return address.target_layer ? address : null; }; @@ -49,10 +49,10 @@ module.exports = function(query) { var addressWithAdminParts = getAdminPartsBySplittingOnDelim(query); var addressWithTargetLayers= getTargetLayersWhenAddressParsingIsNotNecessary(query); - var addressWithAddressParts= !addressWithTargetLayers ? getAddressParts(query) : {}; + var addressWithAddressParts= !addressWithTargetLayers ? getAddressParts(query) : {}; - var parsedAddress = extend(addressWithAdminParts, - addressWithTargetLayers, + var parsedAddress = extend(addressWithAdminParts, + addressWithTargetLayers, addressWithAddressParts); var address_parts = [ 'name', @@ -69,7 +69,7 @@ module.exports = function(query) { var parsed_input = {}; - address_parts.forEach(function(part){ + address_parts.forEach(function(part){ if (parsedAddress[part]) { parsed_input[part] = parsedAddress[part]; } @@ -90,4 +90,4 @@ module.exports = function(query) { // regions: parsedAddress.regions, // admin_parts: parsedAddress.admin_parts, // target_layer: parsedAddress.target_layer -// } \ No newline at end of file +// } diff --git a/sanitiser/_id.js b/sanitiser/_id.js index ac2432e6..a8424363 100644 --- a/sanitiser/_id.js +++ b/sanitiser/_id.js @@ -5,7 +5,6 @@ var isObject = require('is-object'); // so, both type and id are required fields. function sanitize( req ){ - req.clean = req.clean || {}; var params = req.query; var indeces = require('../query/indeces'); @@ -30,7 +29,7 @@ function sanitize( req ){ if( params && params.id && params.id.length ){ req.clean.ids = []; params.ids = Array.isArray(params.id) ? params.id : [params.id]; - + // de-dupe params.ids = params.ids.filter(function(item, pos) { return params.ids.indexOf(item) === pos; @@ -38,7 +37,7 @@ function sanitize( req ){ for (var i=0; i0' }; } - + req.clean.input = params.input; req.clean.parsed_input = query_parse(params.input); - return { 'error': false }; - } // export function -module.exports = sanitize; \ No newline at end of file +module.exports = sanitize; diff --git a/sanitiser/_layers.js b/sanitiser/_layers.js index fa44336e..2ad5c42f 100644 --- a/sanitiser/_layers.js +++ b/sanitiser/_layers.js @@ -5,7 +5,6 @@ var isObject = require('is-object'), // validate inputs, convert types and apply defaults function sanitize( req ){ - var clean = req.clean || {}; var params= req.query; @@ -42,7 +41,7 @@ function sanitize( req ){ // pass validated params to next middleware clean.layers = get_layers(layers); req.clean = clean; - + return { 'error': false }; } diff --git a/test/unit/helper/query_parser.js b/test/unit/helper/query_parser.js index bf10b154..c82a31a2 100644 --- a/test/unit/helper/query_parser.js +++ b/test/unit/helper/query_parser.js @@ -67,13 +67,13 @@ module.exports.tests.parse_one_or_more_tokens = function(test, common) { var target_layer = get_layers(['admin', 'poi']); t.equal(typeof address, 'object', 'valid object'); - + if (parse_address) { t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing'); } else { - t.deepEqual(address.target_layer, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); + t.deepEqual(address.target_layer, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); } - + t.end(); }); }; @@ -88,32 +88,32 @@ module.exports.tests.parse_one_or_more_tokens = function(test, common) { }; module.exports.tests.parse_address = function(test, common) { - var addresses_nonum = [{ non_street: 'main particle', city: 'new york'}, - { non_street: 'biggg city block' }, + var addresses_nonum = [{ non_street: 'main particle', city: 'new york'}, + { non_street: 'biggg city block' }, { non_street: 'the empire state building' } ]; - var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'}, - { number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'}, + var address_with_num = [{ number: 123, street: 'main st', city: 'new york', state: 'ny'}, + { number: 456, street: 'pine ave', city: 'san francisco', state: 'CA'}, { number: 1980, street: 'house st', city: 'hoboken', state: 'NY'} ]; - var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010}, - { number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970}, + var address_with_zip = [{ number: 1, street: 'main st', city: 'new york', state: 'ny', zip: 10010}, + { number: 4, street: 'ape ave', city: 'san diego', state: 'CA', zip: 98970}, { number: 19, street: 'house dr', city: 'houston', state: 'TX', zip: 79089} ]; var testParse = function(query, hasNumber, hasZip) { - var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number '); + var testcase = 'parse query with ' + (hasNumber ? 'a house number ': 'no house number '); testcase += 'and ' + (hasZip ? 'a zip ' : 'no zip '); test(testcase, function(t) { var query_string = ''; - for (var k in query) { + for (var k in query) { query_string += ' ' + query[k]; } // remove leading whitespace query_string = query_string.substring(1); - + var address = parser(query_string); var non_address_layer = get_layers(['admin', 'poi']); @@ -126,14 +126,14 @@ module.exports.tests.parse_address = function(test, common) { } if ((hasNumber || hasZip) && query.street) { - t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')'); + t.equal(typeof address.number, 'number', 'valid house number format (' + address.number + ')'); t.equal(address.number, query.number, 'correct house number (' + query.number + ')'); - t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')'); + t.equal(typeof address.street, 'string', 'valid street name format (' + address.street + ')'); t.equal(address.street, query.street, 'correct street name (' + query.street + ')'); } - + if (hasZip) { - t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')'); + t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')'); t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')'); } From 82d3a6603c2339a366d1b09fe8e6d82f8cac4b3f Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 14:58:58 -0400 Subject: [PATCH 07/23] Store sanitised types from layers parameter This moves the list of types created by sanitising the layer API parameter from clean.layers to clean.types.from_layers. In subsequent commits, types created from address parsing, and the yet-to-be-implemented source parameter will also live in the clean.types object. This will allow moving logic to set cmd.type out of controllers, and into separate logic that can be a littler smarter. Also, it will no longer require the clean.default_layers_set flag to be passed all around like a nasty global variable. --- controller/search.js | 4 ++-- sanitiser/_layers.js | 4 +++- test/unit/sanitiser/reverse.js | 24 +++++++++++++----------- test/unit/sanitiser/search.js | 24 +++++++++++++----------- test/unit/sanitiser/suggest.js | 2 +- 5 files changed, 32 insertions(+), 26 deletions(-) diff --git a/controller/search.js b/controller/search.js index 7e17b0bd..4371e274 100644 --- a/controller/search.js +++ b/controller/search.js @@ -15,8 +15,8 @@ function setup( backend, query ){ body: query( req.clean ) }; - if (req.clean.layers) { - cmd.type = req.clean.layers; + if (req.clean.types && req.clean.types.from_layers) { + cmd.type = req.clean.types.from_layers; } // set type if input suggests targeting a layer(s) diff --git a/sanitiser/_layers.js b/sanitiser/_layers.js index 2ad5c42f..25e8ba26 100644 --- a/sanitiser/_layers.js +++ b/sanitiser/_layers.js @@ -8,6 +8,8 @@ function sanitize( req ){ var clean = req.clean || {}; var params= req.query; + clean.types = clean.types || {}; + // ensure the input params are a valid object if( !isObject( params ) ){ params = {}; @@ -39,7 +41,7 @@ function sanitize( req ){ } // pass validated params to next middleware - clean.layers = get_layers(layers); + clean.types.from_layers = get_layers(layers); req.clean = clean; return { 'error': false }; diff --git a/test/unit/sanitiser/reverse.js b/test/unit/sanitiser/reverse.js index d2ff8293..d8d666c1 100644 --- a/test/unit/sanitiser/reverse.js +++ b/test/unit/sanitiser/reverse.js @@ -5,8 +5,10 @@ var suggest = require('../../../sanitiser/reverse'), delim = ',', defaultError = 'missing param \'lat\'', defaultClean = { lat:0, - layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', - 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], + types: { + from_layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', + 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], + }, lon: 0, size: 10, details: true, @@ -158,7 +160,7 @@ module.exports.tests.sanitize_details = function(test, common) { module.exports.tests.sanitize_layers = function(test, common) { test('unspecified', function(t) { sanitize({ layers: undefined, input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, defaultClean.layers, 'default layers set'); + t.deepEqual(clean.types.from_layers, defaultClean.types.from_layers, 'default layers set'); t.end(); }); }); @@ -173,21 +175,21 @@ module.exports.tests.sanitize_layers = function(test, common) { test('poi (alias) layer', function(t) { var poi_layers = ['geoname','osmnode','osmway']; sanitize({ layers: 'poi', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, poi_layers, 'poi layers set'); + t.deepEqual(clean.types.from_layers, poi_layers, 'poi layers set'); t.end(); }); }); test('admin (alias) layer', function(t) { var admin_layers = ['admin0','admin1','admin2','neighborhood','locality','local_admin']; sanitize({ layers: 'admin', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, admin_layers, 'admin layers set'); + t.deepEqual(clean.types.from_layers, admin_layers, 'admin layers set'); t.end(); }); }); test('address (alias) layer', function(t) { var address_layers = ['osmaddress','openaddresses']; sanitize({ layers: 'address', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, address_layers, 'address layers set'); + t.deepEqual(clean.types.from_layers, address_layers, 'address layers set'); t.end(); }); }); @@ -195,7 +197,7 @@ module.exports.tests.sanitize_layers = function(test, common) { var poi_layers = ['geoname','osmnode','osmway']; var reg_layers = ['admin0', 'admin1']; sanitize({ layers: 'poi,admin0,admin1', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, reg_layers.concat(poi_layers), 'poi + regular layers'); + t.deepEqual(clean.types.from_layers, reg_layers.concat(poi_layers), 'poi + regular layers'); t.end(); }); }); @@ -203,7 +205,7 @@ module.exports.tests.sanitize_layers = function(test, common) { var admin_layers = ['admin0','admin1','admin2','neighborhood','locality','local_admin']; var reg_layers = ['geoname', 'osmway']; sanitize({ layers: 'admin,geoname,osmway', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, reg_layers.concat(admin_layers), 'admin + regular layers set'); + t.deepEqual(clean.types.from_layers, reg_layers.concat(admin_layers), 'admin + regular layers set'); t.end(); }); }); @@ -211,21 +213,21 @@ module.exports.tests.sanitize_layers = function(test, common) { var address_layers = ['osmaddress','openaddresses']; var reg_layers = ['geoname', 'osmway']; sanitize({ layers: 'address,geoname,osmway', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, reg_layers.concat(address_layers), 'address + regular layers set'); + t.deepEqual(clean.types.from_layers, reg_layers.concat(address_layers), 'address + regular layers set'); t.end(); }); }); test('alias layer plus regular layers (no duplicates)', function(t) { var poi_layers = ['geoname','osmnode','osmway']; sanitize({ layers: 'poi,geoname,osmnode', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, poi_layers, 'poi layers found (no duplicates)'); + t.deepEqual(clean.types.from_layers, poi_layers, 'poi layers found (no duplicates)'); t.end(); }); }); test('multiple alias layers (no duplicates)', function(t) { var alias_layers = ['geoname','osmnode','osmway','admin0','admin1','admin2','neighborhood','locality','local_admin']; sanitize({ layers: 'poi,admin', input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, alias_layers, 'all layers found (no duplicates)'); + t.deepEqual(clean.types.from_layers, alias_layers, 'all layers found (no duplicates)'); t.end(); }); }); diff --git a/test/unit/sanitiser/search.js b/test/unit/sanitiser/search.js index 702bbcb5..1f37fa13 100644 --- a/test/unit/sanitiser/search.js +++ b/test/unit/sanitiser/search.js @@ -8,8 +8,10 @@ var search = require('../../../sanitiser/search'), delim = ',', defaultError = 'invalid param \'input\': text length, must be >0', defaultClean = { input: 'test', - layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', - 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], + types: { + from_layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', + 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], + }, size: 10, details: true, parsed_input: defaultParsed, @@ -320,7 +322,7 @@ module.exports.tests.sanitize_details = function(test, common) { module.exports.tests.sanitize_layers = function(test, common) { test('unspecified', function(t) { sanitize({ layers: undefined, input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, defaultClean.layers, 'default layers set'); + t.deepEqual(clean.types.from_layers, defaultClean.types.from_layers, 'default layers set'); t.end(); }); }); @@ -335,21 +337,21 @@ module.exports.tests.sanitize_layers = function(test, common) { test('poi (alias) layer', function(t) { var poi_layers = ['geoname','osmnode','osmway']; sanitize({ layers: 'poi', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, poi_layers, 'poi layers set'); + t.deepEqual(clean.types.from_layers, poi_layers, 'poi layers set'); t.end(); }); }); test('admin (alias) layer', function(t) { var admin_layers = ['admin0','admin1','admin2','neighborhood','locality','local_admin']; sanitize({ layers: 'admin', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, admin_layers, 'admin layers set'); + t.deepEqual(clean.types.from_layers, admin_layers, 'admin layers set'); t.end(); }); }); test('address (alias) layer', function(t) { var address_layers = ['osmaddress','openaddresses']; sanitize({ layers: 'address', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, address_layers, 'address layers set'); + t.deepEqual(clean.types.from_layers, address_layers, 'address layers set'); t.end(); }); }); @@ -357,7 +359,7 @@ module.exports.tests.sanitize_layers = function(test, common) { var poi_layers = ['geoname','osmnode','osmway']; var reg_layers = ['admin0', 'admin1']; sanitize({ layers: 'poi,admin0,admin1', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, reg_layers.concat(poi_layers), 'poi + regular layers'); + t.deepEqual(clean.types.from_layers, reg_layers.concat(poi_layers), 'poi + regular layers'); t.end(); }); }); @@ -365,7 +367,7 @@ module.exports.tests.sanitize_layers = function(test, common) { var admin_layers = ['admin0','admin1','admin2','neighborhood','locality','local_admin']; var reg_layers = ['geoname', 'osmway']; sanitize({ layers: 'admin,geoname,osmway', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, reg_layers.concat(admin_layers), 'admin + regular layers set'); + t.deepEqual(clean.types.from_layers, reg_layers.concat(admin_layers), 'admin + regular layers set'); t.end(); }); }); @@ -373,21 +375,21 @@ module.exports.tests.sanitize_layers = function(test, common) { var address_layers = ['osmaddress','openaddresses']; var reg_layers = ['geoname', 'osmway']; sanitize({ layers: 'address,geoname,osmway', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, reg_layers.concat(address_layers), 'address + regular layers set'); + t.deepEqual(clean.types.from_layers, reg_layers.concat(address_layers), 'address + regular layers set'); t.end(); }); }); test('alias layer plus regular layers (no duplicates)', function(t) { var poi_layers = ['geoname','osmnode','osmway']; sanitize({ layers: 'poi,geoname,osmnode', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, poi_layers, 'poi layers found (no duplicates)'); + t.deepEqual(clean.types.from_layers, poi_layers, 'poi layers found (no duplicates)'); t.end(); }); }); test('multiple alias layers (no duplicates)', function(t) { var alias_layers = ['geoname','osmnode','osmway','admin0','admin1','admin2','neighborhood','locality','local_admin']; sanitize({ layers: 'poi,admin', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.layers, alias_layers, 'all layers found (no duplicates)'); + t.deepEqual(clean.types.from_layers, alias_layers, 'all layers found (no duplicates)'); t.end(); }); }); diff --git a/test/unit/sanitiser/suggest.js b/test/unit/sanitiser/suggest.js index b1ad1be6..003b44ae 100644 --- a/test/unit/sanitiser/suggest.js +++ b/test/unit/sanitiser/suggest.js @@ -272,7 +272,7 @@ module.exports.tests.sanitize_details = function(test, common) { module.exports.tests.sanitize_layers = function(test, common) { test('unspecified', function(t) { sanitize({ layers: undefined, input: 'test', lat: 0, lon: 0 }, function( err, clean ){ - t.deepEqual(clean.layers, defaultClean.layers, 'default layers set'); + t.deepEqual(clean.types.from_layers, defaultClean.types.from_layers, 'default layers set'); t.end(); }); }); From 1fae622c39d90799440786157ba410d2c3e3bf7f Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 16:15:31 -0400 Subject: [PATCH 08/23] Remove layer configuration based on address parsing This code doesn't seem like it will be triggered very often (due to it comapring space delimited words with comma delimited words from the text field), and also has the potential to cause quite a bit of weird behavior. --- helper/query_parser.js | 11 +++-------- test/unit/helper/query_parser.js | 8 ++------ 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/helper/query_parser.js b/helper/query_parser.js index 7c5ba4b0..ad9d57ac 100644 --- a/helper/query_parser.js +++ b/helper/query_parser.js @@ -37,14 +37,9 @@ module.exports = function(query) { }; var getAddressParts = function(query) { - // address parsing - var address = parser( query ); - // set target_layer if input suggests no address - if (address.text === address.regions.join(' ') && !hasNumber) { - address.target_layer = get_layers(['admin', 'poi']); - } - - return address; + // perform full address parsing + // except on queries so short they obviously can't contain an address + return parser( query ); }; var addressWithAdminParts = getAdminPartsBySplittingOnDelim(query); diff --git a/test/unit/helper/query_parser.js b/test/unit/helper/query_parser.js index c82a31a2..6f0f67cd 100644 --- a/test/unit/helper/query_parser.js +++ b/test/unit/helper/query_parser.js @@ -136,11 +136,7 @@ module.exports.tests.parse_address = function(test, common) { t.equal(typeof address.postalcode, 'number', 'valid zip (' + address.postalcode + ')'); t.equal(address.postalcode, query.zip, 'correct postal code (' + query.zip + ')'); } - - if (address.text === address.regions.join(' ')) { - t.deepEqual(address.target_layer, query.target_layer, 'admin_parts set correctly to ' + query.target_layer.join(', ')); - } - + t.end(); }); }; @@ -165,4 +161,4 @@ module.exports.all = function (tape, common) { for( var testCase in module.exports.tests ){ module.exports.tests[testCase](test, common); } -}; \ No newline at end of file +}; From 9b94a0e4ef665a2190ea4d007df8a0583bc4f832 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 16:22:33 -0400 Subject: [PATCH 09/23] Fix inconsistently named imported module --- sanitiser/_input.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sanitiser/_input.js b/sanitiser/_input.js index d73048a0..c4b03f2c 100644 --- a/sanitiser/_input.js +++ b/sanitiser/_input.js @@ -1,5 +1,5 @@ -var isObject = require('is-object'); -var query_parse= require('../helper/query_parser'); +var isObject = require('is-object'); +var query_parser = require('../helper/query_parser'); // validate inputs, convert types and apply defaults function sanitize( req ){ @@ -21,7 +21,7 @@ function sanitize( req ){ req.clean.input = params.input; - req.clean.parsed_input = query_parse(params.input); + req.clean.parsed_input = query_parser(params.input); return { 'error': false }; } From 0adc2821d2a1e7427da1bb696b6ea45e73dcc308 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 16:24:20 -0400 Subject: [PATCH 10/23] Remove seemingly unhelpful comment --- helper/query_parser.js | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/helper/query_parser.js b/helper/query_parser.js index ad9d57ac..99242a5e 100644 --- a/helper/query_parser.js +++ b/helper/query_parser.js @@ -72,17 +72,3 @@ module.exports = function(query) { return parsed_input; }; - - -// parsed_input = { -// name : parsedAddress.name, -// number : parsedAddress.number, -// street : parsedAddress.street, -// city : parsedAddress.city, -// state : parsedAddress.state, -// country: parsedAddress.country, -// postalcode : parsedAddress.postalcode, -// regions: parsedAddress.regions, -// admin_parts: parsedAddress.admin_parts, -// target_layer: parsedAddress.target_layer -// } From bc8eabe4e61096888f3ec980085a2ddc8313b29e Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 17:21:16 -0400 Subject: [PATCH 11/23] Remove loops and many global checks from sanitiser tests Modifying these sanitiser tests became extremely hard because almost all of them were looping over lots of individual test cases, which places assumptions about the common behavior of potentialy very different test cases, as well as making assertions about huge swaths of output when only a small amount of that output was really under test. Hopefully these changes will make our tests easier to modify, and not really lose any ability to catch bugs. --- test/unit/sanitiser/_input.js | 4 +- test/unit/sanitiser/search.js | 91 +++++++++++++---------------------- 2 files changed, 36 insertions(+), 59 deletions(-) diff --git a/test/unit/sanitiser/_input.js b/test/unit/sanitiser/_input.js index 98eab084..378b8140 100644 --- a/test/unit/sanitiser/_input.js +++ b/test/unit/sanitiser/_input.js @@ -7,7 +7,7 @@ var input = require('../../../sanitiser/_input'), 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], nonAddressLayers = [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', 'locality', 'local_admin' ], - defaultParsed= { target_layer: nonAddressLayers }, + defaultParsed= { }, defaultClean = { input: 'test', layers: allLayers, size: 10, @@ -26,4 +26,4 @@ module.exports = { defaultParsed: defaultParsed, defaultClean : defaultClean, getTargetLayers: getTargetLayers -}; \ No newline at end of file +}; diff --git a/test/unit/sanitiser/search.js b/test/unit/sanitiser/search.js index 1f37fa13..f6c050cf 100644 --- a/test/unit/sanitiser/search.js +++ b/test/unit/sanitiser/search.js @@ -34,13 +34,10 @@ module.exports.tests.interface = function(test, common) { }); }; -module.exports.tests.sanitize_input = function(test, common) { - var inputs = { - invalid: [ '', 100, null, undefined, new Date() ], - valid: [ 'a', 'aa', 'aaaaaaaa' ] - }; +module.exports.tests.sanitize_invalid_input = function(test, common) { test('invalid input', function(t) { - inputs.invalid.forEach( function( input ){ + var invalid = [ '', 100, null, undefined, new Date() ]; + invalid.forEach( function( input ){ sanitize({ input: input }, function( err, clean ){ t.equal(err, 'invalid param \'input\': text length, must be >0', input + ' is an invalid input'); t.equal(clean, undefined, 'clean not set'); @@ -48,16 +45,26 @@ module.exports.tests.sanitize_input = function(test, common) { }); t.end(); }); - test('valid input', function(t) { - inputs.valid.forEach( function( input ){ - sanitize({ input: input }, function( err, clean ){ - var expected = JSON.parse(JSON.stringify( defaultClean )); - expected.input = input; - expected.parsed_input.target_layer = _input.getTargetLayers(input); +}; - t.equal(err, undefined, 'no error'); - t.deepEqual(clean, expected, 'clean set correctly (' + input + ')'); - }); +module.exports.tests.sanitise_valid_input = function(test, common) { + test('valid short input', function(t) { + sanitize({ input: 'a' }, function( err, clean ){ + t.equal(err, undefined, 'no error'); + }); + t.end(); + }); + + test('valid not-quite-as-short input', function(t) { + sanitize({ input: 'aa' }, function( err, clean ){ + t.equal(err, undefined, 'no error'); + }); + t.end(); + }); + + test('valid longer input', function(t) { + sanitize({ input: 'aaaaaaaa' }, function( err, clean ){ + t.equal(err, undefined, 'no error'); }); t.end(); }); @@ -75,8 +82,6 @@ module.exports.tests.sanitize_input_with_delim = function(test, common) { expected.parsed_input = parser(input); t.equal(err, undefined, 'no error'); t.equal(clean.parsed_input.name, expected.parsed_input.name, 'clean name set correctly'); - t.equal(clean.parsed_input.admin_parts, expected.parsed_input.admin_parts, 'clean admin_parts set correctly'); - t.deepEqual(clean, expected, 'clean set correctly (' + input + ')'); }); }); @@ -103,10 +108,8 @@ module.exports.tests.sanitize_lat = function(test, common) { sanitize({ input: 'test', lat: lat, lon: 0 }, function( err, clean ){ var expected = JSON.parse(JSON.stringify( defaultClean )); expected.lat = parseFloat( lat ); - expected.lon = 0; t.equal(err, undefined, 'no error'); - expected.parsed_input = parser('test'); - t.deepEqual(clean, expected, 'clean set correctly (' + lat + ')'); + t.deepEqual(clean.lat, expected.lat, 'clean lat set correctly (' + lat + ')'); }); }); t.end(); @@ -122,10 +125,8 @@ module.exports.tests.sanitize_lon = function(test, common) { sanitize({ input: 'test', lat: 0, lon: lon }, function( err, clean ){ var expected = JSON.parse(JSON.stringify( defaultClean )); expected.lon = parseFloat( lon ); - expected.lat = 0; t.equal(err, undefined, 'no error'); - expected.parsed_input = parser('test'); - t.deepEqual(clean, expected, 'clean set correctly (' + lon + ')'); + t.deepEqual(clean.lon, expected.lon, 'clean set correctly (' + lon + ')'); }); }); t.end(); @@ -133,34 +134,27 @@ module.exports.tests.sanitize_lon = function(test, common) { }; module.exports.tests.sanitize_optional_geo = function(test, common) { - test('no lat/lon', function(t) { + test('no lat/lon', function(t) { sanitize({ input: 'test' }, function( err, clean ){ - var expected = defaultClean; t.equal(err, undefined, 'no error'); t.equal(clean.lat, undefined, 'clean set without lat'); t.equal(clean.lon, undefined, 'clean set without lon'); - expected.parsed_input = parser('test'); - t.deepEqual(clean, expected, 'clean set without lat/lon'); }); t.end(); }); - test('no lat', function(t) { + test('no lat', function(t) { sanitize({ input: 'test', lon: 0 }, function( err, clean ){ - var expected = JSON.parse(JSON.stringify( defaultClean )); - expected.lon = 0; + var expected_lon = 0; t.equal(err, undefined, 'no error'); - expected.parsed_input = parser('test'); - t.deepEqual(clean, expected, 'clean set correctly (without any lat)'); + t.deepEqual(clean.lon, expected_lon, 'clean set correctly (without any lat)'); }); t.end(); }); - test('no lon', function(t) { + test('no lon', function(t) { sanitize({ input: 'test', lat: 0 }, function( err, clean ){ - var expected = JSON.parse(JSON.stringify( defaultClean )); - expected.lat = 0; + var expected_lat = 0; t.equal(err, undefined, 'no error'); - expected.parsed_input = parser('test'); - t.deepEqual(clean, expected, 'clean set correctly (without any lon)'); + t.deepEqual(clean.lat, expected_lat, 'clean set correctly (without any lon)'); }); t.end(); }); @@ -168,8 +162,6 @@ module.exports.tests.sanitize_optional_geo = function(test, common) { module.exports.tests.sanitize_bbox = function(test, common) { var bboxes = { - invalid_coordinates: [ - ], invalid: [ '91;-181,-91,181', // invalid - semicolon between coordinates 'these,are,not,numbers', @@ -196,22 +188,11 @@ module.exports.tests.sanitize_bbox = function(test, common) { ] }; - test('invalid bbox coordinates', function(t) { - bboxes.invalid_coordinates.forEach( function( bbox ){ - sanitize({ input: 'test', bbox: bbox }, function( err, clean ){ - t.ok(err.match(/Invalid (lat|lon)/), bbox + ' is invalid'); - t.equal(clean, undefined, 'clean not set'); - }); - }); - t.end(); - }); test('invalid bbox', function(t) { bboxes.invalid.forEach( function( bbox ){ sanitize({ input: 'test', bbox: bbox }, function( err, clean ){ - var expected = JSON.parse(JSON.stringify( defaultClean )); t.equal(err, undefined, 'no error'); - expected.parsed_input = parser('test'); - t.deepEqual(clean, expected, 'falling back on 50km distance from centroid'); + t.equal(clean.bbox, undefined, 'falling back on 50km distance from centroid'); }); }); t.end(); @@ -219,19 +200,17 @@ module.exports.tests.sanitize_bbox = function(test, common) { test('valid bbox', function(t) { bboxes.valid.forEach( function( bbox ){ sanitize({ input: 'test', bbox: bbox }, function( err, clean ){ - var expected = JSON.parse(JSON.stringify( defaultClean )); var bboxArray = bbox.split(',').map(function(i) { return parseInt(i); }); - expected.bbox = { + var expected_bbox = { right: Math.max(bboxArray[0], bboxArray[2]), top: Math.max(bboxArray[1], bboxArray[3]), left: Math.min(bboxArray[0], bboxArray[2]), bottom: Math.min(bboxArray[1], bboxArray[3]) }; t.equal(err, undefined, 'no error'); - expected.parsed_input = parser('test'); - t.deepEqual(clean, expected, 'clean set correctly (' + bbox + ')'); + t.deepEqual(clean.bbox, expected_bbox, 'clean set correctly (' + bbox + ')'); }); }); t.end(); @@ -422,8 +401,6 @@ module.exports.tests.middleware_success = function(test, common) { var req = { query: { input: 'test' }}; var next = function( message ){ t.equal(message, undefined, 'no error message set'); - req.clean.parsed_input = parser('test'); - t.deepEqual(req.clean, defaultClean); t.end(); }; middleware( req, undefined, next ); From 7a6ac8541bbd1f0acf7bb5c91b5de340d61b4230 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 17:23:57 -0400 Subject: [PATCH 12/23] Separate concerns of address parser The address parser currently does two things: 1.) make some intelligent guesses as to possible admin regions to explicitly search against to improve the quality of results returned 2.) make some intelligent guesses as to when no part of the query needs to search against anything other than admin regions. This somewhat improves the quality of results returned but mostly improves the speed of the Elasticsearch query since it's searching significantly fewer recoords. These two concerns are now split into two separate methods within the query_parser helper module. They are mostly independent today, but don't have to be in the future. --- helper/query_parser.js | 44 ++++++++++++++++---------------- sanitiser/_input.js | 5 +++- test/unit/helper/query_parser.js | 17 +++++++----- test/unit/query/search.js | 6 ++--- test/unit/sanitiser/_input.js | 2 +- test/unit/sanitiser/search.js | 7 +++-- 6 files changed, 45 insertions(+), 36 deletions(-) diff --git a/helper/query_parser.js b/helper/query_parser.js index 99242a5e..dcdbbec6 100644 --- a/helper/query_parser.js +++ b/helper/query_parser.js @@ -1,10 +1,25 @@ var parser = require('addressit'); var extend = require('extend'); -var get_layers = require('../helper/layers'); +var get_layers_helper = require('../helper/layers'); var delim = ','; -module.exports = function(query) { +module.exports = {}; + +module.exports.get_layers = function get_layers(query) { + var tokenized = query.split(/[ ,]+/); + var hasNumber = /\d/.test(query); + + if (query.length <= 3 ) { + // no address parsing required + return get_layers_helper(['admin']); + } else if (tokenized.length === 1 || (tokenized.length < 3 && !hasNumber)) { + // no need to hit address layers if there's only one (or two) token(s) + return get_layers_helper(['admin', 'poi']); + } +}; + +module.exports.get_parsed_address = function get_parsed_address(query) { var tokenized = query.split(/[ ,]+/); var hasNumber = /\d/.test(query); @@ -22,32 +37,18 @@ module.exports = function(query) { return address; }; - var getTargetLayersWhenAddressParsingIsNotNecessary = function(query) { - var address = {}; - // set target_layer if input length <= 3 characters - if (query.length <= 3 ) { - // no address parsing required - address.target_layer = get_layers(['admin']); - } else if (tokenized.length === 1 || (tokenized.length < 3 && !hasNumber)) { - // no need to hit address layers if there's only one (or two) token(s) - address.target_layer = get_layers(['admin', 'poi']); - } - - return address.target_layer ? address : null; - }; - var getAddressParts = function(query) { // perform full address parsing // except on queries so short they obviously can't contain an address - return parser( query ); + if (query.length > 3) { + return parser( query ); + } }; var addressWithAdminParts = getAdminPartsBySplittingOnDelim(query); - var addressWithTargetLayers= getTargetLayersWhenAddressParsingIsNotNecessary(query); - var addressWithAddressParts= !addressWithTargetLayers ? getAddressParts(query) : {}; + var addressWithAddressParts= getAddressParts(query); var parsedAddress = extend(addressWithAdminParts, - addressWithTargetLayers, addressWithAddressParts); var address_parts = [ 'name', @@ -58,8 +59,7 @@ module.exports = function(query) { 'country', 'postalcode', 'regions', - 'admin_parts', - 'target_layer' + 'admin_parts' ]; var parsed_input = {}; diff --git a/sanitiser/_input.js b/sanitiser/_input.js index c4b03f2c..2ec61970 100644 --- a/sanitiser/_input.js +++ b/sanitiser/_input.js @@ -21,7 +21,10 @@ function sanitize( req ){ req.clean.input = params.input; - req.clean.parsed_input = query_parser(params.input); + req.clean.parsed_input = query_parser.get_parsed_address(params.input); + + req.clean.types = req.clean.layers || {}; + req.clean.types.from_address_parsing = query_parser.get_layers(params.input); return { 'error': false }; } diff --git a/test/unit/helper/query_parser.js b/test/unit/helper/query_parser.js index 6f0f67cd..5cb84b83 100644 --- a/test/unit/helper/query_parser.js +++ b/test/unit/helper/query_parser.js @@ -6,7 +6,8 @@ module.exports.tests = {}; module.exports.tests.interface = function(test, common) { test('interface', function(t) { - t.equal(typeof parser, 'function', 'valid function'); + t.equal(typeof parser.get_parsed_address, 'function', 'valid function'); + t.equal(typeof parser.get_layers, 'function', 'valid function'); t.end(); }); }; @@ -17,7 +18,7 @@ module.exports.tests.split_on_comma = function(test, common) { var testParse = function(query) { test('naive parsing ' + query, function(t) { - var address = parser(query); + var address = parser.get_parsed_address(query); var delimIndex = query.indexOf(delim); var name = query.substring(0, delimIndex); var admin_parts = query.substring(delimIndex + 1).trim(); @@ -41,11 +42,12 @@ module.exports.tests.parse_three_chars_or_less = function(test, common) { var testParse = function(query) { test('query length < 3 (' + query + ')', function(t) { - var address = parser(query); + var address = parser.get_parsed_address(query); var target_layer = get_layers(['admin']); + var layers = parser.get_layers(query); t.equal(typeof address, 'object', 'valid object'); - t.deepEqual(address.target_layer, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); + t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); t.end(); }); }; @@ -63,15 +65,16 @@ module.exports.tests.parse_one_or_more_tokens = function(test, common) { var testParse = function(query, parse_address) { test('query with one or more tokens (' + query + ')', function(t) { - var address = parser(query); + var address = parser.get_parsed_address(query); var target_layer = get_layers(['admin', 'poi']); + var layers = parser.get_layers(query); t.equal(typeof address, 'object', 'valid object'); if (parse_address) { t.deepEqual(address.regions.join(''), query, 'since query contained a number, it went through address parsing'); } else { - t.deepEqual(address.target_layer, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); + t.deepEqual(layers, target_layer, 'admin_parts set correctly to ' + target_layer.join(', ')); } t.end(); @@ -114,7 +117,7 @@ module.exports.tests.parse_address = function(test, common) { // remove leading whitespace query_string = query_string.substring(1); - var address = parser(query_string); + var address = parser.get_parsed_address(query_string); var non_address_layer = get_layers(['admin', 'poi']); t.equal(typeof address, 'object', 'valid object for the address ('+query_string+')'); diff --git a/test/unit/query/search.js b/test/unit/query/search.js index f2e6d9dc..c4797f16 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -274,7 +274,7 @@ module.exports.tests.query = function(test, common) { 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], size: 10, details: true, - parsed_input: parser(address), + parsed_input: parser.get_parsed_address(address), default_layers_set: true }); @@ -476,7 +476,7 @@ module.exports.tests.query = function(test, common) { 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], size: 10, details: true, - parsed_input: parser(partial_address), + parsed_input: parser.get_parsed_address(partial_address), default_layers_set: true }); @@ -644,7 +644,7 @@ module.exports.tests.query = function(test, common) { 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], size: 10, details: true, - parsed_input: parser(partial_address), + parsed_input: parser.get_parsed_address(partial_address), default_layers_set: true }); diff --git a/test/unit/sanitiser/_input.js b/test/unit/sanitiser/_input.js index 378b8140..b6073358 100644 --- a/test/unit/sanitiser/_input.js +++ b/test/unit/sanitiser/_input.js @@ -17,7 +17,7 @@ var input = require('../../../sanitiser/_input'), lon:0 }, getTargetLayers = function(query) { - var address = parser(query); + var address = parser.get_parsed_address(query); return address.target_layer; }; diff --git a/test/unit/sanitiser/search.js b/test/unit/sanitiser/search.js index f6c050cf..e2ccd4ce 100644 --- a/test/unit/sanitiser/search.js +++ b/test/unit/sanitiser/search.js @@ -11,6 +11,8 @@ var search = require('../../../sanitiser/search'), types: { from_layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], + from_address_parsing: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', + 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], }, size: 10, details: true, @@ -79,7 +81,7 @@ module.exports.tests.sanitize_input_with_delim = function(test, common) { var expected = JSON.parse(JSON.stringify( defaultClean )); expected.input = input; - expected.parsed_input = parser(input); + expected.parsed_input = parser.get_parsed_address(input); t.equal(err, undefined, 'no error'); t.equal(clean.parsed_input.name, expected.parsed_input.name, 'clean name set correctly'); @@ -330,7 +332,8 @@ module.exports.tests.sanitize_layers = function(test, common) { test('address (alias) layer', function(t) { var address_layers = ['osmaddress','openaddresses']; sanitize({ layers: 'address', input: 'test' }, function( err, clean ){ - t.deepEqual(clean.types.from_layers, address_layers, 'address layers set'); + t.deepEqual(clean.types.from_layers, address_layers, 'types from layers set'); + t.deepEqual(clean.types.from_address_parser, _input.allLayers, 'address parser uses default layers'); t.end(); }); }); From 258fa4390e3c7bc9f5ab3b80d10c751afe60c77b Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 18:06:40 -0400 Subject: [PATCH 13/23] Move cmd.type setting logic to types helper --- controller/search.js | 13 ++++---- helper/types.js | 15 +++++++++ test/unit/helper/types.js | 67 +++++++++++++++++++++++++++++++++++++++ test/unit/run.js | 1 + 4 files changed, 89 insertions(+), 7 deletions(-) create mode 100644 helper/types.js create mode 100644 test/unit/helper/types.js diff --git a/controller/search.js b/controller/search.js index 4371e274..0ed1f9cd 100644 --- a/controller/search.js +++ b/controller/search.js @@ -1,4 +1,5 @@ var service = { search: require('../service/search') }; +var types = require ( '../helper/types' ); function setup( backend, query ){ @@ -15,13 +16,11 @@ function setup( backend, query ){ body: query( req.clean ) }; - if (req.clean.types && req.clean.types.from_layers) { - cmd.type = req.clean.types.from_layers; - } - - // set type if input suggests targeting a layer(s) - if (req.clean.default_layers_set && req.clean.parsed_input) { - cmd.type = req.clean.parsed_input.target_layer || cmd.type; + // don't directly set cmd.type from types helper to avoid sometimes + // setting cmd.type to undefined (having the key not set is cleaner) + var type = types(req.clean.types); + if (type !== undefined) { + cmd.type = type; } // query backend diff --git a/helper/types.js b/helper/types.js new file mode 100644 index 00000000..30ab74d5 --- /dev/null +++ b/helper/types.js @@ -0,0 +1,15 @@ +var valid_types = require( '../query/indeces' ); + +module.exports = function calculate_types(clean_types) { + if (!clean_types) { + return undefined; + } + + if (clean_types.from_layers) { + return clean_types.from_layers; + } + + if (clean_types.from_address_parser) { + return clean_types.from_address_parser; + } +}; diff --git a/test/unit/helper/types.js b/test/unit/helper/types.js new file mode 100644 index 00000000..1e7d1813 --- /dev/null +++ b/test/unit/helper/types.js @@ -0,0 +1,67 @@ +var types = require('../../../helper/types'); + +var valid_types = require( '../../../query/indeces' ); +module.exports.tests = {}; + +module.exports.tests.no_cleaned_types = function(test, common) { + test('no cleaned types', function(t) { + var actual = types(undefined); + t.equal(actual, undefined, 'all valid types returned for empty input'); + t.end(); + }); + + test('no cleaned types', function(t) { + var cleaned_types = {}; + var actual = types(cleaned_types); + t.equal(actual, undefined, 'all valid types returned for empty input'); + t.end(); + }); +}; + +module.exports.tests.address_parser = function(test, common) { + test('address parser specifies only admin layers', function(t) { + var cleaned_types = { + from_address_parser: ['admin0'] // simplified return value from address parser + }; + var actual = types(cleaned_types); + var expected = ['admin0']; // simplified expected value for all admin layers + t.deepEqual(actual, expected, 'only layers specified by address parser returned'); + t.end(); + }); +}; + +module.exports.tests.layers_parameter = function(test, common) { + test('layers parameter specifies only some layers', function(t) { + var cleaned_types = { + from_layers: ['geonames'] + }; + var actual = types(cleaned_types); + var expected = ['geonames']; + t.deepEqual(actual, expected, 'only types specified by layers parameter returned'); + t.end(); + }); +}; + +module.exports.tests.layers_parameter_and_address_parser = function(test, common) { + test('layers parameter and address parser present', function(t) { + var cleaned_types = { + from_layers: ['geonames'], + from_address_parser: ['admin0'] // simplified return value from address parse + }; + var actual = types(cleaned_types); + var expected = ['geonames']; + t.deepEqual(actual, expected, 'layers parameter overrides address parser completely'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('types: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 619da765..90a14278 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -19,6 +19,7 @@ var tests = [ require('./helper/geojsonify'), require('./helper/outputSchema'), require('./helper/adminFields'), + require('./helper/types'), ]; tests.map(function(t) { From 6f92189519eff20b86f9f0938e8ada436597323a Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 3 Sep 2015 18:24:25 -0400 Subject: [PATCH 14/23] Remove default_layers_set flag! After refactoring, this flag is no longer needed, as all areas of the code that care about layers do so by setting a key within clean.types, and then the types helper intelligently combines those together later. --- sanitiser/_layers.js | 4 ++-- test/unit/query/search.js | 3 --- test/unit/sanitiser/reverse.js | 4 +--- test/unit/sanitiser/search.js | 5 ----- test/unit/sanitiser/suggest.js | 1 - 5 files changed, 3 insertions(+), 14 deletions(-) diff --git a/sanitiser/_layers.js b/sanitiser/_layers.js index 25e8ba26..be13526a 100644 --- a/sanitiser/_layers.js +++ b/sanitiser/_layers.js @@ -16,9 +16,9 @@ function sanitize( req ){ } // default case (no layers specified in GET params) + // don't even set the from_layers key in this case if('string' !== typeof params.layers || !params.layers.length){ - params.layers = 'poi,admin,address'; // default layers - clean.default_layers_set = true; + return { 'error': false }; } // decide which layers can be queried diff --git a/test/unit/query/search.js b/test/unit/query/search.js index c4797f16..ff678401 100644 --- a/test/unit/query/search.js +++ b/test/unit/query/search.js @@ -275,7 +275,6 @@ module.exports.tests.query = function(test, common) { size: 10, details: true, parsed_input: parser.get_parsed_address(address), - default_layers_set: true }); var expected = { @@ -477,7 +476,6 @@ module.exports.tests.query = function(test, common) { size: 10, details: true, parsed_input: parser.get_parsed_address(partial_address), - default_layers_set: true }); var expected = { @@ -645,7 +643,6 @@ module.exports.tests.query = function(test, common) { size: 10, details: true, parsed_input: parser.get_parsed_address(partial_address), - default_layers_set: true }); var expected = { diff --git a/test/unit/sanitiser/reverse.js b/test/unit/sanitiser/reverse.js index d8d666c1..73e3b381 100644 --- a/test/unit/sanitiser/reverse.js +++ b/test/unit/sanitiser/reverse.js @@ -6,16 +6,14 @@ var suggest = require('../../../sanitiser/reverse'), defaultError = 'missing param \'lat\'', defaultClean = { lat:0, types: { - from_layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', - 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], }, lon: 0, size: 10, details: true, - default_layers_set: true, categories: [] }, sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; +var all_layers = ( '../../query/indeces' ); module.exports.tests = {}; diff --git a/test/unit/sanitiser/search.js b/test/unit/sanitiser/search.js index e2ccd4ce..9bf695e4 100644 --- a/test/unit/sanitiser/search.js +++ b/test/unit/sanitiser/search.js @@ -9,15 +9,10 @@ var search = require('../../../sanitiser/search'), defaultError = 'invalid param \'input\': text length, must be >0', defaultClean = { input: 'test', types: { - from_layers: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', - 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], - from_address_parsing: [ 'geoname', 'osmnode', 'osmway', 'admin0', 'admin1', 'admin2', 'neighborhood', - 'locality', 'local_admin', 'osmaddress', 'openaddresses' ], }, size: 10, details: true, parsed_input: defaultParsed, - default_layers_set: true }, sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; diff --git a/test/unit/sanitiser/suggest.js b/test/unit/sanitiser/suggest.js index 003b44ae..71f2ed63 100644 --- a/test/unit/sanitiser/suggest.js +++ b/test/unit/sanitiser/suggest.js @@ -15,7 +15,6 @@ var suggest = require('../../../sanitiser/suggest'), lat:0, lon:0, parsed_input: defaultParsed, - default_layers_set: true }, sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; From 2ebc9352955e628890f96bbb089c76f314e9e9e2 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 4 Sep 2015 11:56:48 -0400 Subject: [PATCH 15/23] Rename query/indidces to query/types It didn't really contain a list of indices. --- helper/types.js | 2 +- query/{indeces.js => types.js} | 4 ++-- sanitiser/_id.js | 8 ++++---- sanitiser/_layers.js | 8 ++++---- test/unit/helper/types.js | 2 +- test/unit/query/{indeces.js => types.js} | 10 +++++----- test/unit/run.js | 2 +- test/unit/sanitiser/place.js | 4 ++-- test/unit/sanitiser/reverse.js | 1 - 9 files changed, 20 insertions(+), 21 deletions(-) rename query/{indeces.js => types.js} (89%) rename test/unit/query/{indeces.js => types.js} (63%) diff --git a/helper/types.js b/helper/types.js index 30ab74d5..20443ad9 100644 --- a/helper/types.js +++ b/helper/types.js @@ -1,4 +1,4 @@ -var valid_types = require( '../query/indeces' ); +var valid_types = require( '../query/types' ); module.exports = function calculate_types(clean_types) { if (!clean_types) { diff --git a/query/indeces.js b/query/types.js similarity index 89% rename from query/indeces.js rename to query/types.js index 1fff5961..5f8cfab7 100644 --- a/query/indeces.js +++ b/query/types.js @@ -1,5 +1,5 @@ -// querable indeces +// querable types module.exports = [ 'geoname', @@ -13,4 +13,4 @@ module.exports = [ 'local_admin', 'osmaddress', 'openaddresses' -]; \ No newline at end of file +]; diff --git a/sanitiser/_id.js b/sanitiser/_id.js index a8424363..78ff6b1f 100644 --- a/sanitiser/_id.js +++ b/sanitiser/_id.js @@ -7,7 +7,7 @@ var isObject = require('is-object'); function sanitize( req ){ req.clean = req.clean || {}; var params = req.query; - var indeces = require('../query/indeces'); + var types = require('../query/types'); var delim = ':'; // ensure params is a valid object @@ -55,9 +55,9 @@ function sanitize( req ){ if('string' !== typeof type || !type.length){ return errormessage(thisparam); } - // type text must be one of the indeces - if(indeces.indexOf(type) === -1){ - return errormessage('type', type + ' is invalid. It must be one of these values - [' + indeces.join(', ') + ']'); + // type text must be one of the types + if(types.indexOf(type) === -1){ + return errormessage('type', type + ' is invalid. It must be one of these values - [' + types.join(', ') + ']'); } req.clean.ids.push({ id: id, diff --git a/sanitiser/_layers.js b/sanitiser/_layers.js index be13526a..8feb50e9 100644 --- a/sanitiser/_layers.js +++ b/sanitiser/_layers.js @@ -1,6 +1,6 @@ var isObject = require('is-object'), - indeces = require('../query/indeces'), + types = require('../query/types'), get_layers = require('../helper/layers'); // validate inputs, convert types and apply defaults @@ -23,7 +23,7 @@ function sanitize( req ){ // decide which layers can be queried var alias_layers = ['poi', 'admin', 'address']; - var alias_indeces = indeces.concat(alias_layers); + var alias_types = types.concat(alias_layers); // parse GET params var layers = params.layers.split(',').map( function( layer ){ @@ -32,10 +32,10 @@ function sanitize( req ){ // validate layer names for( var x=0; x0'; }, defaultFormatError = 'invalid: must be of the format type:id for ex: \'geoname:4163334\'', defaultError = 'invalid param \'id\': text length, must be >0', defaultMissingTypeError = function(input) { var type = input.split(delimiter)[0]; - return type + ' is invalid. It must be one of these values - [' + indeces.join(', ') + ']'; }, + return type + ' is invalid. It must be one of these values - [' + types.join(', ') + ']'; }, defaultClean = { ids: [ { id: '123', type: 'geoname' } ], details: true }, sanitize = function(query, cb) { _sanitize({'query':query}, cb); }, inputs = { diff --git a/test/unit/sanitiser/reverse.js b/test/unit/sanitiser/reverse.js index 73e3b381..bf9462b8 100644 --- a/test/unit/sanitiser/reverse.js +++ b/test/unit/sanitiser/reverse.js @@ -13,7 +13,6 @@ var suggest = require('../../../sanitiser/reverse'), categories: [] }, sanitize = function(query, cb) { _sanitize({'query':query}, cb); }; -var all_layers = ( '../../query/indeces' ); module.exports.tests = {}; From 426189e87fac3c45f39e631dfa6cd3f1dc714d0d Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 4 Sep 2015 12:09:30 -0400 Subject: [PATCH 16/23] Add sources -> types mapping file --- query/sources.js | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 query/sources.js diff --git a/query/sources.js b/query/sources.js new file mode 100644 index 00000000..77be7fb9 --- /dev/null +++ b/query/sources.js @@ -0,0 +1,10 @@ +/* + * Mapping from data sources to type values + */ + +module.exports = { + 'geonames': ['geoname'], + 'openaddresses' : ['openaddresses'], + 'quattroshapes': ['admin0', 'admin1', 'admin2', 'neighborhood', 'locality', 'local_admin'], + 'openstreetmap' : ['osmaddress', 'osmnode', 'osmway'] +}; From 3383fa0321f44713aecc4a45acf21a46cec6d6b1 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 4 Sep 2015 14:45:37 -0400 Subject: [PATCH 17/23] Add source parameter sanitiser --- sanitiser/_source.js | 44 +++++++++++++ test/unit/run.js | 1 + test/unit/sanitiser/_source.js | 114 +++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+) create mode 100644 sanitiser/_source.js create mode 100644 test/unit/sanitiser/_source.js diff --git a/sanitiser/_source.js b/sanitiser/_source.js new file mode 100644 index 00000000..f8f89a68 --- /dev/null +++ b/sanitiser/_source.js @@ -0,0 +1,44 @@ +var isObject = require('is-object'); +var sources_map = require( '../query/sources' ); +var all_sources = Object.keys(sources_map); + +function sanitize( req ) { + req.clean = req.clean || {}; + var params = req.query; + + req.clean.types = req.clean.types || {}; + + // ensure the input params are a valid object + if( !isObject( params ) ){ + params = {}; + } + + // default case (no layers specified in GET params) + // don't even set the from_layers key in this case + if('string' !== typeof params.source || !params.source.length){ + return { error: false }; + } + + var sources = params.source.split(','); + + var invalid_sources = sources.filter(function(source) { + return all_sources.indexOf(source) === -1; + }); + + if (invalid_sources.length > 0) { + return { + error: true, + msg: '`' + invalid_sources[0] + '` is an invalid source parameter. Valid options: ' + all_sources.join(', ') + }; + } + + var types = sources.reduce(function(acc, source) { + return acc.concat(sources_map[source]); + }, []); + + req.clean.types.from_source = types; + + return { error: false }; +} + +module.exports = sanitize; diff --git a/test/unit/run.js b/test/unit/run.js index 26a22f87..d5d55676 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -8,6 +8,7 @@ var tests = [ require('./controller/search'), require('./service/mget'), require('./service/search'), + require('./sanitiser/_source'), require('./sanitiser/search'), require('./sanitiser/reverse'), require('./sanitiser/place'), diff --git a/test/unit/sanitiser/_source.js b/test/unit/sanitiser/_source.js new file mode 100644 index 00000000..235e18c6 --- /dev/null +++ b/test/unit/sanitiser/_source.js @@ -0,0 +1,114 @@ +var sanitize = require( '../../../sanitiser/_source' ); + +var success_response = { error: false }; + +module.exports.tests = {}; + +module.exports.tests.no_sources = function(test, common) { + test('source is not set', function(t) { + var req = { + query: { } + }; + + var response = sanitize(req); + + t.deepEqual(req.clean.types, {}, 'clean.types should be empty object'); + t.deepEqual(response, success_response, 'no error returned'); + t.end(); + }); + + test('source is empty string', function(t) { + var req = { + query: { + source: '' + } + }; + + var response = sanitize(req); + + t.deepEqual(req.clean.types, {}, 'clean.types should be empty object'); + t.deepEqual(response, success_response, 'no error returned'); + t.end(); + }); +}; + +module.exports.tests.valid_sources = function(test, common) { + test('geonames source', function(t) { + var req = { + query: { + source: 'geonames' + } + }; + + var response = sanitize(req); + + t.deepEqual(req.clean.types, { from_source: ['geoname'] }, 'clean.types should contain from_source entry with geonames'); + t.deepEqual(response, success_response, 'no error returned'); + t.end(); + }); + + test('openstreetmap source', function(t) { + var req = { + query: { + source: 'openstreetmap' + } + }; + var expected_types = { + from_source: ['osmaddress', 'osmnode', 'osmway'] + }; + + var response = sanitize(req); + + t.deepEqual(req.clean.types, expected_types, 'clean.types should contain from_source entry with multiple entries for openstreetmap'); + t.deepEqual(response, success_response, 'no error returned'); + t.end(); + }); + + test('multiple sources', function(t) { + var req = { + query: { + source: 'openstreetmap,openaddresses' + } + }; + var expected_types = { + from_source: ['osmaddress', 'osmnode', 'osmway', 'openaddresses'] + }; + + var response = sanitize(req); + + t.deepEqual(req.clean.types, expected_types, + 'clean.types should contain from_source entry with multiple entries for openstreetmap and openadresses'); + t.deepEqual(response, success_response, 'no error returned'); + t.end(); + }); +}; + +module.exports.tests.invalid_sources = function(test, common) { + test('geonames source', function(t) { + var req = { + query: { + source: 'notasource' + } + }; + var expected_response = { + error: true, + msg: '`notasource` is an invalid source parameter. Valid options: geonames, openaddresses, quattroshapes, openstreetmap' + }; + + var response = sanitize(req); + + t.deepEqual(response, expected_response, 'error with message returned'); + t.deepEqual(req.clean.types, { }, 'clean.types should remain empty'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('SANTIZE _source ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; From b4e7793426a4b7285ed3c6f80b2ffb785137cdfe Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 4 Sep 2015 14:48:58 -0400 Subject: [PATCH 18/23] Fix reference to geoname This is just in a unit test, so it technically passes, but geonames is not a valid layer option (geoname, singular, is used instead), so it's best to correct. --- test/unit/helper/types.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/helper/types.js b/test/unit/helper/types.js index b05db174..d56f12d2 100644 --- a/test/unit/helper/types.js +++ b/test/unit/helper/types.js @@ -33,10 +33,10 @@ module.exports.tests.address_parser = function(test, common) { module.exports.tests.layers_parameter = function(test, common) { test('layers parameter specifies only some layers', function(t) { var cleaned_types = { - from_layers: ['geonames'] + from_layers: ['geoname'] }; var actual = types(cleaned_types); - var expected = ['geonames']; + var expected = ['geoname']; t.deepEqual(actual, expected, 'only types specified by layers parameter returned'); t.end(); }); @@ -45,11 +45,11 @@ module.exports.tests.layers_parameter = function(test, common) { module.exports.tests.layers_parameter_and_address_parser = function(test, common) { test('layers parameter and address parser present', function(t) { var cleaned_types = { - from_layers: ['geonames'], + from_layers: ['geoname'], from_address_parser: ['admin0'] // simplified return value from address parse }; var actual = types(cleaned_types); - var expected = ['geonames']; + var expected = ['geoname']; t.deepEqual(actual, expected, 'layers parameter overrides address parser completely'); t.end(); }); From 103a52cff5cc257dc6689d4872eb26bba4269aa7 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 4 Sep 2015 15:22:13 -0400 Subject: [PATCH 19/23] Calculate intersection of types requested by source and layers params --- helper/types.js | 24 ++++++++++++++++++++++-- test/unit/helper/types.js | 29 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/helper/types.js b/helper/types.js index 20443ad9..a87a65b8 100644 --- a/helper/types.js +++ b/helper/types.js @@ -1,12 +1,32 @@ var valid_types = require( '../query/types' ); +/** + * Calculate the set-style intersection of two arrays + */ +var intersection = function intersection(set1, set2) { + return set2.filter(function(value) { + return set1.indexOf(value) !== -1; + }); +}; + module.exports = function calculate_types(clean_types) { if (!clean_types) { return undefined; } - if (clean_types.from_layers) { - return clean_types.from_layers; + + if (clean_types.from_layers || clean_types.from_source) { + var types = valid_types; + + if (clean_types.from_layers) { + types = intersection(types, clean_types.from_layers); + } + + if (clean_types.from_source) { + types = intersection(types, clean_types.from_source); + } + + return types; } if (clean_types.from_address_parser) { diff --git a/test/unit/helper/types.js b/test/unit/helper/types.js index d56f12d2..2398c6bb 100644 --- a/test/unit/helper/types.js +++ b/test/unit/helper/types.js @@ -55,6 +55,35 @@ module.exports.tests.layers_parameter_and_address_parser = function(test, common }); }; +module.exports.tests.source_parameter = function(test, common) { + test('source parameter specified', function(t) { + var cleaned_types = { + from_source: ['openaddresses'] + }; + + var actual = types(cleaned_types); + + var expected = ['openaddresses']; + t.deepEqual(actual, expected, 'type parameter set to types specified by source'); + t.end(); + }); +}; + +module.exports.tests.source_and_layers_parameters = function(test, common) { + test('source and layers parameter both specified', function(t) { + var cleaned_types = { + from_source: ['openaddresses'], + from_layers: ['osmaddress', 'openaddresses'] + }; + + var actual = types(cleaned_types); + + var expected = ['openaddresses']; + t.deepEqual(actual, expected, 'type set to intersection of source and layer types'); + t.end(); + }); +}; + module.exports.all = function (tape, common) { function test(name, testFunction) { From ade4ce8b17acc996393d5e1e56b791ff3dafb4c3 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 4 Sep 2015 15:54:25 -0400 Subject: [PATCH 20/23] Add types middleware This middleware looks at the list of types that will be sent to Elasticsearch, if it's an empty array, it sends an error response before Elasticsearch is even quieried, because Elasticsearch interprets an empty type array as "search anything" rather than the intended "don't search anything". --- controller/search.js | 9 +++------ middleware/_types.js | 27 +++++++++++++++++++++++++++ routes/v1.js | 6 ++++++ sanitiser/search.js | 1 + 4 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 middleware/_types.js diff --git a/controller/search.js b/controller/search.js index 0ed1f9cd..2e8ca7e2 100644 --- a/controller/search.js +++ b/controller/search.js @@ -1,5 +1,4 @@ var service = { search: require('../service/search') }; -var types = require ( '../helper/types' ); function setup( backend, query ){ @@ -16,11 +15,9 @@ function setup( backend, query ){ body: query( req.clean ) }; - // don't directly set cmd.type from types helper to avoid sometimes - // setting cmd.type to undefined (having the key not set is cleaner) - var type = types(req.clean.types); - if (type !== undefined) { - cmd.type = type; + if (req.clean.type !== undefined) { + cmd.type = req.clean.type; + delete req.clean.type; // remove type from clean to avoid clutter } // query backend diff --git a/middleware/_types.js b/middleware/_types.js new file mode 100644 index 00000000..8b12719f --- /dev/null +++ b/middleware/_types.js @@ -0,0 +1,27 @@ +var types_helper = require( '../helper/types' ); + +/** + * Validate the types specified to be searched. + * + * Elasticsearch interprets an empty array of types as "search anything" rather + * than "search nothing", so in the case of an empty array, return an error + * message instead of searching at all. + */ +function middleware(req, res, next) { + var types = types_helper(req.clean.types); + + if (types !== undefined && types.length !== undefined) { + if (types.length === 0) { + var err = 'You have specified both the `source` and `layers` ' + + 'parameters in a combination that will return no results.'; + res.status(400); // 400 Bad Request + return next(err); + } else { + req.clean.type = types; + } + } + + next(); +} + +module.exports = middleware; diff --git a/routes/v1.js b/routes/v1.js index 27bda868..24ecccf2 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -9,6 +9,11 @@ var sanitisers = { reverse: require('../sanitiser/reverse') }; +/** ----------------------- middleware ------------------------ **/ +var middleware = { + types: require('../middleware/_types') +}; + /** ----------------------- controllers ----------------------- **/ var controllers = { @@ -45,6 +50,7 @@ function addRoutes(app, peliasConfig) { ]), search: createRouter([ sanitisers.search.middleware, + middleware.types, controllers.search(), postProc.confidenceScores(peliasConfig), postProc.renamePlacenames(), diff --git a/sanitiser/search.js b/sanitiser/search.js index d226dbd9..17409587 100644 --- a/sanitiser/search.js +++ b/sanitiser/search.js @@ -4,6 +4,7 @@ var _sanitize = require('../sanitiser/_sanitize'), input: require('../sanitiser/_input'), size: require('../sanitiser/_size'), layers: require('../sanitiser/_layers'), + source: require('../sanitiser/_source'), details: require('../sanitiser/_details'), latlonzoom: require('../sanitiser/_geo') }; From d45b09b0b31ba41529d2ed0a8564c503ff0a0d35 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Fri, 4 Sep 2015 16:07:45 -0400 Subject: [PATCH 21/23] Add comments to type helper --- helper/types.js | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/helper/types.js b/helper/types.js index a87a65b8..00a8e5a7 100644 --- a/helper/types.js +++ b/helper/types.js @@ -14,7 +14,9 @@ module.exports = function calculate_types(clean_types) { return undefined; } - + /* the layers and source parameters are cumulative: + * perform a set insersection of their specified types + */ if (clean_types.from_layers || clean_types.from_source) { var types = valid_types; @@ -29,6 +31,10 @@ module.exports = function calculate_types(clean_types) { return types; } + /* + * Type restrictions requested by the address parser should only be used + * if both the source and layers parameters are empty, so do this last + */ if (clean_types.from_address_parser) { return clean_types.from_address_parser; } From 6dbb4f8b08696ce95e696a5bae8743e0d78ba4c0 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 8 Sep 2015 14:37:33 -0400 Subject: [PATCH 22/23] Expose list of document types sent to Elasticsearch --- controller/search.js | 1 - 1 file changed, 1 deletion(-) diff --git a/controller/search.js b/controller/search.js index 2e8ca7e2..d4ed5c2d 100644 --- a/controller/search.js +++ b/controller/search.js @@ -17,7 +17,6 @@ function setup( backend, query ){ if (req.clean.type !== undefined) { cmd.type = req.clean.type; - delete req.clean.type; // remove type from clean to avoid clutter } // query backend From f6fea21782450f8f521e58de4b7216bd3b875fdc Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 8 Sep 2015 15:00:42 -0400 Subject: [PATCH 23/23] Use hasOwnProperty instead of comparing against undefined --- controller/search.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/search.js b/controller/search.js index d4ed5c2d..8a61478b 100644 --- a/controller/search.js +++ b/controller/search.js @@ -15,7 +15,7 @@ function setup( backend, query ){ body: query( req.clean ) }; - if (req.clean.type !== undefined) { + if ( req.clean.hasOwnProperty('type') ) { cmd.type = req.clean.type; }