From d168698dd61a889dc2cc2a1a961f60a8416e617f Mon Sep 17 00:00:00 2001 From: greenkeeperio-bot Date: Thu, 5 May 2016 12:59:10 -0700 Subject: [PATCH 01/11] chore(package): update proxyquire to version 1.7.7 https://greenkeeper.io/ --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 26ba7808..ba1faa96 100644 --- a/package.json +++ b/package.json @@ -67,7 +67,7 @@ "jshint": "^2.5.6", "nsp": "^2.2.0", "precommit-hook": "^3.0.0", - "proxyquire": "^1.4.0", + "proxyquire": "^1.7.7", "tap-dot": "1.0.5", "tape": "^4.5.1" }, From b9cac7d629a1644038b3cf3a823800272bf8b71e Mon Sep 17 00:00:00 2001 From: greenkeeperio-bot Date: Wed, 11 May 2016 09:38:58 -0700 Subject: [PATCH 02/11] chore(package): update check-types to version 7.0.0 https://greenkeeper.io/ --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ba1faa96..c95a4d7f 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,7 @@ }, "dependencies": { "async": "^1.5.2", - "check-types": "^6.0.0", + "check-types": "^7.0.0", "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "elasticsearch": "^11.0.0", "elasticsearch-exceptions": "0.0.4", From 5293e9692961ae8a85547e340a70d4701f36ebb9 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 12 May 2016 14:50:50 -0400 Subject: [PATCH 03/11] Remove Elasticsearch from engines According to the [docs](https://docs.npmjs.com/files/package.json#engines) only node and NPM versions may be specified here. --- package.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index c95a4d7f..3001ee69 100644 --- a/package.json +++ b/package.json @@ -31,8 +31,7 @@ }, "engines": { "node": ">=0.10.26", - "npm": ">=1.4.3", - "elasticsearch": ">=1.2.1" + "npm": ">=1.4.3" }, "dependencies": { "async": "^1.5.2", From bfd340a0fc9695a0072aa017ef8445b8943a2bd2 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 12 May 2016 14:53:03 -0400 Subject: [PATCH 04/11] Remove NPM engine specification According to the NPM docs on [engine](https://docs.npmjs.com/files/package.json#engines), this specification was only ever advisory, and is now depricated. It seems to cause NPM to pull in a very old version of NPM, which causes warnings when installing dependencies. --- package.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index 3001ee69..26ffa303 100644 --- a/package.json +++ b/package.json @@ -30,8 +30,7 @@ "url": "https://github.com/pelias/api/issues" }, "engines": { - "node": ">=0.10.26", - "npm": ">=1.4.3" + "node": ">=0.10.26" }, "dependencies": { "async": "^1.5.2", From 9e12c650d0540f3ea901f3f54eda9ebbc64bcf00 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 31 Mar 2016 18:34:04 -0400 Subject: [PATCH 05/11] Add logging helper module This module has helper methods for logging. Currently it has methods to: * detect if DNT is enabled * remove sensitive information from input fields for logging --- helper/logging.js | 24 ++++++++++ test/unit/helper/logging.js | 96 +++++++++++++++++++++++++++++++++++++ test/unit/run.js | 1 + 3 files changed, 121 insertions(+) create mode 100644 helper/logging.js create mode 100644 test/unit/helper/logging.js diff --git a/helper/logging.js b/helper/logging.js new file mode 100644 index 00000000..e514ebff --- /dev/null +++ b/helper/logging.js @@ -0,0 +1,24 @@ +var fieldsToRemove = ['text', 'focus.point.lat', 'focus.point.lon', + 'boundary.circle.lat', 'boundary.circle.lon', 'point.lat', 'point.lon']; + +function isDNT(req) { + if (!req.headers) { + return false; + } + return req.headers.DNT || req.headers.dnt || req.headers.do_not_track; +} + +function removeFields(query) { + fieldsToRemove.forEach(function(field) { + if (query[field]) { + query[field] = '[removed]'; + } + }); + + return query; +} + +module.exports = { + isDNT: isDNT, + removeFields: removeFields +}; diff --git a/test/unit/helper/logging.js b/test/unit/helper/logging.js new file mode 100644 index 00000000..90fbfb55 --- /dev/null +++ b/test/unit/helper/logging.js @@ -0,0 +1,96 @@ +var logging = require('../../../helper/logging'); + +module.exports.tests = {}; + +module.exports.tests.dnt = function(test) { + test('DNT=1 triggers DNT detection', function(t) { + var req = { + headers: { + DNT: '1' + } + }; + + t.ok(logging.isDNT(req), 'DNT detected'); + t.end(); + }); + + test('DNT=0 triggers DNT detection', function(t) { + // because this is common apparently, although the spec says to do the opposite + // see https://en.wikipedia.org/wiki/Do_Not_Track + var req = { + headers: { + DNT: '0' + } + }; + + t.ok(logging.isDNT(req), 'DNT detected'); + t.end(); + }); + + test('do_not_track header triggers DNT detection', function(t) { + // according to @riordan, some people use this too + var req = { + headers: { + do_not_track: '1' + } + }; + + t.ok(logging.isDNT(req), 'DNT detected'); + t.end(); + }); + + test('no DNT or do_not_track header does not trigger DNT detection', function(t) { + var req = { + headers: { + 'Accept-Charset': 'utf-8' + } + }; + + t.notOk(logging.isDNT(req), 'DNT detected'); + t.end(); + }); +}; + +module.exports.tests.field_removal = function(test) { + test('removes multiple fields that may have sensitive information', function(t) { + var query = { + text: 'possibly sensitive text', + 'point.lat': 'possibly sensitive location info' + }; + + var cleaned_query = logging.removeFields(query); + + var expected = { + text: '[removed]', + 'point.lat': '[removed]' + }; + + t.deepEquals(cleaned_query, expected, 'multiple sensitive fields removed'); + t.end(); + }); + + test('non-sensitive fields untouched', function(t) { + var query = { + sources: 'wof,gn' + }; + + var cleaned_query = logging.removeFields(query); + + var expected = { + sources: 'wof,gn' + }; + + t.deepEquals(cleaned_query, expected, 'non-sensitive fields are not touched'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('logging: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 828d99b5..2debcdcc 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -19,6 +19,7 @@ var tests = [ require('./helper/labelGenerator_GBR'), require('./helper/labelGenerator_USA'), require('./helper/labelSchema'), + require('./helper/logging'), require('./helper/type_mapping'), require('./helper/sizeCalculator'), require('./middleware/confidenceScore'), From ecb28bc3e90160b34833d4673d555060ef52f0fe Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 17 Mar 2016 16:49:11 -0400 Subject: [PATCH 06/11] Remove debug statements that print user parameters --- middleware/confidenceScore.js | 4 ---- 1 file changed, 4 deletions(-) diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js index df48b2b1..39ea45b2 100644 --- a/middleware/confidenceScore.js +++ b/middleware/confidenceScore.js @@ -77,8 +77,6 @@ function computeConfidenceScore(req, mean, stdev, hit) { hit.confidence /= checkCount; hit.confidence = Number((hit.confidence).toFixed(3)); - logger.debug('[confidence]:', hit.confidence, hit.name.default); - return hit; } @@ -102,8 +100,6 @@ function checkForDealBreakers(req, hit) { if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address_parts) && req.clean.parsed_text.postalcode !== hit.address_parts.zip) { - logger.debug('[confidence][deal-breaker]: postalcode !== zip (' + req.clean.parsed_text.postalcode + - ' !== ' + hit.address_parts.zip + ')'); return true; } } From 57d1c8be1e98921aec9e358c5dfa1dc1432f728b Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Wed, 30 Mar 2016 17:51:31 -0400 Subject: [PATCH 07/11] Return an object from access_log module This allows adding more methods later. --- app.js | 2 +- middleware/access_log.js | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/app.js b/app.js index d05e0365..25573573 100644 --- a/app.js +++ b/app.js @@ -5,7 +5,7 @@ var peliasConfig = require( 'pelias-config' ).generate().api; if( peliasConfig.accessLog ){ - app.use( require( './middleware/access_log' )( peliasConfig.accessLog ) ); + app.use( require( './middleware/access_log' ).createAccessLogger( peliasConfig.accessLog ) ); } /** ----------------------- pre-processing-middleware ----------------------- **/ diff --git a/middleware/access_log.js b/middleware/access_log.js index 812a8849..62d99767 100644 --- a/middleware/access_log.js +++ b/middleware/access_log.js @@ -17,4 +17,6 @@ function createAccessLogger( logFormat ){ }); } -module.exports = createAccessLogger; +module.exports = { + createAccessLogger: createAccessLogger +}; From 23ff51fd57dee34ae16744c500ff651d3f1d59d3 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 17 Mar 2016 16:48:23 -0400 Subject: [PATCH 08/11] Add custom access_log handlers that remove user info These handlers will remove sensitive query info from the logs when DNT is enabled --- controller/search.js | 7 ++- middleware/access_log.js | 38 ++++++++++++++ test/unit/middleware/access_log.js | 83 ++++++++++++++++++++++++++++++ test/unit/run.js | 1 + 4 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 test/unit/middleware/access_log.js diff --git a/controller/search.js b/controller/search.js index e2a8acff..7f7ffb68 100644 --- a/controller/search.js +++ b/controller/search.js @@ -2,6 +2,7 @@ var _ = require('lodash'); var service = { search: require('../service/search') }; var logger = require('pelias-logger').get('api:controller:search'); +var logging = require( '../helper/logging' ); function setup( backend, query ){ @@ -16,8 +17,12 @@ function setup( backend, query ){ return next(); } + var cleanOutput = _.cloneDeep(req.clean); + if (logging.isDNT(req)) { + cleanOutput = logging.removeFields(cleanOutput); + } // log clean parameters for stats - logger.info('[req]', 'endpoint=' + req.path, req.clean); + logger.info('[req]', 'endpoint=' + req.path, cleanOutput); // backend command var cmd = { diff --git a/middleware/access_log.js b/middleware/access_log.js index 62d99767..d87a2e32 100644 --- a/middleware/access_log.js +++ b/middleware/access_log.js @@ -4,11 +4,47 @@ 'use strict'; +var url = require( 'url' ); + +var _ = require( 'lodash' ); var morgan = require( 'morgan' ); var through = require( 'through2' ); + var peliasLogger = require( 'pelias-logger' ).get( 'api' ); +var logging = require( '../helper/logging' ); + +function customRemoteAddr(req, res) { + if (logging.isDNT(req)) { + return '[IP removed]'; + } else { + // from morgan default implementation + return req.ip || + req._remoteAddress || + (req.connection && req.connection.remoteAddress) || + undefined; + } +} + +function customURL(req, res) { + var parsedUrl = _.cloneDeep(req._parsedUrl); + parsedUrl.query = _.cloneDeep(req.query); + + if (logging.isDNT(req)) { + // strip out sensitive fields in the query object + parsedUrl.query = logging.removeFields(parsedUrl.query); + + // search will override the query object when formatting the url + // see https://nodejs.org/api/all.html#all_url_format_urlobj + delete parsedUrl.search; + } + + return url.format(parsedUrl); +} function createAccessLogger( logFormat ){ + morgan.token('remote-addr', customRemoteAddr); + morgan.token('url', customURL); + return morgan( logFormat, { stream: through( function write( ln, _, next ){ peliasLogger.info( ln.toString().trim() ); @@ -18,5 +54,7 @@ function createAccessLogger( logFormat ){ } module.exports = { + customRemoteAddr: customRemoteAddr, + customURL: customURL, createAccessLogger: createAccessLogger }; diff --git a/test/unit/middleware/access_log.js b/test/unit/middleware/access_log.js new file mode 100644 index 00000000..b2926a2d --- /dev/null +++ b/test/unit/middleware/access_log.js @@ -0,0 +1,83 @@ +var access_log = require('../../../middleware/access_log'); + +module.exports.tests = {}; + +module.exports.tests.customRemoteAddress = function(test) { + test('non-DNT request shows IP in logs', function(t) { + var req = { + ip: '8.8.8.8', + query: '/v1/search?....' + }; + + var result = access_log.customRemoteAddr(req, {}); + + t.equals(result, '8.8.8.8', 'IP would be sent to logs'); + t.end(); + }); + + test('DNT request does not show IP in logs', function(t) { + var req = { + ip: '8.8.8.8', + query: '/v1/search?....', + headers: { + DNT: 1 + } + }; + + var result = access_log.customRemoteAddr(req, {}); + + t.equals(result, '[IP removed]', 'IP removed from logs'); + t.end(); + }); +}; + +module.exports.tests.customURL = function(test) { + test('non-DNT request shows full query in logs', function(t) { + var req = { + ip: '8.8.8.8', + query: { + text: 'london' + }, + _parsedUrl: { + pathname: '/v1/search', + path: '/v1/search?text=london' + } + }; + + var result = access_log.customURL(req, {}); + + t.equals(result, '/v1/search?text=london', 'query not removed from logs'); + t.end(); + }); + + test('DNT request removes sensitive fields from logs', function(t) { + var req = { + ip: '8.8.8.8', + query: { + text: 'london' + }, + _parsedUrl: { + pathname: '/v1/search', + path: '/v1/search?text=london' + }, + headers: { + DNT: 1 + } + }; + + var result = access_log.customURL(req, {}); + + t.equals(result, '/v1/search?text=%5Bremoved%5D', 'query has sensitive fields removed'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('[middleware] access_log: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 2debcdcc..62516bc9 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -22,6 +22,7 @@ var tests = [ require('./helper/logging'), require('./helper/type_mapping'), require('./helper/sizeCalculator'), + require('./middleware/access_log'), require('./middleware/confidenceScore'), require('./middleware/confidenceScoreReverse'), require('./middleware/distance'), From 1435de906df77df63bb049e5d04a09d469e23405 Mon Sep 17 00:00:00 2001 From: Diana Shkolnikov Date: Mon, 16 May 2016 22:09:10 -0400 Subject: [PATCH 09/11] Add source_id property --- helper/geojsonify.js | 5 +++++ test/unit/helper/geojsonify.js | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/helper/geojsonify.js b/helper/geojsonify.js index 666ab60c..b4b57536 100644 --- a/helper/geojsonify.js +++ b/helper/geojsonify.js @@ -48,6 +48,10 @@ function lookupSource(src) { return src.source; } +function lookupSourceId(src) { + return src.source_id; +} + function lookupLayer(src) { return src.layer; } @@ -255,6 +259,7 @@ function addMetaData(src, dst) { dst.gid = makeGid(src); dst.layer = lookupLayer(src); dst.source = lookupSource(src); + dst.source_id = lookupSourceId(src); if (src.hasOwnProperty('bounding_box')) { dst.bounding_box = src.bounding_box; } diff --git a/test/unit/helper/geojsonify.js b/test/unit/helper/geojsonify.js index f8bb9779..6f3581c5 100644 --- a/test/unit/helper/geojsonify.js +++ b/test/unit/helper/geojsonify.js @@ -44,6 +44,7 @@ module.exports.tests.search = function(test, common) { '_id': 'id1', '_type': 'layer1', 'source': 'source1', + 'source_id': 'source_id_1', 'layer': 'layer1', 'center_point': { 'lat': 51.5337144, @@ -73,6 +74,7 @@ module.exports.tests.search = function(test, common) { '_id': 'id2', '_type': 'layer2', 'source': 'source2', + 'source_id': 'source_id_2', 'layer': 'layer2', 'name': { 'default': 'Blues Cafe' @@ -89,12 +91,13 @@ module.exports.tests.search = function(test, common) { 'county': 'Smithfield', 'localadmin': 'test1', 'locality': 'test2', - 'neighbourhood': 'test3', + 'neighbourhood': 'test3' }, { '_id': 'node:34633854', '_type': 'venue', 'source': 'openstreetmap', + 'source_id': 'source_id_3', 'layer': 'venue', 'name': { 'default': 'Empire State Building' @@ -136,6 +139,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'source1:layer1:id1', 'layer': 'layer1', 'source': 'source1', + 'source_id': 'source_id_1', 'label': '\'Round Midnight Jazz and Blues Bar, test2, England, United Kingdom', 'name': '\'Round Midnight Jazz and Blues Bar', 'country_a': 'GBR', @@ -166,6 +170,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'source2:layer2:id2', 'layer': 'layer2', 'source': 'source2', + 'source_id': 'source_id_2', 'label': 'Blues Cafe, test2, England, United Kingdom', 'name': 'Blues Cafe', 'country_a': 'GBR', @@ -193,6 +198,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'openstreetmap:venue:node:34633854', 'layer': 'venue', 'source': 'openstreetmap', + 'source_id': 'source_id_3', 'label': 'Empire State Building, Manhattan, New York, NY, USA', 'name': 'Empire State Building', 'country_a': 'USA', @@ -320,6 +326,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'whosonfirst:neighbourhood:85816607', 'layer': 'neighbourhood', 'source': 'whosonfirst', + 'source_id': '85816607', 'name': 'East New York', 'confidence': 0.888, 'country': 'United States', From ca0b121bdec4ff3d57d198cb373d64c0a2eefda3 Mon Sep 17 00:00:00 2001 From: greenkeeperio-bot Date: Tue, 17 May 2016 14:26:32 -0700 Subject: [PATCH 10/11] chore(package): update pelias-query to version 7.0.0 https://greenkeeper.io/ --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 26ffa303..7c6f5fc7 100644 --- a/package.json +++ b/package.json @@ -52,7 +52,7 @@ "pelias-config": "^1.0.1", "pelias-logger": "^0.0.8", "pelias-model": "^4.0.0", - "pelias-query": "6.3.0", + "pelias-query": "7.0.0", "pelias-suggester-pipeline": "2.0.4", "pelias-text-analyzer": "^1.0.1", "stats-lite": "1.0.3", From d7eacaf59e3f7661f83e2eeb15a6ee8a4f70ba27 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Wed, 18 May 2016 09:31:15 -0400 Subject: [PATCH 11/11] Fix mistaken order of parameters peliasAdmin was supposed to be a parameter to the multi match view, but instead it was being sent to the score view and ignored. Thanks to @trescube for catching this. --- query/search.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query/search.js b/query/search.js index 5689d9ec..75406fe7 100644 --- a/query/search.js +++ b/query/search.js @@ -37,7 +37,7 @@ query.score( peliasQuery.view.address('postcode') ); // multi_match is appropriate. query.score( peliasQuery.view.admin('country_a') ); query.score( peliasQuery.view.admin('region_a') ); -query.score( peliasQuery.view.admin_multi_match(adminFields), 'peliasAdmin' ); +query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') ); // non-scoring hard filters query.filter( peliasQuery.view.boundary_circle );