diff --git a/app.js b/app.js index d05e0365..25573573 100644 --- a/app.js +++ b/app.js @@ -5,7 +5,7 @@ var peliasConfig = require( 'pelias-config' ).generate().api; if( peliasConfig.accessLog ){ - app.use( require( './middleware/access_log' )( peliasConfig.accessLog ) ); + app.use( require( './middleware/access_log' ).createAccessLogger( peliasConfig.accessLog ) ); } /** ----------------------- pre-processing-middleware ----------------------- **/ diff --git a/controller/search.js b/controller/search.js index e2a8acff..7f7ffb68 100644 --- a/controller/search.js +++ b/controller/search.js @@ -2,6 +2,7 @@ var _ = require('lodash'); var service = { search: require('../service/search') }; var logger = require('pelias-logger').get('api:controller:search'); +var logging = require( '../helper/logging' ); function setup( backend, query ){ @@ -16,8 +17,12 @@ function setup( backend, query ){ return next(); } + var cleanOutput = _.cloneDeep(req.clean); + if (logging.isDNT(req)) { + cleanOutput = logging.removeFields(cleanOutput); + } // log clean parameters for stats - logger.info('[req]', 'endpoint=' + req.path, req.clean); + logger.info('[req]', 'endpoint=' + req.path, cleanOutput); // backend command var cmd = { diff --git a/helper/geojsonify.js b/helper/geojsonify.js index 666ab60c..b4b57536 100644 --- a/helper/geojsonify.js +++ b/helper/geojsonify.js @@ -48,6 +48,10 @@ function lookupSource(src) { return src.source; } +function lookupSourceId(src) { + return src.source_id; +} + function lookupLayer(src) { return src.layer; } @@ -255,6 +259,7 @@ function addMetaData(src, dst) { dst.gid = makeGid(src); dst.layer = lookupLayer(src); dst.source = lookupSource(src); + dst.source_id = lookupSourceId(src); if (src.hasOwnProperty('bounding_box')) { dst.bounding_box = src.bounding_box; } diff --git a/helper/logging.js b/helper/logging.js new file mode 100644 index 00000000..e514ebff --- /dev/null +++ b/helper/logging.js @@ -0,0 +1,24 @@ +var fieldsToRemove = ['text', 'focus.point.lat', 'focus.point.lon', + 'boundary.circle.lat', 'boundary.circle.lon', 'point.lat', 'point.lon']; + +function isDNT(req) { + if (!req.headers) { + return false; + } + return req.headers.DNT || req.headers.dnt || req.headers.do_not_track; +} + +function removeFields(query) { + fieldsToRemove.forEach(function(field) { + if (query[field]) { + query[field] = '[removed]'; + } + }); + + return query; +} + +module.exports = { + isDNT: isDNT, + removeFields: removeFields +}; diff --git a/middleware/access_log.js b/middleware/access_log.js index 812a8849..d87a2e32 100644 --- a/middleware/access_log.js +++ b/middleware/access_log.js @@ -4,11 +4,47 @@ 'use strict'; +var url = require( 'url' ); + +var _ = require( 'lodash' ); var morgan = require( 'morgan' ); var through = require( 'through2' ); + var peliasLogger = require( 'pelias-logger' ).get( 'api' ); +var logging = require( '../helper/logging' ); + +function customRemoteAddr(req, res) { + if (logging.isDNT(req)) { + return '[IP removed]'; + } else { + // from morgan default implementation + return req.ip || + req._remoteAddress || + (req.connection && req.connection.remoteAddress) || + undefined; + } +} + +function customURL(req, res) { + var parsedUrl = _.cloneDeep(req._parsedUrl); + parsedUrl.query = _.cloneDeep(req.query); + + if (logging.isDNT(req)) { + // strip out sensitive fields in the query object + parsedUrl.query = logging.removeFields(parsedUrl.query); + + // search will override the query object when formatting the url + // see https://nodejs.org/api/all.html#all_url_format_urlobj + delete parsedUrl.search; + } + + return url.format(parsedUrl); +} function createAccessLogger( logFormat ){ + morgan.token('remote-addr', customRemoteAddr); + morgan.token('url', customURL); + return morgan( logFormat, { stream: through( function write( ln, _, next ){ peliasLogger.info( ln.toString().trim() ); @@ -17,4 +53,8 @@ function createAccessLogger( logFormat ){ }); } -module.exports = createAccessLogger; +module.exports = { + customRemoteAddr: customRemoteAddr, + customURL: customURL, + createAccessLogger: createAccessLogger +}; diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js index df48b2b1..39ea45b2 100644 --- a/middleware/confidenceScore.js +++ b/middleware/confidenceScore.js @@ -77,8 +77,6 @@ function computeConfidenceScore(req, mean, stdev, hit) { hit.confidence /= checkCount; hit.confidence = Number((hit.confidence).toFixed(3)); - logger.debug('[confidence]:', hit.confidence, hit.name.default); - return hit; } @@ -102,8 +100,6 @@ function checkForDealBreakers(req, hit) { if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address_parts) && req.clean.parsed_text.postalcode !== hit.address_parts.zip) { - logger.debug('[confidence][deal-breaker]: postalcode !== zip (' + req.clean.parsed_text.postalcode + - ' !== ' + hit.address_parts.zip + ')'); return true; } } diff --git a/package.json b/package.json index 26ba7808..7c6f5fc7 100644 --- a/package.json +++ b/package.json @@ -30,13 +30,11 @@ "url": "https://github.com/pelias/api/issues" }, "engines": { - "node": ">=0.10.26", - "npm": ">=1.4.3", - "elasticsearch": ">=1.2.1" + "node": ">=0.10.26" }, "dependencies": { "async": "^1.5.2", - "check-types": "^6.0.0", + "check-types": "^7.0.0", "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "elasticsearch": "^11.0.0", "elasticsearch-exceptions": "0.0.4", @@ -54,7 +52,7 @@ "pelias-config": "^1.0.1", "pelias-logger": "^0.0.8", "pelias-model": "^4.0.0", - "pelias-query": "6.3.0", + "pelias-query": "7.0.0", "pelias-suggester-pipeline": "2.0.4", "pelias-text-analyzer": "^1.0.1", "stats-lite": "1.0.3", @@ -67,7 +65,7 @@ "jshint": "^2.5.6", "nsp": "^2.2.0", "precommit-hook": "^3.0.0", - "proxyquire": "^1.4.0", + "proxyquire": "^1.7.7", "tap-dot": "1.0.5", "tape": "^4.5.1" }, diff --git a/query/search.js b/query/search.js index 5689d9ec..75406fe7 100644 --- a/query/search.js +++ b/query/search.js @@ -37,7 +37,7 @@ query.score( peliasQuery.view.address('postcode') ); // multi_match is appropriate. query.score( peliasQuery.view.admin('country_a') ); query.score( peliasQuery.view.admin('region_a') ); -query.score( peliasQuery.view.admin_multi_match(adminFields), 'peliasAdmin' ); +query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') ); // non-scoring hard filters query.filter( peliasQuery.view.boundary_circle ); diff --git a/test/unit/helper/geojsonify.js b/test/unit/helper/geojsonify.js index f8bb9779..6f3581c5 100644 --- a/test/unit/helper/geojsonify.js +++ b/test/unit/helper/geojsonify.js @@ -44,6 +44,7 @@ module.exports.tests.search = function(test, common) { '_id': 'id1', '_type': 'layer1', 'source': 'source1', + 'source_id': 'source_id_1', 'layer': 'layer1', 'center_point': { 'lat': 51.5337144, @@ -73,6 +74,7 @@ module.exports.tests.search = function(test, common) { '_id': 'id2', '_type': 'layer2', 'source': 'source2', + 'source_id': 'source_id_2', 'layer': 'layer2', 'name': { 'default': 'Blues Cafe' @@ -89,12 +91,13 @@ module.exports.tests.search = function(test, common) { 'county': 'Smithfield', 'localadmin': 'test1', 'locality': 'test2', - 'neighbourhood': 'test3', + 'neighbourhood': 'test3' }, { '_id': 'node:34633854', '_type': 'venue', 'source': 'openstreetmap', + 'source_id': 'source_id_3', 'layer': 'venue', 'name': { 'default': 'Empire State Building' @@ -136,6 +139,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'source1:layer1:id1', 'layer': 'layer1', 'source': 'source1', + 'source_id': 'source_id_1', 'label': '\'Round Midnight Jazz and Blues Bar, test2, England, United Kingdom', 'name': '\'Round Midnight Jazz and Blues Bar', 'country_a': 'GBR', @@ -166,6 +170,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'source2:layer2:id2', 'layer': 'layer2', 'source': 'source2', + 'source_id': 'source_id_2', 'label': 'Blues Cafe, test2, England, United Kingdom', 'name': 'Blues Cafe', 'country_a': 'GBR', @@ -193,6 +198,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'openstreetmap:venue:node:34633854', 'layer': 'venue', 'source': 'openstreetmap', + 'source_id': 'source_id_3', 'label': 'Empire State Building, Manhattan, New York, NY, USA', 'name': 'Empire State Building', 'country_a': 'USA', @@ -320,6 +326,7 @@ module.exports.tests.search = function(test, common) { 'gid': 'whosonfirst:neighbourhood:85816607', 'layer': 'neighbourhood', 'source': 'whosonfirst', + 'source_id': '85816607', 'name': 'East New York', 'confidence': 0.888, 'country': 'United States', diff --git a/test/unit/helper/logging.js b/test/unit/helper/logging.js new file mode 100644 index 00000000..90fbfb55 --- /dev/null +++ b/test/unit/helper/logging.js @@ -0,0 +1,96 @@ +var logging = require('../../../helper/logging'); + +module.exports.tests = {}; + +module.exports.tests.dnt = function(test) { + test('DNT=1 triggers DNT detection', function(t) { + var req = { + headers: { + DNT: '1' + } + }; + + t.ok(logging.isDNT(req), 'DNT detected'); + t.end(); + }); + + test('DNT=0 triggers DNT detection', function(t) { + // because this is common apparently, although the spec says to do the opposite + // see https://en.wikipedia.org/wiki/Do_Not_Track + var req = { + headers: { + DNT: '0' + } + }; + + t.ok(logging.isDNT(req), 'DNT detected'); + t.end(); + }); + + test('do_not_track header triggers DNT detection', function(t) { + // according to @riordan, some people use this too + var req = { + headers: { + do_not_track: '1' + } + }; + + t.ok(logging.isDNT(req), 'DNT detected'); + t.end(); + }); + + test('no DNT or do_not_track header does not trigger DNT detection', function(t) { + var req = { + headers: { + 'Accept-Charset': 'utf-8' + } + }; + + t.notOk(logging.isDNT(req), 'DNT detected'); + t.end(); + }); +}; + +module.exports.tests.field_removal = function(test) { + test('removes multiple fields that may have sensitive information', function(t) { + var query = { + text: 'possibly sensitive text', + 'point.lat': 'possibly sensitive location info' + }; + + var cleaned_query = logging.removeFields(query); + + var expected = { + text: '[removed]', + 'point.lat': '[removed]' + }; + + t.deepEquals(cleaned_query, expected, 'multiple sensitive fields removed'); + t.end(); + }); + + test('non-sensitive fields untouched', function(t) { + var query = { + sources: 'wof,gn' + }; + + var cleaned_query = logging.removeFields(query); + + var expected = { + sources: 'wof,gn' + }; + + t.deepEquals(cleaned_query, expected, 'non-sensitive fields are not touched'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('logging: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/middleware/access_log.js b/test/unit/middleware/access_log.js new file mode 100644 index 00000000..b2926a2d --- /dev/null +++ b/test/unit/middleware/access_log.js @@ -0,0 +1,83 @@ +var access_log = require('../../../middleware/access_log'); + +module.exports.tests = {}; + +module.exports.tests.customRemoteAddress = function(test) { + test('non-DNT request shows IP in logs', function(t) { + var req = { + ip: '8.8.8.8', + query: '/v1/search?....' + }; + + var result = access_log.customRemoteAddr(req, {}); + + t.equals(result, '8.8.8.8', 'IP would be sent to logs'); + t.end(); + }); + + test('DNT request does not show IP in logs', function(t) { + var req = { + ip: '8.8.8.8', + query: '/v1/search?....', + headers: { + DNT: 1 + } + }; + + var result = access_log.customRemoteAddr(req, {}); + + t.equals(result, '[IP removed]', 'IP removed from logs'); + t.end(); + }); +}; + +module.exports.tests.customURL = function(test) { + test('non-DNT request shows full query in logs', function(t) { + var req = { + ip: '8.8.8.8', + query: { + text: 'london' + }, + _parsedUrl: { + pathname: '/v1/search', + path: '/v1/search?text=london' + } + }; + + var result = access_log.customURL(req, {}); + + t.equals(result, '/v1/search?text=london', 'query not removed from logs'); + t.end(); + }); + + test('DNT request removes sensitive fields from logs', function(t) { + var req = { + ip: '8.8.8.8', + query: { + text: 'london' + }, + _parsedUrl: { + pathname: '/v1/search', + path: '/v1/search?text=london' + }, + headers: { + DNT: 1 + } + }; + + var result = access_log.customURL(req, {}); + + t.equals(result, '/v1/search?text=%5Bremoved%5D', 'query has sensitive fields removed'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + function test(name, testFunction) { + return tape('[middleware] access_log: ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index 828d99b5..62516bc9 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -19,8 +19,10 @@ var tests = [ require('./helper/labelGenerator_GBR'), require('./helper/labelGenerator_USA'), require('./helper/labelSchema'), + require('./helper/logging'), require('./helper/type_mapping'), require('./helper/sizeCalculator'), + require('./middleware/access_log'), require('./middleware/confidenceScore'), require('./middleware/confidenceScoreReverse'), require('./middleware/distance'),