Browse Source

Merge pull request #542 from pelias/respect_privacy_headers

Respect privacy headers
pull/545/head
Julian Simioni 9 years ago
parent
commit
5b15b0f30a
  1. 2
      app.js
  2. 7
      controller/search.js
  3. 24
      helper/logging.js
  4. 42
      middleware/access_log.js
  5. 4
      middleware/confidenceScore.js
  6. 96
      test/unit/helper/logging.js
  7. 83
      test/unit/middleware/access_log.js
  8. 2
      test/unit/run.js

2
app.js

@ -5,7 +5,7 @@ var peliasConfig = require( 'pelias-config' ).generate().api;
if( peliasConfig.accessLog ){ if( peliasConfig.accessLog ){
app.use( require( './middleware/access_log' )( peliasConfig.accessLog ) ); app.use( require( './middleware/access_log' ).createAccessLogger( peliasConfig.accessLog ) );
} }
/** ----------------------- pre-processing-middleware ----------------------- **/ /** ----------------------- pre-processing-middleware ----------------------- **/

7
controller/search.js

@ -2,6 +2,7 @@ var _ = require('lodash');
var service = { search: require('../service/search') }; var service = { search: require('../service/search') };
var logger = require('pelias-logger').get('api:controller:search'); var logger = require('pelias-logger').get('api:controller:search');
var logging = require( '../helper/logging' );
function setup( backend, query ){ function setup( backend, query ){
@ -16,8 +17,12 @@ function setup( backend, query ){
return next(); return next();
} }
var cleanOutput = _.cloneDeep(req.clean);
if (logging.isDNT(req)) {
cleanOutput = logging.removeFields(cleanOutput);
}
// log clean parameters for stats // log clean parameters for stats
logger.info('[req]', 'endpoint=' + req.path, req.clean); logger.info('[req]', 'endpoint=' + req.path, cleanOutput);
// backend command // backend command
var cmd = { var cmd = {

24
helper/logging.js

@ -0,0 +1,24 @@
// Query parameter names whose values removeFields() overwrites with '[removed]'.
// The names are flat keys containing dots (e.g. 'focus.point.lat'), not nested paths.
var fieldsToRemove = ['text', 'focus.point.lat', 'focus.point.lon',
'boundary.circle.lat', 'boundary.circle.lon', 'point.lat', 'point.lon'];
/**
 * Report whether a request carries a "do not track" style header.
 *
 * The `DNT`, `dnt` and `do_not_track` header names are checked. Any truthy
 * value counts, so the string '0' is also treated as a do-not-track request.
 *
 * @param {Object} req - request object; `req.headers` may be absent
 * @returns {boolean} true when one of the headers is set to a truthy value
 */
function isDNT(req) {
  var headers = req.headers;

  if (!headers) {
    return false;
  }

  // coerce so callers always get a boolean instead of the raw header value
  // (or undefined when none of the headers is present)
  return Boolean(headers.DNT || headers.dnt || headers.do_not_track);
}
/**
 * Redact potentially sensitive values from a query object.
 *
 * Every key listed in `fieldsToRemove` that holds a value on `query` has that
 * value replaced with the string '[removed]'. The object is modified in place
 * and also returned, so callers that need the original intact must pass a copy.
 *
 * @param {Object} query - flat map of query parameter names to values
 * @returns {Object} the same `query` object, after redaction
 */
function removeFields(query) {
  fieldsToRemove.forEach(function(field) {
    var value = query[field];

    // test for "has a value" rather than truthiness: a latitude or longitude
    // of 0, or an empty text, is still user data and must not be left as-is
    if (value !== undefined && value !== null) {
      query[field] = '[removed]';
    }
  });

  return query;
}
// Functions exposed by this helper module.
module.exports = {
isDNT: isDNT,
removeFields: removeFields
};

42
middleware/access_log.js

@ -4,11 +4,47 @@
'use strict'; 'use strict';
var url = require( 'url' );
var _ = require( 'lodash' );
var morgan = require( 'morgan' ); var morgan = require( 'morgan' );
var through = require( 'through2' ); var through = require( 'through2' );
var peliasLogger = require( 'pelias-logger' ).get( 'api' ); var peliasLogger = require( 'pelias-logger' ).get( 'api' );
var logging = require( '../helper/logging' );
/**
 * Replacement for morgan's `remote-addr` token.
 *
 * @param {Object} req - incoming request
 * @param {Object} res - response (unused)
 * @returns {string|undefined} '[IP removed]' for do-not-track requests,
 *   otherwise the first address found on the request, or undefined
 */
function customRemoteAddr(req, res) {
  if (!logging.isDNT(req)) {
    // same lookup order as morgan's default implementation
    var address = req.ip || req._remoteAddress;

    if (!address && req.connection) {
      address = req.connection.remoteAddress;
    }

    return address || undefined;
  }

  return '[IP removed]';
}
/**
 * Replacement for morgan's `url` token.
 *
 * Works on copies of `req._parsedUrl` and `req.query`, so the request itself
 * is never modified. For do-not-track requests the sensitive query fields are
 * redacted before the URL is formatted.
 *
 * @param {Object} req - incoming request
 * @param {Object} res - response (unused)
 * @returns {string} the URL string to write to the access log
 */
function customURL(req, res) {
  var urlParts = _.cloneDeep(req._parsedUrl);
  var queryCopy = _.cloneDeep(req.query);

  urlParts.query = queryCopy;

  if (!logging.isDNT(req)) {
    return url.format(urlParts);
  }

  // strip out sensitive fields in the copied query object
  urlParts.query = logging.removeFields(queryCopy);

  // `search` takes precedence over `query` when formatting, so it has to go
  // see https://nodejs.org/api/all.html#all_url_format_urlobj
  delete urlParts.search;

  return url.format(urlParts);
}
function createAccessLogger( logFormat ){ function createAccessLogger( logFormat ){
morgan.token('remote-addr', customRemoteAddr);
morgan.token('url', customURL);
return morgan( logFormat, { return morgan( logFormat, {
stream: through( function write( ln, _, next ){ stream: through( function write( ln, _, next ){
peliasLogger.info( ln.toString().trim() ); peliasLogger.info( ln.toString().trim() );
@ -17,4 +53,8 @@ function createAccessLogger( logFormat ){
}); });
} }
module.exports = createAccessLogger; module.exports = {
customRemoteAddr: customRemoteAddr,
customURL: customURL,
createAccessLogger: createAccessLogger
};

4
middleware/confidenceScore.js

@ -77,8 +77,6 @@ function computeConfidenceScore(req, mean, stdev, hit) {
hit.confidence /= checkCount; hit.confidence /= checkCount;
hit.confidence = Number((hit.confidence).toFixed(3)); hit.confidence = Number((hit.confidence).toFixed(3));
logger.debug('[confidence]:', hit.confidence, hit.name.default);
return hit; return hit;
} }
@ -102,8 +100,6 @@ function checkForDealBreakers(req, hit) {
if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address_parts) && if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address_parts) &&
req.clean.parsed_text.postalcode !== hit.address_parts.zip) { req.clean.parsed_text.postalcode !== hit.address_parts.zip) {
logger.debug('[confidence][deal-breaker]: postalcode !== zip (' + req.clean.parsed_text.postalcode +
' !== ' + hit.address_parts.zip + ')');
return true; return true;
} }
} }

96
test/unit/helper/logging.js

@ -0,0 +1,96 @@
var logging = require('../../../helper/logging');
module.exports.tests = {};
// Tests for logging.isDNT: which request headers count as a do-not-track signal.
module.exports.tests.dnt = function(test) {
  test('DNT=1 triggers DNT detection', function(t) {
    var req = {
      headers: {
        DNT: '1'
      }
    };

    t.ok(logging.isDNT(req), 'DNT detected');
    t.end();
  });

  test('DNT=0 triggers DNT detection', function(t) {
    // because this is common apparently, although the spec says to do the opposite
    // see https://en.wikipedia.org/wiki/Do_Not_Track
    var req = {
      headers: {
        DNT: '0'
      }
    };

    t.ok(logging.isDNT(req), 'DNT detected');
    t.end();
  });

  test('lowercase dnt header triggers DNT detection', function(t) {
    // Node's http module exposes incoming header names in lowercase
    var req = {
      headers: {
        dnt: '1'
      }
    };

    t.ok(logging.isDNT(req), 'DNT detected');
    t.end();
  });

  test('do_not_track header triggers DNT detection', function(t) {
    // according to @riordan, some people use this too
    var req = {
      headers: {
        do_not_track: '1'
      }
    };

    t.ok(logging.isDNT(req), 'DNT detected');
    t.end();
  });

  test('no DNT or do_not_track header does not trigger DNT detection', function(t) {
    var req = {
      headers: {
        'Accept-Charset': 'utf-8'
      }
    };

    t.notOk(logging.isDNT(req), 'DNT not detected');
    t.end();
  });

  test('request without headers does not trigger DNT detection', function(t) {
    var req = {};

    t.notOk(logging.isDNT(req), 'DNT not detected');
    t.end();
  });
};
// Tests for logging.removeFields: sensitive keys are redacted, everything else is kept.
module.exports.tests.field_removal = function(test) {
  test('removes multiple fields that may have sensitive information', function(t) {
    var input = {
      text: 'possibly sensitive text',
      'point.lat': 'possibly sensitive location info'
    };

    t.deepEquals(
      logging.removeFields(input),
      { text: '[removed]', 'point.lat': '[removed]' },
      'multiple sensitive fields removed'
    );
    t.end();
  });

  test('non-sensitive fields untouched', function(t) {
    var input = { sources: 'wof,gn' };

    t.deepEquals(
      logging.removeFields(input),
      { sources: 'wof,gn' },
      'non-sensitive fields are not touched'
    );
    t.end();
  });
};
// Runs every registered test group, prefixing each test name with 'logging: '.
module.exports.all = function (tape, common) {
  var prefixedTest = function(name, testFunction) {
    return tape('logging: ' + name, testFunction);
  };

  Object.keys(module.exports.tests).forEach(function(groupName) {
    module.exports.tests[groupName](prefixedTest, common);
  });
};

83
test/unit/middleware/access_log.js

@ -0,0 +1,83 @@
var access_log = require('../../../middleware/access_log');
module.exports.tests = {};
// Tests for access_log.customRemoteAddr: the IP is logged unless the request is do-not-track.
module.exports.tests.customRemoteAddress = function(test) {
  test('non-DNT request shows IP in logs', function(t) {
    var plainRequest = { ip: '8.8.8.8', query: '/v1/search?....' };

    t.equals(
      access_log.customRemoteAddr(plainRequest, {}),
      '8.8.8.8',
      'IP would be sent to logs'
    );
    t.end();
  });

  test('DNT request does not show IP in logs', function(t) {
    var dntRequest = {
      ip: '8.8.8.8',
      query: '/v1/search?....',
      headers: { DNT: 1 }
    };

    t.equals(
      access_log.customRemoteAddr(dntRequest, {}),
      '[IP removed]',
      'IP removed from logs'
    );
    t.end();
  });
};
// Tests for access_log.customURL: the logged URL keeps the query unless the request is do-not-track.
module.exports.tests.customURL = function(test) {
  test('non-DNT request shows full query in logs', function(t) {
    var plainRequest = {
      ip: '8.8.8.8',
      query: { text: 'london' },
      _parsedUrl: {
        pathname: '/v1/search',
        path: '/v1/search?text=london'
      }
    };

    t.equals(
      access_log.customURL(plainRequest, {}),
      '/v1/search?text=london',
      'query not removed from logs'
    );
    t.end();
  });

  test('DNT request removes sensitive fields from logs', function(t) {
    var dntRequest = {
      ip: '8.8.8.8',
      query: { text: 'london' },
      _parsedUrl: {
        pathname: '/v1/search',
        path: '/v1/search?text=london'
      },
      headers: { DNT: 1 }
    };

    t.equals(
      access_log.customURL(dntRequest, {}),
      '/v1/search?text=%5Bremoved%5D',
      'query has sensitive fields removed'
    );
    t.end();
  });
};
// Runs every registered test group, prefixing each test name with '[middleware] access_log: '.
module.exports.all = function (tape, common) {
  var prefixedTest = function(name, testFunction) {
    return tape('[middleware] access_log: ' + name, testFunction);
  };

  Object.keys(module.exports.tests).forEach(function(groupName) {
    module.exports.tests[groupName](prefixedTest, common);
  });
};

2
test/unit/run.js

@ -19,8 +19,10 @@ var tests = [
require('./helper/labelGenerator_GBR'), require('./helper/labelGenerator_GBR'),
require('./helper/labelGenerator_USA'), require('./helper/labelGenerator_USA'),
require('./helper/labelSchema'), require('./helper/labelSchema'),
require('./helper/logging'),
require('./helper/type_mapping'), require('./helper/type_mapping'),
require('./helper/sizeCalculator'), require('./helper/sizeCalculator'),
require('./middleware/access_log'),
require('./middleware/confidenceScore'), require('./middleware/confidenceScore'),
require('./middleware/confidenceScoreReverse'), require('./middleware/confidenceScoreReverse'),
require('./middleware/distance'), require('./middleware/distance'),

Loading…
Cancel
Save