Browse Source

Merge pull request #545 from pelias/master

Merge master into staging
pull/550/head
Diana Shkolnikov 9 years ago
parent
commit
8f3947b887
  1. 2
      app.js
  2. 7
      controller/search.js
  3. 5
      helper/geojsonify.js
  4. 24
      helper/logging.js
  5. 42
      middleware/access_log.js
  6. 4
      middleware/confidenceScore.js
  7. 10
      package.json
  8. 2
      query/search.js
  9. 9
      test/unit/helper/geojsonify.js
  10. 96
      test/unit/helper/logging.js
  11. 83
      test/unit/middleware/access_log.js
  12. 2
      test/unit/run.js

2
app.js

@ -5,7 +5,7 @@ var peliasConfig = require( 'pelias-config' ).generate().api;
if( peliasConfig.accessLog ){ if( peliasConfig.accessLog ){
app.use( require( './middleware/access_log' )( peliasConfig.accessLog ) ); app.use( require( './middleware/access_log' ).createAccessLogger( peliasConfig.accessLog ) );
} }
/** ----------------------- pre-processing-middleware ----------------------- **/ /** ----------------------- pre-processing-middleware ----------------------- **/

7
controller/search.js

@ -2,6 +2,7 @@ var _ = require('lodash');
var service = { search: require('../service/search') }; var service = { search: require('../service/search') };
var logger = require('pelias-logger').get('api:controller:search'); var logger = require('pelias-logger').get('api:controller:search');
var logging = require( '../helper/logging' );
function setup( backend, query ){ function setup( backend, query ){
@ -16,8 +17,12 @@ function setup( backend, query ){
return next(); return next();
} }
var cleanOutput = _.cloneDeep(req.clean);
if (logging.isDNT(req)) {
cleanOutput = logging.removeFields(cleanOutput);
}
// log clean parameters for stats // log clean parameters for stats
logger.info('[req]', 'endpoint=' + req.path, req.clean); logger.info('[req]', 'endpoint=' + req.path, cleanOutput);
// backend command // backend command
var cmd = { var cmd = {

5
helper/geojsonify.js

@ -48,6 +48,10 @@ function lookupSource(src) {
return src.source; return src.source;
} }
function lookupSourceId(src) {
return src.source_id;
}
function lookupLayer(src) { function lookupLayer(src) {
return src.layer; return src.layer;
} }
@ -255,6 +259,7 @@ function addMetaData(src, dst) {
dst.gid = makeGid(src); dst.gid = makeGid(src);
dst.layer = lookupLayer(src); dst.layer = lookupLayer(src);
dst.source = lookupSource(src); dst.source = lookupSource(src);
dst.source_id = lookupSourceId(src);
if (src.hasOwnProperty('bounding_box')) { if (src.hasOwnProperty('bounding_box')) {
dst.bounding_box = src.bounding_box; dst.bounding_box = src.bounding_box;
} }

24
helper/logging.js

@ -0,0 +1,24 @@
var fieldsToRemove = ['text', 'focus.point.lat', 'focus.point.lon',
'boundary.circle.lat', 'boundary.circle.lon', 'point.lat', 'point.lon'];
function isDNT(req) {
if (!req.headers) {
return false;
}
return req.headers.DNT || req.headers.dnt || req.headers.do_not_track;
}
function removeFields(query) {
fieldsToRemove.forEach(function(field) {
if (query[field]) {
query[field] = '[removed]';
}
});
return query;
}
module.exports = {
isDNT: isDNT,
removeFields: removeFields
};

42
middleware/access_log.js

@ -4,11 +4,47 @@
'use strict'; 'use strict';
var url = require( 'url' );
var _ = require( 'lodash' );
var morgan = require( 'morgan' ); var morgan = require( 'morgan' );
var through = require( 'through2' ); var through = require( 'through2' );
var peliasLogger = require( 'pelias-logger' ).get( 'api' ); var peliasLogger = require( 'pelias-logger' ).get( 'api' );
var logging = require( '../helper/logging' );
function customRemoteAddr(req, res) {
if (logging.isDNT(req)) {
return '[IP removed]';
} else {
// from morgan default implementation
return req.ip ||
req._remoteAddress ||
(req.connection && req.connection.remoteAddress) ||
undefined;
}
}
function customURL(req, res) {
var parsedUrl = _.cloneDeep(req._parsedUrl);
parsedUrl.query = _.cloneDeep(req.query);
if (logging.isDNT(req)) {
// strip out sensitive fields in the query object
parsedUrl.query = logging.removeFields(parsedUrl.query);
// search will override the query object when formatting the url
// see https://nodejs.org/api/all.html#all_url_format_urlobj
delete parsedUrl.search;
}
return url.format(parsedUrl);
}
function createAccessLogger( logFormat ){ function createAccessLogger( logFormat ){
morgan.token('remote-addr', customRemoteAddr);
morgan.token('url', customURL);
return morgan( logFormat, { return morgan( logFormat, {
stream: through( function write( ln, _, next ){ stream: through( function write( ln, _, next ){
peliasLogger.info( ln.toString().trim() ); peliasLogger.info( ln.toString().trim() );
@ -17,4 +53,8 @@ function createAccessLogger( logFormat ){
}); });
} }
module.exports = createAccessLogger; module.exports = {
customRemoteAddr: customRemoteAddr,
customURL: customURL,
createAccessLogger: createAccessLogger
};

4
middleware/confidenceScore.js

@ -77,8 +77,6 @@ function computeConfidenceScore(req, mean, stdev, hit) {
hit.confidence /= checkCount; hit.confidence /= checkCount;
hit.confidence = Number((hit.confidence).toFixed(3)); hit.confidence = Number((hit.confidence).toFixed(3));
logger.debug('[confidence]:', hit.confidence, hit.name.default);
return hit; return hit;
} }
@ -102,8 +100,6 @@ function checkForDealBreakers(req, hit) {
if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address_parts) && if (check.assigned(req.clean.parsed_text.postalcode) && check.assigned(hit.address_parts) &&
req.clean.parsed_text.postalcode !== hit.address_parts.zip) { req.clean.parsed_text.postalcode !== hit.address_parts.zip) {
logger.debug('[confidence][deal-breaker]: postalcode !== zip (' + req.clean.parsed_text.postalcode +
' !== ' + hit.address_parts.zip + ')');
return true; return true;
} }
} }

10
package.json

@ -30,13 +30,11 @@
"url": "https://github.com/pelias/api/issues" "url": "https://github.com/pelias/api/issues"
}, },
"engines": { "engines": {
"node": ">=0.10.26", "node": ">=0.10.26"
"npm": ">=1.4.3",
"elasticsearch": ">=1.2.1"
}, },
"dependencies": { "dependencies": {
"async": "^1.5.2", "async": "^1.5.2",
"check-types": "^6.0.0", "check-types": "^7.0.0",
"cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve", "cluster2": "git://github.com/missinglink/cluster2.git#node_zero_twelve",
"elasticsearch": "^11.0.0", "elasticsearch": "^11.0.0",
"elasticsearch-exceptions": "0.0.4", "elasticsearch-exceptions": "0.0.4",
@ -54,7 +52,7 @@
"pelias-config": "^1.0.1", "pelias-config": "^1.0.1",
"pelias-logger": "^0.0.8", "pelias-logger": "^0.0.8",
"pelias-model": "^4.0.0", "pelias-model": "^4.0.0",
"pelias-query": "6.3.0", "pelias-query": "7.0.0",
"pelias-suggester-pipeline": "2.0.4", "pelias-suggester-pipeline": "2.0.4",
"pelias-text-analyzer": "^1.0.1", "pelias-text-analyzer": "^1.0.1",
"stats-lite": "1.0.3", "stats-lite": "1.0.3",
@ -67,7 +65,7 @@
"jshint": "^2.5.6", "jshint": "^2.5.6",
"nsp": "^2.2.0", "nsp": "^2.2.0",
"precommit-hook": "^3.0.0", "precommit-hook": "^3.0.0",
"proxyquire": "^1.4.0", "proxyquire": "^1.7.7",
"tap-dot": "1.0.5", "tap-dot": "1.0.5",
"tape": "^4.5.1" "tape": "^4.5.1"
}, },

2
query/search.js

@ -37,7 +37,7 @@ query.score( peliasQuery.view.address('postcode') );
// multi_match is appropriate. // multi_match is appropriate.
query.score( peliasQuery.view.admin('country_a') ); query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region_a') ); query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin_multi_match(adminFields), 'peliasAdmin' ); query.score( peliasQuery.view.admin_multi_match(adminFields, 'peliasAdmin') );
// non-scoring hard filters // non-scoring hard filters
query.filter( peliasQuery.view.boundary_circle ); query.filter( peliasQuery.view.boundary_circle );

9
test/unit/helper/geojsonify.js

@ -44,6 +44,7 @@ module.exports.tests.search = function(test, common) {
'_id': 'id1', '_id': 'id1',
'_type': 'layer1', '_type': 'layer1',
'source': 'source1', 'source': 'source1',
'source_id': 'source_id_1',
'layer': 'layer1', 'layer': 'layer1',
'center_point': { 'center_point': {
'lat': 51.5337144, 'lat': 51.5337144,
@ -73,6 +74,7 @@ module.exports.tests.search = function(test, common) {
'_id': 'id2', '_id': 'id2',
'_type': 'layer2', '_type': 'layer2',
'source': 'source2', 'source': 'source2',
'source_id': 'source_id_2',
'layer': 'layer2', 'layer': 'layer2',
'name': { 'name': {
'default': 'Blues Cafe' 'default': 'Blues Cafe'
@ -89,12 +91,13 @@ module.exports.tests.search = function(test, common) {
'county': 'Smithfield', 'county': 'Smithfield',
'localadmin': 'test1', 'localadmin': 'test1',
'locality': 'test2', 'locality': 'test2',
'neighbourhood': 'test3', 'neighbourhood': 'test3'
}, },
{ {
'_id': 'node:34633854', '_id': 'node:34633854',
'_type': 'venue', '_type': 'venue',
'source': 'openstreetmap', 'source': 'openstreetmap',
'source_id': 'source_id_3',
'layer': 'venue', 'layer': 'venue',
'name': { 'name': {
'default': 'Empire State Building' 'default': 'Empire State Building'
@ -136,6 +139,7 @@ module.exports.tests.search = function(test, common) {
'gid': 'source1:layer1:id1', 'gid': 'source1:layer1:id1',
'layer': 'layer1', 'layer': 'layer1',
'source': 'source1', 'source': 'source1',
'source_id': 'source_id_1',
'label': '\'Round Midnight Jazz and Blues Bar, test2, England, United Kingdom', 'label': '\'Round Midnight Jazz and Blues Bar, test2, England, United Kingdom',
'name': '\'Round Midnight Jazz and Blues Bar', 'name': '\'Round Midnight Jazz and Blues Bar',
'country_a': 'GBR', 'country_a': 'GBR',
@ -166,6 +170,7 @@ module.exports.tests.search = function(test, common) {
'gid': 'source2:layer2:id2', 'gid': 'source2:layer2:id2',
'layer': 'layer2', 'layer': 'layer2',
'source': 'source2', 'source': 'source2',
'source_id': 'source_id_2',
'label': 'Blues Cafe, test2, England, United Kingdom', 'label': 'Blues Cafe, test2, England, United Kingdom',
'name': 'Blues Cafe', 'name': 'Blues Cafe',
'country_a': 'GBR', 'country_a': 'GBR',
@ -193,6 +198,7 @@ module.exports.tests.search = function(test, common) {
'gid': 'openstreetmap:venue:node:34633854', 'gid': 'openstreetmap:venue:node:34633854',
'layer': 'venue', 'layer': 'venue',
'source': 'openstreetmap', 'source': 'openstreetmap',
'source_id': 'source_id_3',
'label': 'Empire State Building, Manhattan, New York, NY, USA', 'label': 'Empire State Building, Manhattan, New York, NY, USA',
'name': 'Empire State Building', 'name': 'Empire State Building',
'country_a': 'USA', 'country_a': 'USA',
@ -320,6 +326,7 @@ module.exports.tests.search = function(test, common) {
'gid': 'whosonfirst:neighbourhood:85816607', 'gid': 'whosonfirst:neighbourhood:85816607',
'layer': 'neighbourhood', 'layer': 'neighbourhood',
'source': 'whosonfirst', 'source': 'whosonfirst',
'source_id': '85816607',
'name': 'East New York', 'name': 'East New York',
'confidence': 0.888, 'confidence': 0.888,
'country': 'United States', 'country': 'United States',

96
test/unit/helper/logging.js

@ -0,0 +1,96 @@
var logging = require('../../../helper/logging');
module.exports.tests = {};
module.exports.tests.dnt = function(test) {
test('DNT=1 triggers DNT detection', function(t) {
var req = {
headers: {
DNT: '1'
}
};
t.ok(logging.isDNT(req), 'DNT detected');
t.end();
});
test('DNT=0 triggers DNT detection', function(t) {
// because this is common apparently, although the spec says to do the opposite
// see https://en.wikipedia.org/wiki/Do_Not_Track
var req = {
headers: {
DNT: '0'
}
};
t.ok(logging.isDNT(req), 'DNT detected');
t.end();
});
test('do_not_track header triggers DNT detection', function(t) {
// according to @riordan, some people use this too
var req = {
headers: {
do_not_track: '1'
}
};
t.ok(logging.isDNT(req), 'DNT detected');
t.end();
});
test('no DNT or do_not_track header does not trigger DNT detection', function(t) {
var req = {
headers: {
'Accept-Charset': 'utf-8'
}
};
t.notOk(logging.isDNT(req), 'DNT detected');
t.end();
});
};
module.exports.tests.field_removal = function(test) {
test('removes multiple fields that may have sensitive information', function(t) {
var query = {
text: 'possibly sensitive text',
'point.lat': 'possibly sensitive location info'
};
var cleaned_query = logging.removeFields(query);
var expected = {
text: '[removed]',
'point.lat': '[removed]'
};
t.deepEquals(cleaned_query, expected, 'multiple sensitive fields removed');
t.end();
});
test('non-sensitive fields untouched', function(t) {
var query = {
sources: 'wof,gn'
};
var cleaned_query = logging.removeFields(query);
var expected = {
sources: 'wof,gn'
};
t.deepEquals(cleaned_query, expected, 'non-sensitive fields are not touched');
t.end();
});
};
module.exports.all = function (tape, common) {
function test(name, testFunction) {
return tape('logging: ' + name, testFunction);
}
for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};

83
test/unit/middleware/access_log.js

@ -0,0 +1,83 @@
var access_log = require('../../../middleware/access_log');
module.exports.tests = {};
module.exports.tests.customRemoteAddress = function(test) {
test('non-DNT request shows IP in logs', function(t) {
var req = {
ip: '8.8.8.8',
query: '/v1/search?....'
};
var result = access_log.customRemoteAddr(req, {});
t.equals(result, '8.8.8.8', 'IP would be sent to logs');
t.end();
});
test('DNT request does not show IP in logs', function(t) {
var req = {
ip: '8.8.8.8',
query: '/v1/search?....',
headers: {
DNT: 1
}
};
var result = access_log.customRemoteAddr(req, {});
t.equals(result, '[IP removed]', 'IP removed from logs');
t.end();
});
};
module.exports.tests.customURL = function(test) {
test('non-DNT request shows full query in logs', function(t) {
var req = {
ip: '8.8.8.8',
query: {
text: 'london'
},
_parsedUrl: {
pathname: '/v1/search',
path: '/v1/search?text=london'
}
};
var result = access_log.customURL(req, {});
t.equals(result, '/v1/search?text=london', 'query not removed from logs');
t.end();
});
test('DNT request removes sensitive fields from logs', function(t) {
var req = {
ip: '8.8.8.8',
query: {
text: 'london'
},
_parsedUrl: {
pathname: '/v1/search',
path: '/v1/search?text=london'
},
headers: {
DNT: 1
}
};
var result = access_log.customURL(req, {});
t.equals(result, '/v1/search?text=%5Bremoved%5D', 'query has sensitive fields removed');
t.end();
});
};
module.exports.all = function (tape, common) {
function test(name, testFunction) {
return tape('[middleware] access_log: ' + name, testFunction);
}
for( var testCase in module.exports.tests ){
module.exports.tests[testCase](test, common);
}
};

2
test/unit/run.js

@ -19,8 +19,10 @@ var tests = [
require('./helper/labelGenerator_GBR'), require('./helper/labelGenerator_GBR'),
require('./helper/labelGenerator_USA'), require('./helper/labelGenerator_USA'),
require('./helper/labelSchema'), require('./helper/labelSchema'),
require('./helper/logging'),
require('./helper/type_mapping'), require('./helper/type_mapping'),
require('./helper/sizeCalculator'), require('./helper/sizeCalculator'),
require('./middleware/access_log'),
require('./middleware/confidenceScore'), require('./middleware/confidenceScore'),
require('./middleware/confidenceScoreReverse'), require('./middleware/confidenceScoreReverse'),
require('./middleware/distance'), require('./middleware/distance'),

Loading…
Cancel
Save