Browse Source

merge conflicts from pulling: source_parameter

pull/229/head
Peter Johnson 9 years ago
parent
commit
05bdf784e0
  1. 31
      controller/index.js
  2. 28
      controller/markdownToHtml.js
  3. 9
      controller/search.js
  4. 3
      helper/geojsonify.js
  5. 8
      helper/types.js
  6. 255
      middleware/confidenceScore.js
  7. 8
      middleware/geocodeJSON.js
  8. 1
      package.json
  9. 0
      public/apiDoc.md
  10. 7
      public/attribution.md
  11. 12
      routes/v1.js
  12. 10
      service/search.js
  13. 20
      test/unit/controller/index.js
  14. 4
      test/unit/mock/backend.js
  15. 2
      test/unit/service/search.js

31
controller/index.js

@ -1,31 +0,0 @@
var pkg = require('../package');
var markdown = require('markdown').markdown;
var fs = require('fs');
/**
 * Build the controller that serves the API landing page.
 *
 * The markdown document is read from './DOCS.md' and converted to HTML once,
 * when setup() is called; the returned controller re-uses that rendered
 * string for every request.
 *
 * @returns {function} controller( req, res, next )
 */
function setup(){

  // assemble the markdown: title, version link, then the docs file contents
  var heading = '# Pelias API\n';
  var versionLine = '### Version: ['+ pkg.version+ '](https://github.com/pelias/api/releases)\n';
  var markdownSource = heading + versionLine + fs.readFileSync( './DOCS.md', 'utf8');

  // the inline style is prepended to the rendered markdown
  var indexHtml = '<style>html{font-family:monospace}</style>' + markdown.toHTML(markdownSource);

  return function controller( req, res, next ) {

    // clients which do not accept html get the package name/version as json
    if (!req.accepts('html')) {
      res.json({
        name: pkg.name,
        version: {
          number: pkg.version
        }
      });
      return;
    }

    res.send(indexHtml);
  };
}
module.exports = setup;

28
controller/markdownToHtml.js

@ -0,0 +1,28 @@
var markdown = require('markdown').markdown;
var fs = require('fs');
/**
 * Build a controller that serves a markdown file rendered as HTML.
 *
 * The file is read synchronously and converted once, when setup() is called;
 * the returned controller re-uses the result for every request.
 *
 * @param {object} peliasConfig   config object; only `version` is read here
 * @param {string} markdownFile   path of the markdown file to render
 * @returns {function} controller( req, res )
 */
function setup(peliasConfig, markdownFile){

  // assemble the markdown: title, version link, then the file contents
  var heading = '# Pelias API\n';
  var versionLine = '### Version: [' + peliasConfig.version + '](https://github.com/pelias/api/releases)\n';
  var text = heading + versionLine + fs.readFileSync( markdownFile, 'utf8');

  // the inline style is prepended to the rendered markdown
  var html = '<style>html{font-family:monospace}</style>' + markdown.toHTML(text);

  return function controller( req, res ) {

    // clients which do not accept html get both representations as json
    if (!req.accepts('html')) {
      res.json({
        markdown: text,
        html: html
      });
      return;
    }

    res.send(html);
  };
}
module.exports = setup;

9
controller/search.js

@ -15,19 +15,20 @@ function setup( backend, query ){
body: query( req.clean ) body: query( req.clean )
}; };
if (req.clean.type !== undefined) { // ?
if( req.clean.hasOwnProperty('type') ){
cmd.type = req.clean.type; cmd.type = req.clean.type;
delete req.clean.type; // remove type from clean to avoid clutter
} }
// query backend // query backend
service.search( backend, cmd, function( err, docs ){ service.search( backend, cmd, function( err, docs, meta ){
// error handler // error handler
if( err ){ return next( err ); } if( err ){ return next( err ); }
req.results = { req.results = {
data: docs data: docs,
meta: meta
}; };
next(); next();

3
helper/geojsonify.js

@ -18,7 +18,8 @@ var DETAILS_PROPS = [
'county', 'county',
'localadmin', 'localadmin',
'locality', 'locality',
'neighbourhood' 'neighbourhood',
'confidence'
]; ];

8
helper/types.js

@ -14,7 +14,9 @@ module.exports = function calculate_types(clean_types) {
return undefined; return undefined;
} }
/* the layers and source parameters are cumulative:
* perform a set intersection of their specified types
*/
if (clean_types.from_layers || clean_types.from_source) { if (clean_types.from_layers || clean_types.from_source) {
var types = valid_types; var types = valid_types;
@ -29,6 +31,10 @@ module.exports = function calculate_types(clean_types) {
return types; return types;
} }
/*
* Type restrictions requested by the address parser should only be used
* if both the source and layers parameters are empty, so do this last
*/
if (clean_types.from_address_parser) { if (clean_types.from_address_parser) {
return clean_types.from_address_parser; return clean_types.from_address_parser;
} }

255
middleware/confidenceScore.js

@ -0,0 +1,255 @@
/**
*
*Basic confidence score should be computed and returned for each item in the results.
* The score should range between 0-1, and take into consideration as many factors as possible.
*
* Some factors to consider:
*
* - number of results from ES
* - score of item within the range of highest-lowest scores from ES (within the returned set)
* - linguistic match of query
* - detection (or specification) of query type. i.e. an address shouldn't match an admin address.
*/
var stats = require('stats-lite');
var logger = require('pelias-logger').get('api');
var RELATIVE_SCORES = true;
/**
 * Configure the middleware and hand back the request handler.
 *
 * Stores the `relativeScores` setting in the module-level RELATIVE_SCORES
 * flag; the flag is set to true when the config does not define that
 * property itself.
 *
 * @param {object} peliasConfig
 * @returns {function} the computeScores middleware
 */
function setup(peliasConfig) {
  if (peliasConfig.hasOwnProperty('relativeScores')) {
    RELATIVE_SCORES = peliasConfig.relativeScores;
  } else {
    RELATIVE_SCORES = true;
  }

  return computeScores;
}
/**
 * Middleware: attach a confidence score to every item in req.results.data.
 *
 * Passes straight through to next() when req.results, its data or its meta
 * is missing.
 *
 * @param {object} req
 * @param {object} res
 * @param {function} next
 */
function computeScores(req, res, next) {
  var results = req.results;
  var hasResultSet = results && results.data && results.meta;

  // nothing to score
  if (!hasResultSet) {
    return next();
  }

  // the mean and standard deviation are taken over all scores in meta
  var scores = results.meta.scores;
  var stdev = computeStandardDeviation(scores);
  var mean = stats.mean(scores);

  // replace the data array with the scored items
  results.data = results.data.map(function (hit) {
    return computeConfidenceScore(req, mean, stdev, hit);
  });

  next();
}
/**
 * Work out how confident we are that a single result is correct and store
 * the value on `hit.confidence`.
 *
 * A deal breaker fixes the confidence at 0.5. Otherwise the confidence is
 * the average of the individual checks: name, query type and address, plus
 * distance-from-mean and z-score when RELATIVE_SCORES is enabled.
 *
 * @param {object} req
 * @param {number} mean
 * @param {number} stdev
 * @param {object} hit
 * @returns {object} the same hit, with `confidence` set
 */
function computeConfidenceScore(req, mean, stdev, hit) {

  // a deal breaker skips all other checks
  if (checkForDealBreakers(req, hit)) {
    hit.confidence = 0.5;
    return hit;
  }

  // number of checks contributing to the average
  var checkCount = 3;
  hit.confidence = 0;

  // checks based on the score distribution of the whole result set
  if (RELATIVE_SCORES) {
    checkCount = checkCount + 2;
    hit.confidence = hit.confidence + checkDistanceFromMean(hit._score, mean, stdev);
    hit.confidence = hit.confidence + computeZScore(hit._score, mean, stdev);
  }

  // checks comparing the result against the request parameters
  hit.confidence = hit.confidence + checkName(req.clean.input, req.clean.parsed_input, hit);
  hit.confidence = hit.confidence + checkQueryType(req.clean.parsed_input, hit);
  hit.confidence = hit.confidence + checkAddress(req.clean.parsed_input, hit);

  // TODO: look at categories and location

  // average over the checks that ran
  hit.confidence = hit.confidence / checkCount;

  logger.debug('[confidence]:', hit.confidence, hit.name.default);

  return hit;
}
/**
 * Detect mismatches between the parsed input and the result that rule out
 * the normal confidence calculation.
 *
 * A deal breaker is a parsed state that differs from `hit.admin1_abbr`, or a
 * parsed postalcode that differs from `hit.zip`. Both comparisons are strict
 * (case- and type-sensitive).
 *
 * @param {object} req
 * @param {object} hit
 * @returns {boolean} true when a deal breaker was found, false otherwise
 */
function checkForDealBreakers(req, hit) {
  var parsed = req.clean.parsed_input;

  // without parsed input there is nothing to contradict
  if (!parsed) {
    return false;
  }

  if (parsed.state && parsed.state !== hit.admin1_abbr) {
    logger.debug('[confidence][deal-breaker]: state !== admin1_abbr');
    return true;
  }

  if (parsed.postalcode && parsed.postalcode !== hit.zip) {
    logger.debug('[confidence][deal-breaker]: postalcode !== zip');
    return true;
  }

  // always return a boolean rather than falling off the end with undefined
  return false;
}
/**
 * Decide whether a score stands out from the rest of the result set:
 * it must exceed the mean by more than one standard deviation.
 *
 * @param {number} score
 * @param {number} mean
 * @param {number} stdev
 * @returns {number} 1 when the score is more than stdev above the mean, else 0
 */
function checkDistanceFromMean(score, mean, stdev) {
  var distance = score - mean;

  if (distance > stdev) {
    return 1;
  }
  return 0;
}
/**
 * Compare the requested name against the default name of the result.
 * The comparison is case-insensitive.
 *
 * The name component of parsed_input is tried first, when there is one;
 * the raw input string is tried second.
 *
 * @param {string} input
 * @param {object|undefined} parsed_input
 * @param {object} hit
 * @returns {number} 1 on an exact match, 0.7 otherwise
 */
function checkName(input, parsed_input, hit) {
  var hitName = hit.name.default.toLowerCase();

  // the parsed name is the cleaner value, so it takes precedence
  var parsedNameMatches = Boolean(parsed_input && parsed_input.name) &&
    hitName === parsed_input.name.toLowerCase();

  // otherwise fall back to the input exactly as it was provided
  if (parsedNameMatches || hitName === input.toLowerCase()) {
    return 1;
  }

  // an exact match was a longshot anyway, so don't penalise too harshly
  return 0.7;
}
/**
 * Check that a requested house number is reflected in the result.
 *
 * When the parsed input carries a house number, the result is expected to
 * have an `address.number` as well. Queries without parsed input (or without
 * a house number) always pass this check.
 *
 * @param {object|undefined} input  parsed input of the query, if any
 * @param {object} hit
 * @returns {number} 0 when a house number was requested but the result has
 *                   none, 1 otherwise
 */
function checkQueryType(input, hit) {
  // input is undefined when the query could not be parsed, so guard before
  // reading its properties
  var houseNumberRequested = Boolean(input && input.number);
  var houseNumberFound = Boolean(hit.address && hit.address.number);

  if (houseNumberRequested && !houseNumberFound) {
    return 0;
  }
  return 1;
}
/**
 * Rate how well one property of the input matches the same property of
 * the result. Presence is decided by truthiness, so values such as 0 or ''
 * count as missing.
 *
 * @param {string|number|undefined|null} inputProp
 * @param {string|number|undefined|null} hitProp
 * @param {boolean} expectEnriched  whether the result is expected to carry
 *                                  the value even when the input does not
 * @returns {number} a value between 0 and 1
 */
function propMatch(inputProp, hitProp, expectEnriched) {

  // the result does not have the property
  if (!hitProp) {
    // input has it, result doesn't => BAD
    if (inputProp) { return 0; }
    // both missing: BAD when enrichment was expected, GOOD otherwise
    return expectEnriched ? 0 : 1;
  }

  // the result has it but the input doesn't:
  // GOOD when enrichment is expected, 50/50 otherwise
  if (!inputProp) {
    return expectEnriched ? 1 : 0.5;
  }

  // both present: compare the string forms, ignoring case
  var wanted = inputProp.toString().toLowerCase();
  var found = hitProp.toString().toLowerCase();
  if (wanted === found) { return 1; }

  // both present but different
  return 0.7;
}
/**
 * Compare the parts of a parsed address against the result and average
 * the individual property matches.
 *
 * The comparison only runs when the input has both a house number and a
 * street; any other input scores 1.
 *
 * @param {object} input
 * @param {string|number} [input.number]
 * @param {string} [input.street]
 * @param {string} [input.postalcode]
 * @param {string} [input.state]
 * @param {string} [input.country]
 * @param {object} hit
 * @param {object} [hit.address]
 * @param {string|number} [hit.address.number]
 * @param {string} [hit.address.street]
 * @param {string|number} [hit.address.zip]
 * @param {string} [hit.admin1_abbr]
 * @param {string} [hit.alpha3]
 * @returns {number}
 */
function checkAddress(input, hit) {
  var isAddressQuery = input && input.number && input.street;

  // not an address query, nothing to compare
  if (!isAddressQuery) {
    return 1;
  }

  var address = hit.address;

  // NOTE(review): the postalcode is read from hit.address.zip here, while
  // the deal-breaker check reads hit.zip - confirm which field is intended
  var matches = [
    propMatch(input.number, (address ? address.number : null), false),
    propMatch(input.street, (address ? address.street : null), false),
    propMatch(input.postalcode, (address ? address.zip: null), true),
    propMatch(input.state, hit.admin1_abbr, true),
    propMatch(input.country, hit.alpha3, true)
  ];

  var total = matches.reduce(function (sum, match) {
    return sum + match;
  }, 0);

  return total / matches.length;
}
/**
 * z-scores have an effective range of -3.00 to +3.00.
 * An average z-score is ZERO.
 * A negative z-score indicates that the item/element is below
 * average and a positive z-score means that the item/element
 * is above average. When teachers say they are going to "curve"
 * the test, they do this by computing z-scores for the students' test scores.
 *
 * The z-score is shifted and scaled before it is returned; a standard
 * deviation below 0.01 yields 0.
 *
 * @param {number} score
 * @param {number} mean
 * @param {number} stdev
 * @returns {number}
 */
function computeZScore(score, mean, stdev) {
  // a (near) zero deviation would make the division meaningless
  if (stdev < 0.01) {
    return 0;
  }

  var zScore = (score - mean) / stdev;

  // because the effective range of z-scores is -3.00 to +3.00
  // add 10 to ensure a positive value, and then divide by 10+3+3
  // to further normalize to %-like result
  return (zScore + 10) / 16;
}
/**
 * Standard deviation of a list of scores, as computed by stats.stdev,
 * with very small values flattened to zero.
 *
 * @param {Array} scores
 * @returns {number} 0 when the deviation is below 0.01, the deviation otherwise
 */
function computeStandardDeviation(scores) {
  var deviation = stats.stdev(scores);

  // a low deviation is treated as no deviation at all
  if (deviation < 0.01) {
    return 0;
  }
  return deviation;
}
module.exports = setup;

8
middleware/geocodeJSON.js

@ -25,16 +25,10 @@ function convertToGeocodeJSON(peliasConfig, req, next) {
// the GeocodeJSON spec that is implemented by this instance. // the GeocodeJSON spec that is implemented by this instance.
req.results.geojson.geocoding.version = '0.1'; req.results.geojson.geocoding.version = '0.1';
// OPTIONAL. Default: null. The licence of the data. In case of multiple sources,
// and then multiple licences, can be an object with one key by source.
// Can be a freeform text property describing the licensing details.
// Can be a URI on the server, which outlines licensing details.
req.results.geojson.geocoding.license = peliasConfig.host + '/license';
// OPTIONAL. Default: null. The attribution of the data. In case of multiple sources, // OPTIONAL. Default: null. The attribution of the data. In case of multiple sources,
// and then multiple attributions, can be an object with one key by source. // and then multiple attributions, can be an object with one key by source.
// Can be a URI on the server, which outlines attribution details. // Can be a URI on the server, which outlines attribution details.
req.results.geojson.geocoding.attribution = peliasConfig.host + '/attribution'; req.results.geojson.geocoding.attribution = peliasConfig.host + 'attribution';
// OPTIONAL. Default: null. The query that has been issued to trigger the // OPTIONAL. Default: null. The query that has been issued to trigger the
// search. // search.

1
package.json

@ -52,6 +52,7 @@
"pelias-query": "^1.1.0", "pelias-query": "^1.1.0",
"pelias-schema": "1.0.0", "pelias-schema": "1.0.0",
"pelias-suggester-pipeline": "2.0.2", "pelias-suggester-pipeline": "2.0.2",
"stats-lite": "^1.0.3",
"through2": "0.6.5" "through2": "0.6.5"
}, },
"devDependencies": { "devDependencies": {

0
DOCS.md → public/apiDoc.md

7
public/attribution.md

@ -0,0 +1,7 @@
## Attribution
* Geocoding by [Pelias](https://mapzen.com/pelias) from [Mapzen](https://mapzen.com)
* Data from
* [OpenStreetMap](http://www.openstreetmap.org/copyright) © OpenStreetMap contributors under [ODbL](http://opendatacommons.org/licenses/odbl/)
* [Quattroshapes](https://github.com/foursquare/quattroshapes/blob/master/LICENSE.md) under [CC-BY-2.0](https://creativecommons.org/licenses/by/2.0/)
* [GeoNames](http://www.geonames.org/) under [CC-BY-3.0](https://creativecommons.org/licenses/by/3.0/)
* and other sources

12
routes/v1.js

@ -1,3 +1,4 @@
var express = require('express');
var Router = require('express').Router; var Router = require('express').Router;
var reverseQuery = require('../query/reverse'); var reverseQuery = require('../query/reverse');
@ -16,7 +17,7 @@ var middleware = {
/** ----------------------- controllers ----------------------- **/ /** ----------------------- controllers ----------------------- **/
var controllers = { var controllers = {
index: require('../controller/index'), mdToHTML: require('../controller/markdownToHtml'),
place: require('../controller/place'), place: require('../controller/place'),
search: require('../controller/search') search: require('../controller/search')
}; };
@ -24,6 +25,7 @@ var controllers = {
/** ----------------------- controllers ----------------------- **/ /** ----------------------- controllers ----------------------- **/
var postProc = { var postProc = {
confidenceScores: require('../middleware/confidenceScore'),
renamePlacenames: require('../middleware/renamePlacenames'), renamePlacenames: require('../middleware/renamePlacenames'),
geocodeJSON: require('../middleware/geocodeJSON'), geocodeJSON: require('../middleware/geocodeJSON'),
sendJSON: require('../middleware/sendJSON') sendJSON: require('../middleware/sendJSON')
@ -41,12 +43,16 @@ function addRoutes(app, peliasConfig) {
var routers = { var routers = {
index: createRouter([ index: createRouter([
controllers.index() controllers.mdToHTML(peliasConfig, './public/apiDoc.md')
]),
attribution: createRouter([
controllers.mdToHTML(peliasConfig, './public/attribution.md')
]), ]),
search: createRouter([ search: createRouter([
sanitisers.search.middleware, sanitisers.search.middleware,
middleware.types, middleware.types,
controllers.search(), controllers.search(),
postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(), postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig), postProc.geocodeJSON(peliasConfig),
postProc.sendJSON postProc.sendJSON
@ -54,6 +60,7 @@ function addRoutes(app, peliasConfig) {
reverse: createRouter([ reverse: createRouter([
sanitisers.reverse.middleware, sanitisers.reverse.middleware,
controllers.search(undefined, reverseQuery), controllers.search(undefined, reverseQuery),
// TODO: add confidence scores
postProc.renamePlacenames(), postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig), postProc.geocodeJSON(peliasConfig),
postProc.sendJSON postProc.sendJSON
@ -72,6 +79,7 @@ function addRoutes(app, peliasConfig) {
// api root // api root
app.get ( base, routers.index ); app.get ( base, routers.index );
app.get ( base + 'attribution', routers.attribution );
app.get ( base + 'place', routers.place ); app.get ( base + 'place', routers.place );
app.get ( base + 'autocomplete', routers.search ); app.get ( base + 'autocomplete', routers.search );
app.get ( base + 'search', routers.search ); app.get ( base + 'search', routers.search );

10
service/search.js

@ -23,20 +23,28 @@ function service( backend, cmd, cb ){
// map returned documents // map returned documents
var docs = []; var docs = [];
var meta = {
scores: []
};
if( data && data.hits && data.hits.total && Array.isArray(data.hits.hits)){ if( data && data.hits && data.hits.total && Array.isArray(data.hits.hits)){
docs = data.hits.hits.map( function( hit ){ docs = data.hits.hits.map( function( hit ){
meta.scores.push(hit._score);
// map metadata in to _source so we // map metadata in to _source so we
// can serve it up to the consumer // can serve it up to the consumer
hit._source._id = hit._id; hit._source._id = hit._id;
hit._source._type = hit._type; hit._source._type = hit._type;
hit._source._score = hit._score;
return hit._source; return hit._source;
}); });
} }
// fire callback // fire callback
return cb( null, docs ); return cb( null, docs, meta );
}); });
} }

20
test/unit/controller/index.js

@ -1,19 +1,19 @@
var setup = require('../../../controller/index'); var setup = require('../../../controller/markdownToHtml');
module.exports.tests = {}; module.exports.tests = {};
module.exports.tests.interface = function(test, common) { module.exports.tests.interface = function(test, common) {
test('valid interface', function(t) { test('valid interface', function(t) {
t.equal(typeof setup, 'function', 'setup is a function'); t.equal(typeof setup, 'function', 'setup is a function');
t.equal(typeof setup(), 'function', 'setup returns a controller'); t.equal(typeof setup({}, './public/apiDoc.md'), 'function', 'setup returns a controller');
t.end(); t.end();
}); });
}; };
module.exports.tests.info_json = function(test, common) { module.exports.tests.info_json = function(test, common) {
test('returns server info in json', function(t) { test('returns server info in json', function(t) {
var controller = setup(); var controller = setup({}, './public/attribution.md');
var req = { var req = {
accepts: function (format) { accepts: function (format) {
t.equal(format, 'html', 'check for Accepts:html'); t.equal(format, 'html', 'check for Accepts:html');
@ -22,9 +22,8 @@ module.exports.tests.info_json = function(test, common) {
}; };
var res = { json: function( json ){ var res = { json: function( json ){
t.equal(typeof json, 'object', 'returns json'); t.equal(typeof json, 'object', 'returns json');
t.equal(typeof json.name, 'string', 'name'); t.assert(json.hasOwnProperty('markdown'), 'return object contains markdown property');
t.equal(typeof json.version, 'object', 'version'); t.assert(json.hasOwnProperty('html'), 'return object contains html property');
t.equal(typeof json.version.number, 'string', 'version number');
t.end(); t.end();
}}; }};
controller( req, res ); controller( req, res );
@ -33,21 +32,24 @@ module.exports.tests.info_json = function(test, common) {
module.exports.tests.info_html = function(test, common) { module.exports.tests.info_html = function(test, common) {
test('returns server info in html', function(t) { test('returns server info in html', function(t) {
var filePath = './foo.md';
var style = '<style>html{font-family:monospace}</style>'; var style = '<style>html{font-family:monospace}</style>';
var mockText = 'this text should show up in the html content'; var mockText = 'this text should show up in the html content';
var fsMock = { var fsMock = {
readFileSync: function (path, format) { readFileSync: function (path, format) {
t.equal(path, './DOCS.md', 'open DOCS.md file'); t.equal(path, filePath, 'open specified file');
t.equal(format, 'utf8', 'file format'); t.equal(format, 'utf8', 'file format');
return mockText; return mockText;
} }
}; };
var proxyquire = require('proxyquire'); var proxyquire = require('proxyquire');
var setup = proxyquire('../../../controller/index', { 'fs': fsMock }); var setup = proxyquire('../../../controller/markdownToHtml', { 'fs': fsMock });
var controller = setup(); var config = { version: '1.1.1' };
var controller = setup(config, filePath);
var req = { var req = {
accepts: function () { accepts: function () {
return true; return true;

4
test/unit/mock/backend.js

@ -10,6 +10,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){
return cb( undefined, searchEnvelope([{ return cb( undefined, searchEnvelope([{
_id: 'myid1', _id: 'myid1',
_type: 'mytype1', _type: 'mytype1',
_score: 10,
_source: { _source: {
value: 1, value: 1,
center_point: { lat: 100.1, lon: -50.5 }, center_point: { lat: 100.1, lon: -50.5 },
@ -19,6 +20,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){
}, { }, {
_id: 'myid2', _id: 'myid2',
_type: 'mytype2', _type: 'mytype2',
_score: 20,
_source: { _source: {
value: 2, value: 2,
center_point: { lat: 100.2, lon: -51.5 }, center_point: { lat: 100.2, lon: -51.5 },
@ -35,6 +37,7 @@ responses['client/mget/ok/1'] = function( cmd, cb ){
return cb( undefined, mgetEnvelope([{ return cb( undefined, mgetEnvelope([{
_id: 'myid1', _id: 'myid1',
_type: 'mytype1', _type: 'mytype1',
_score: 10,
found: true, found: true,
_source: { _source: {
value: 1, value: 1,
@ -45,6 +48,7 @@ responses['client/mget/ok/1'] = function( cmd, cb ){
}, { }, {
_id: 'myid2', _id: 'myid2',
_type: 'mytype2', _type: 'mytype2',
_score: 20,
found: true, found: true,
_source: { _source: {
value: 2, value: 2,

2
test/unit/service/search.js

@ -19,6 +19,7 @@ module.exports.tests.functional_success = function(test, common) {
var expected = [ var expected = [
{ {
_id: 'myid1', _type: 'mytype1', _id: 'myid1', _type: 'mytype1',
_score: 10,
value: 1, value: 1,
center_point: { lat: 100.1, lon: -50.5 }, center_point: { lat: 100.1, lon: -50.5 },
name: { default: 'test name1' }, name: { default: 'test name1' },
@ -26,6 +27,7 @@ module.exports.tests.functional_success = function(test, common) {
}, },
{ {
_id: 'myid2', _type: 'mytype2', _id: 'myid2', _type: 'mytype2',
_score: 20,
value: 2, value: 2,
center_point: { lat: 100.2, lon: -51.5 }, center_point: { lat: 100.2, lon: -51.5 },
name: { default: 'test name2' }, name: { default: 'test name2' },

Loading…
Cancel
Save