diff --git a/controller/index.js b/controller/index.js
deleted file mode 100644
index d900793f..00000000
--- a/controller/index.js
+++ /dev/null
@@ -1,31 +0,0 @@
-
-var pkg = require('../package');
-var markdown = require('markdown').markdown;
-var fs = require('fs');
-
-function setup(){
-
- var styleString = '';
- var text = '# Pelias API\n';
- text += '### Version: ['+ pkg.version+ '](https://github.com/pelias/api/releases)\n';
- text += fs.readFileSync( './DOCS.md', 'utf8');
- var indexHtml = styleString + markdown.toHTML(text);
-
- function controller( req, res, next ) {
- if (req.accepts('html')) {
- res.send(indexHtml);
- return;
- }
- // default behaviour
- res.json({
- name: pkg.name,
- version: {
- number: pkg.version
- }
- });
- }
-
- return controller;
-}
-
-module.exports = setup;
diff --git a/controller/markdownToHtml.js b/controller/markdownToHtml.js
new file mode 100644
index 00000000..656afe01
--- /dev/null
+++ b/controller/markdownToHtml.js
@@ -0,0 +1,28 @@
+
+var markdown = require('markdown').markdown;
+var fs = require('fs');
+// setup(peliasConfig, markdownFile): returns a (req, res) handler serving markdownFile, prefixed with a title/version header, as HTML or JSON
+function setup(peliasConfig, markdownFile){
+ // the file is read synchronously and rendered once, when setup() is called, not on every request
+ var styleString = '';
+ var text = '# Pelias API\n';
+ text += '### Version: [' + peliasConfig.version + '](https://github.com/pelias/api/releases)\n';
+ text += fs.readFileSync( markdownFile, 'utf8');
+ var html = styleString + markdown.toHTML(text);
+ // clients that accept HTML receive the pre-rendered page
+ function controller( req, res ) {
+ if (req.accepts('html')) {
+ res.send(html);
+ return;
+ }
+ // all other clients receive the markdown source and the rendered HTML as JSON
+ res.json({
+ markdown: text,
+ html: html
+ });
+ }
+
+ return controller;
+}
+
+module.exports = setup;
diff --git a/controller/search.js b/controller/search.js
index d70c1b5c..c4c54754 100644
--- a/controller/search.js
+++ b/controller/search.js
@@ -15,19 +15,20 @@ function setup( backend, query ){
body: query( req.clean )
};
- if (req.clean.type !== undefined) {
+ // forward the type restriction to the backend command when req.clean carries one
+ if( req.clean.hasOwnProperty('type') ){
cmd.type = req.clean.type;
- delete req.clean.type; // remove type from clean to avoid clutter
}
// query backend
- service.search( backend, cmd, function( err, docs ){
+ service.search( backend, cmd, function( err, docs, meta ){
// error handler
if( err ){ return next( err ); }
req.results = {
- data: docs
+ data: docs,
+ meta: meta
};
next();
diff --git a/helper/geojsonify.js b/helper/geojsonify.js
index a56b86eb..82c4cf88 100644
--- a/helper/geojsonify.js
+++ b/helper/geojsonify.js
@@ -18,7 +18,8 @@ var DETAILS_PROPS = [
'county',
'localadmin',
'locality',
- 'neighbourhood'
+ 'neighbourhood',
+ 'confidence'
];
diff --git a/helper/types.js b/helper/types.js
index a87a65b8..7455476e 100644
--- a/helper/types.js
+++ b/helper/types.js
@@ -5,7 +5,7 @@ var valid_types = require( '../query/types' );
*/
var intersection = function intersection(set1, set2) {
return set2.filter(function(value) {
- return set1.indexOf(value) !== -1;
+ return set1.indexOf(value) !== -1;
});
};
@@ -14,7 +14,9 @@ module.exports = function calculate_types(clean_types) {
return undefined;
}
-
+ /* the layers and source parameters are cumulative:
+ * perform a set intersection of their specified types
+ */
if (clean_types.from_layers || clean_types.from_source) {
var types = valid_types;
@@ -29,7 +31,11 @@ module.exports = function calculate_types(clean_types) {
return types;
}
+ /*
+ * Type restrictions requested by the address parser should only be used
+ * if both the source and layers parameters are empty, so do this last
+ */
if (clean_types.from_address_parser) {
return clean_types.from_address_parser;
}
-};
+};
\ No newline at end of file
diff --git a/middleware/confidenceScore.js b/middleware/confidenceScore.js
new file mode 100644
index 00000000..6c55bc27
--- /dev/null
+++ b/middleware/confidenceScore.js
@@ -0,0 +1,255 @@
+/**
+ *
+ * Basic confidence score should be computed and returned for each item in the results.
+ * The score should range between 0-1, and take into consideration as many factors as possible.
+ *
+ * Some factors to consider:
+ *
+ * - number of results from ES
+ * - score of item within the range of highest-lowest scores from ES (within the returned set)
+ * - linguistic match of query
+ * - detection (or specification) of query type. i.e. an address shouldn't match an admin address.
+ */
+
+var stats = require('stats-lite');
+var logger = require('pelias-logger').get('api');
+
+var RELATIVE_SCORES = true;
+
+function setup(peliasConfig) {
+ RELATIVE_SCORES = peliasConfig.hasOwnProperty('relativeScores') ? peliasConfig.relativeScores : true;
+ return computeScores;
+}
+
+function computeScores(req, res, next) {
+ // do nothing if no result data set
+ if (!req.results || !req.results.data || !req.results.meta) {
+ return next();
+ }
+
+ // compute standard deviation and mean from all scores
+ var scores = req.results.meta.scores;
+ var stdev = computeStandardDeviation(scores);
+ var mean = stats.mean(scores);
+
+ // loop through data items and determine confidence scores
+ req.results.data = req.results.data.map(computeConfidenceScore.bind(null, req, mean, stdev));
+
+ next();
+}
+
+/**
+ * Check all types of things to determine how confident we are that this result
+ * is correct. Score is based on overall score distribution in the result set
+ * as well as how closely the result matches the input parameters.
+ *
+ * @param {object} req
+ * @param {number} mean
+ * @param {number} stdev
+ * @param {object} hit
+ * @returns {object}
+ */
+function computeConfidenceScore(req, mean, stdev, hit) {
+ var dealBreakers = checkForDealBreakers(req, hit);
+ if (dealBreakers) {
+ hit.confidence = 0.5;
+ return hit;
+ }
+
+ var checkCount = 3;
+ hit.confidence = 0;
+
+ if (RELATIVE_SCORES) {
+ checkCount += 2;
+ hit.confidence += checkDistanceFromMean(hit._score, mean, stdev);
+ hit.confidence += computeZScore(hit._score, mean, stdev);
+ }
+ hit.confidence += checkName(req.clean.input, req.clean.parsed_input, hit);
+ hit.confidence += checkQueryType(req.clean.parsed_input, hit);
+ hit.confidence += checkAddress(req.clean.parsed_input, hit);
+
+ // TODO: look at categories and location
+
+ hit.confidence /= checkCount;
+
+ logger.debug('[confidence]:', hit.confidence, hit.name.default);
+
+ return hit;
+}
+
+function checkForDealBreakers(req, hit) {
+ if (!req.clean.parsed_input) {
+ return false;
+ }
+
+ if (req.clean.parsed_input.state && req.clean.parsed_input.state !== hit.admin1_abbr) {
+ logger.debug('[confidence][deal-breaker]: state !== admin1_abbr');
+ return true;
+ }
+
+ if (req.clean.parsed_input.postalcode && req.clean.parsed_input.postalcode !== hit.zip) {
+ logger.debug('[confidence][deal-breaker]: postalcode !== zip');
+ return true;
+ }
+}
+
+/**
+ * Check how statistically significant the score of this result is
+ * given mean and standard deviation
+ *
+ * @param {number} score
+ * @param {number} mean
+ * @param {number} stdev
+ * @returns {number}
+ */
+function checkDistanceFromMean(score, mean, stdev) {
+ return (score - mean) > stdev ? 1 : 0;
+}
+
+/**
+ * Compare input string or name component of parsed_input against
+ * default name in result
+ *
+ * @param {string} input
+ * @param {object|undefined} parsed_input
+ * @param {object} hit
+ * @returns {number}
+ */
+function checkName(input, parsed_input, hit) {
+ // parsed_input name should take precedence if available since it's the cleaner name property
+ if (parsed_input && parsed_input.name && hit.name.default.toLowerCase() === parsed_input.name.toLowerCase()) {
+ return 1;
+ }
+
+ // if no parsed_input check the input value as provided against result's default name
+ if (hit.name.default.toLowerCase() === input.toLowerCase()) {
+ return 1;
+ }
+
+ // if no matches detected, don't judge too harshly since it was a longshot anyway
+ return 0.7;
+}
+
+/**
+ * Penalise hits lacking a house number when the parsed input asked for one.
+ * A missing parsed input (no address was parsed from the query) is never penalised.
+ *
+ * @param {object|undefined} input parsed input of the request; may be undefined
+ * @param {object} hit
+ * @returns {number} 0 when a requested house number is absent from the hit, otherwise 1
+ */
+function checkQueryType(input, hit) {
+  if (input && input.number && (!hit.address || !hit.address.number)) {
+    return 0;
+  }
+  return 1;
+}
+
+/**
+ * Determine the quality of the property match
+ *
+ * @param {string|number|undefined|null} inputProp
+ * @param {string|number|undefined|null} hitProp
+ * @param {boolean} expectEnriched
+ * @returns {number}
+ */
+function propMatch(inputProp, hitProp, expectEnriched) {
+
+ // both missing, but expect to have enriched value in result => BAD
+ if (!inputProp && !hitProp && expectEnriched) { return 0; }
+
+ // both missing, and no enrichment expected => GOOD
+ if (!inputProp && !hitProp) { return 1; }
+
+ // input has it, result doesn't => BAD
+ if (inputProp && !hitProp) { return 0; }
+
+ // input missing, result has it, and enrichment is expected => GOOD
+ if (!inputProp && hitProp && expectEnriched) { return 1; }
+
+ // input missing, result has it, enrichment not desired => 50/50
+ if (!inputProp && hitProp) { return 0.5; }
+
+ // both present, values match => GREAT
+ if (inputProp && hitProp && inputProp.toString().toLowerCase() === hitProp.toString().toLowerCase()) { return 1; }
+
+ // ¯\_(ツ)_/¯
+ return 0.7;
+}
+
+/**
+ * Score how well the parsed input address matches a hit by averaging five
+ * propMatch checks: number, street, postalcode, state and country
+ *
+ * @param {object} input
+ * @param {string|number} [input.number]
+ * @param {string} [input.street]
+ * @param {string} [input.postalcode]
+ * @param {string} [input.state]
+ * @param {string} [input.country]
+ * @param {object} hit
+ * @param {object} [hit.address]
+ * @param {string|number} [hit.address.number]
+ * @param {string} [hit.address.street]
+ * @param {string|number} [hit.zip] NOTE(review): the body reads hit.address.zip, not hit.zip - confirm which field documents carry
+ * @param {string} [hit.admin1_abbr]
+ * @param {string} [hit.alpha3]
+ * @returns {number} mean of the five propMatch results, or 1 when input lacks a number or a street
+ */
+function checkAddress(input, hit) {
+ var checkCount = 5;
+ var res = 0;
+ // address parts are only scored when the query supplied both a house number and a street
+ if (input && input.number && input.street) {
+ res += propMatch(input.number, (hit.address ? hit.address.number : null), false);
+ res += propMatch(input.street, (hit.address ? hit.address.street : null), false);
+ res += propMatch(input.postalcode, (hit.address ? hit.address.zip: null), true);
+ res += propMatch(input.state, hit.admin1_abbr, true);
+ res += propMatch(input.country, hit.alpha3, true);
+ // average the five checks
+ res /= checkCount;
+ }
+ else {
+ res = 1;
+ }
+
+ return res;
+}
+
+/**
+ * z-scores have an effective range of -3.00 to +3.00.
+ * An average z-score is ZERO.
+ * A negative z-score indicates that the item/element is below
+ * average and a positive z-score means that the item/element
+ * is above average. When teachers say they are going to "curve"
+ * the test, they do this by computing z-scores for the students' test scores.
+ *
+ * @param {number} score
+ * @param {number} mean
+ * @param {number} stdev
+ * @returns {number}
+ */
+function computeZScore(score, mean, stdev) {
+ if (stdev < 0.01) {
+ return 0;
+ }
+ // because the effective range of z-scores is -3.00 to +3.00
+ // add 10 to ensure a positive value, and then divide by 10+3+3
+ // to further normalize to %-like result
+ return (((score - mean) / (stdev)) + 10) / 16;
+}
+
+/**
+ * Computes standard deviation given an array of values
+ *
+ * @param {Array} scores
+ * @returns {number}
+ */
+function computeStandardDeviation(scores) {
+ var stdev = stats.stdev(scores);
+ // if stdev is low, just consider it 0
+ return (stdev < 0.01) ? 0 : stdev;
+}
+
+
+module.exports = setup;
diff --git a/middleware/geocodeJSON.js b/middleware/geocodeJSON.js
index da6cfdf6..d2f58b46 100644
--- a/middleware/geocodeJSON.js
+++ b/middleware/geocodeJSON.js
@@ -25,16 +25,10 @@ function convertToGeocodeJSON(peliasConfig, req, next) {
// the GeocodeJSON spec that is implemented by this instance.
req.results.geojson.geocoding.version = '0.1';
- // OPTIONAL. Default: null. The licence of the data. In case of multiple sources,
- // and then multiple licences, can be an object with one key by source.
- // Can be a freeform text property describing the licensing details.
- // Can be a URI on the server, which outlines licensing details.
- req.results.geojson.geocoding.license = peliasConfig.host + '/license';
-
// OPTIONAL. Default: null. The attribution of the data. In case of multiple sources,
// and then multiple attributions, can be an object with one key by source.
// Can be a URI on the server, which outlines attribution details.
- req.results.geojson.geocoding.attribution = peliasConfig.host + '/attribution';
+ req.results.geojson.geocoding.attribution = peliasConfig.host + 'attribution';
// OPTIONAL. Default: null. The query that has been issued to trigger the
// search.
diff --git a/package.json b/package.json
index 5c90c84e..50dc376b 100644
--- a/package.json
+++ b/package.json
@@ -52,6 +52,7 @@
"pelias-query": "^1.1.0",
"pelias-schema": "1.0.0",
"pelias-suggester-pipeline": "2.0.2",
+ "stats-lite": "^1.0.3",
"through2": "0.6.5"
},
"devDependencies": {
diff --git a/DOCS.md b/public/apiDoc.md
similarity index 100%
rename from DOCS.md
rename to public/apiDoc.md
diff --git a/public/attribution.md b/public/attribution.md
new file mode 100644
index 00000000..db9c392e
--- /dev/null
+++ b/public/attribution.md
@@ -0,0 +1,7 @@
+## Attribution
+* Geocoding by [Pelias](https://mapzen.com/pelias) from [Mapzen](https://mapzen.com)
+* Data from
+ * [OpenStreetMap](http://www.openstreetmap.org/copyright) © OpenStreetMap contributors under [ODbL](http://opendatacommons.org/licenses/odbl/)
+ * [Quattroshapes](https://github.com/foursquare/quattroshapes/blob/master/LICENSE.md) under [CC-BY-2.0](https://creativecommons.org/licenses/by/2.0/)
+ * [GeoNames](http://www.geonames.org/) under [CC-BY-3.0](https://creativecommons.org/licenses/by/3.0/)
+ * and other sources
diff --git a/routes/v1.js b/routes/v1.js
index d5a1c183..24ecccf2 100644
--- a/routes/v1.js
+++ b/routes/v1.js
@@ -1,3 +1,4 @@
+var express = require('express');
var Router = require('express').Router;
var reverseQuery = require('../query/reverse');
@@ -16,7 +17,7 @@ var middleware = {
/** ----------------------- controllers ----------------------- **/
var controllers = {
- index: require('../controller/index'),
+ mdToHTML: require('../controller/markdownToHtml'),
place: require('../controller/place'),
search: require('../controller/search')
};
@@ -24,6 +25,7 @@ var controllers = {
/** ----------------------- controllers ----------------------- **/
var postProc = {
+ confidenceScores: require('../middleware/confidenceScore'),
renamePlacenames: require('../middleware/renamePlacenames'),
geocodeJSON: require('../middleware/geocodeJSON'),
sendJSON: require('../middleware/sendJSON')
@@ -41,12 +43,16 @@ function addRoutes(app, peliasConfig) {
var routers = {
index: createRouter([
- controllers.index()
+ controllers.mdToHTML(peliasConfig, './public/apiDoc.md')
+ ]),
+ attribution: createRouter([
+ controllers.mdToHTML(peliasConfig, './public/attribution.md')
]),
search: createRouter([
sanitisers.search.middleware,
middleware.types,
controllers.search(),
+ postProc.confidenceScores(peliasConfig),
postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig),
postProc.sendJSON
@@ -54,6 +60,7 @@ function addRoutes(app, peliasConfig) {
reverse: createRouter([
sanitisers.reverse.middleware,
controllers.search(undefined, reverseQuery),
+ // TODO: add confidence scores
postProc.renamePlacenames(),
postProc.geocodeJSON(peliasConfig),
postProc.sendJSON
@@ -72,6 +79,7 @@ function addRoutes(app, peliasConfig) {
// api root
app.get ( base, routers.index );
+ app.get ( base + 'attribution', routers.attribution );
app.get ( base + 'place', routers.place );
app.get ( base + 'autocomplete', routers.search );
app.get ( base + 'search', routers.search );
diff --git a/service/search.js b/service/search.js
index 15b5cd14..1e77f69f 100644
--- a/service/search.js
+++ b/service/search.js
@@ -23,20 +23,28 @@ function service( backend, cmd, cb ){
// map returned documents
var docs = [];
+ var meta = {
+ scores: []
+ };
+
if( data && data.hits && data.hits.total && Array.isArray(data.hits.hits)){
+
docs = data.hits.hits.map( function( hit ){
+ meta.scores.push(hit._score);
+
// map metadata in to _source so we
// can serve it up to the consumer
hit._source._id = hit._id;
hit._source._type = hit._type;
+ hit._source._score = hit._score;
return hit._source;
});
}
// fire callback
- return cb( null, docs );
+ return cb( null, docs, meta );
});
}
diff --git a/test/unit/controller/index.js b/test/unit/controller/index.js
index f59dacd4..cffe1030 100644
--- a/test/unit/controller/index.js
+++ b/test/unit/controller/index.js
@@ -1,19 +1,19 @@
-var setup = require('../../../controller/index');
+var setup = require('../../../controller/markdownToHtml');
module.exports.tests = {};
module.exports.tests.interface = function(test, common) {
test('valid interface', function(t) {
t.equal(typeof setup, 'function', 'setup is a function');
- t.equal(typeof setup(), 'function', 'setup returns a controller');
+ t.equal(typeof setup({}, './public/apiDoc.md'), 'function', 'setup returns a controller');
t.end();
});
};
module.exports.tests.info_json = function(test, common) {
test('returns server info in json', function(t) {
- var controller = setup();
+ var controller = setup({}, './public/attribution.md');
var req = {
accepts: function (format) {
t.equal(format, 'html', 'check for Accepts:html');
@@ -22,9 +22,8 @@ module.exports.tests.info_json = function(test, common) {
};
var res = { json: function( json ){
t.equal(typeof json, 'object', 'returns json');
- t.equal(typeof json.name, 'string', 'name');
- t.equal(typeof json.version, 'object', 'version');
- t.equal(typeof json.version.number, 'string', 'version number');
+ t.assert(json.hasOwnProperty('markdown'), 'return object contains markdown property');
+ t.assert(json.hasOwnProperty('html'), 'return object contains html property');
t.end();
}};
controller( req, res );
@@ -33,21 +32,24 @@ module.exports.tests.info_json = function(test, common) {
module.exports.tests.info_html = function(test, common) {
test('returns server info in html', function(t) {
+ var filePath = './foo.md';
var style = '';
var mockText = 'this text should show up in the html content';
var fsMock = {
readFileSync: function (path, format) {
- t.equal(path, './DOCS.md', 'open DOCS.md file');
+ t.equal(path, filePath, 'open specified file');
t.equal(format, 'utf8', 'file format');
return mockText;
}
};
var proxyquire = require('proxyquire');
- var setup = proxyquire('../../../controller/index', { 'fs': fsMock });
+ var setup = proxyquire('../../../controller/markdownToHtml', { 'fs': fsMock });
- var controller = setup();
+ var config = { version: '1.1.1' };
+
+ var controller = setup(config, filePath);
var req = {
accepts: function () {
return true;
diff --git a/test/unit/mock/backend.js b/test/unit/mock/backend.js
index fe617fe7..201ab7b5 100644
--- a/test/unit/mock/backend.js
+++ b/test/unit/mock/backend.js
@@ -10,6 +10,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){
return cb( undefined, searchEnvelope([{
_id: 'myid1',
_type: 'mytype1',
+ _score: 10,
_source: {
value: 1,
center_point: { lat: 100.1, lon: -50.5 },
@@ -19,6 +20,7 @@ responses['client/search/ok/1'] = function( cmd, cb ){
}, {
_id: 'myid2',
_type: 'mytype2',
+ _score: 20,
_source: {
value: 2,
center_point: { lat: 100.2, lon: -51.5 },
@@ -35,6 +37,7 @@ responses['client/mget/ok/1'] = function( cmd, cb ){
return cb( undefined, mgetEnvelope([{
_id: 'myid1',
_type: 'mytype1',
+ _score: 10,
found: true,
_source: {
value: 1,
@@ -45,6 +48,7 @@ responses['client/mget/ok/1'] = function( cmd, cb ){
}, {
_id: 'myid2',
_type: 'mytype2',
+ _score: 20,
found: true,
_source: {
value: 2,
diff --git a/test/unit/service/search.js b/test/unit/service/search.js
index e1f97541..a7212775 100644
--- a/test/unit/service/search.js
+++ b/test/unit/service/search.js
@@ -19,6 +19,7 @@ module.exports.tests.functional_success = function(test, common) {
var expected = [
{
_id: 'myid1', _type: 'mytype1',
+ _score: 10,
value: 1,
center_point: { lat: 100.1, lon: -50.5 },
name: { default: 'test name1' },
@@ -26,6 +27,7 @@ module.exports.tests.functional_success = function(test, common) {
},
{
_id: 'myid2', _type: 'mytype2',
+ _score: 20,
value: 2,
center_point: { lat: 100.2, lon: -51.5 },
name: { default: 'test name2' },