mirror of https://github.com/pelias/api.git
228 lines
5.6 KiB
228 lines
5.6 KiB
'use strict'; |
|
|
|
/** |
|
* |
|
* Basic confidence score should be computed and returned for each item in the results. |
|
* The score should range between 0-1, and take into consideration as many factors as possible. |
|
* |
|
* Some factors to consider: |
|
* |
|
* - number of results from ES |
|
* - fallback status (aka layer match between expected and actual) |
|
*/ |
|
|
|
var check = require('check-types'); |
|
var logger = require('pelias-logger').get('api'); |
|
const _ = require('lodash'); |
|
|
|
function setup() { |
|
return computeScores; |
|
} |
|
|
|
function computeScores(req, res, next) { |
|
// do nothing if no result data set or if the query is not of the fallback variety |
|
// later add disambiguation to this list |
|
if (check.undefined(req.clean) || check.undefined(res) || |
|
check.undefined(res.data) || check.undefined(res.meta) || |
|
res.meta.query_type !== 'fallback') { |
|
return next(); |
|
} |
|
|
|
// loop through data items and determine confidence scores |
|
res.data = res.data.map(computeConfidenceScore.bind(null, req)); |
|
|
|
next(); |
|
} |
|
|
|
/** |
|
* Check all types of things to determine how confident we are that this result |
|
* is correct. |
|
* |
|
* @param {object} req |
|
* @param {object} hit |
|
* @returns {object} |
|
*/ |
|
function computeConfidenceScore(req, hit) { |
|
|
|
// if parsed text doesn't exist, which it never should, just assign a low confidence and move on |
|
if (!req.clean.hasOwnProperty('parsed_text')) { |
|
hit.confidence = 0.1; |
|
hit.match_type = 'unknown'; |
|
return hit; |
|
} |
|
|
|
// start with a confidence level of 1 because we trust ES queries to be accurate |
|
hit.confidence = 1.0; |
|
|
|
// in the case of fallback there might be deductions |
|
hit.confidence *= checkFallbackLevel(req, hit); |
|
|
|
// truncate the precision |
|
hit.confidence = Number((hit.confidence).toFixed(3)); |
|
|
|
return hit; |
|
} |
|
|
|
function checkFallbackLevel(req, hit) { |
|
if (checkFallbackOccurred(req, hit)) { |
|
hit.match_type = 'fallback'; |
|
|
|
// if we know a fallback occurred, deduct points based on layer granularity |
|
switch (hit.layer) { |
|
case 'venue': |
|
logger.warn('Fallback scenarios should not result in address or venue records!', req.clean.parsed_text); |
|
return 0.8; |
|
case 'address': |
|
return 0.8; |
|
case 'street': |
|
return 0.8; |
|
case 'postalcode': |
|
return 0.8; |
|
case 'localadmin': |
|
case 'locality': |
|
case 'borough': |
|
case 'neighbourhood': |
|
return 0.6; |
|
case 'macrocounty': |
|
case 'county': |
|
return 0.4; |
|
case 'region': |
|
return 0.3; |
|
case 'country': |
|
case 'dependency': |
|
case 'macroregion': |
|
return 0.1; |
|
default: |
|
return 0.1; |
|
} |
|
|
|
} |
|
|
|
hit.match_type = 'exact'; |
|
return 1.0; |
|
} |
|
|
|
/** |
|
* In parsed_text we might find any of the following properties: |
|
* query |
|
* number |
|
* street |
|
* neighbourhood |
|
* borough |
|
* city |
|
* county |
|
* state |
|
* postalcode |
|
* country |
|
* |
|
* They do not map 1:1 to our layers so the following somewhat complicated |
|
* mapping structure is needed to set clear rules for comparing what was requested |
|
* by the query and what has been received as a result to determine if a fallback occurred. |
|
*/ |
|
const fallbackRules = [ |
|
{ |
|
name: 'venue', |
|
notSet: [], |
|
set: ['query'], |
|
expectedLayers: ['venue'] |
|
}, |
|
{ |
|
name: 'address', |
|
notSet: ['query'], |
|
set: ['number', 'street'], |
|
expectedLayers: ['address'] |
|
}, |
|
{ |
|
name: 'street', |
|
notSet: ['query', 'number'], |
|
set: ['street'], |
|
expectedLayers: ['street'] |
|
}, |
|
{ |
|
name: 'postalcode', |
|
notSet: ['query', 'number', 'street'], |
|
set: ['postalcode'], |
|
expectedLayers: ['postalcode'] |
|
}, |
|
{ |
|
name: 'neighbourhood', |
|
notSet: ['query', 'number', 'street', 'postalcode'], |
|
set: ['neighbourhood'], |
|
expectedLayers: ['neighbourhood'] |
|
}, |
|
{ |
|
name: 'borough', |
|
notSet: ['query', 'number', 'street', 'postalcode', 'neighbourhood'], |
|
set: ['borough'], |
|
expectedLayers: ['borough'] |
|
}, |
|
{ |
|
name: 'city', |
|
notSet: ['query', 'number', 'street', 'postalcode', 'neighbourhood', 'borough'], |
|
set: ['city'], |
|
expectedLayers: ['borough', 'locality', 'localadmin'] |
|
}, |
|
{ |
|
name: 'county', |
|
notSet: ['query', 'number', 'street', 'postalcode', 'neighbourhood', 'borough', 'city'], |
|
set: ['county'], |
|
expectedLayers: ['county'] |
|
}, |
|
{ |
|
name: 'state', |
|
notSet: ['query', 'number', 'street', 'postalcode', 'neighbourhood', 'borough', 'city', 'county'], |
|
set: ['state'], |
|
expectedLayers: ['region'] |
|
}, |
|
{ |
|
name: 'country', |
|
notSet: ['query', 'number', 'street', 'postalcode', 'neighbourhood', 'borough', 'city', 'county', 'state'], |
|
set: ['country'], |
|
expectedLayers: ['country'] |
|
} |
|
]; |
|
|
|
function checkFallbackOccurred(req, hit) { |
|
|
|
// short-circuit after finding the first fallback scenario |
|
const res = _.find(fallbackRules, (rule) => { |
|
|
|
return ( |
|
// verify that more granular properties are not set |
|
notSet(req.clean.parsed_text, rule.notSet) && |
|
// verify that expected property is set |
|
areSet(req.clean.parsed_text, rule.set) && |
|
// verify that expected layer(s) was not returned |
|
rule.expectedLayers.indexOf(hit.layer) === -1 |
|
); |
|
}); |
|
|
|
return !!res; |
|
} |
|
|
|
function notSet(parsed_text, notSet) { |
|
if (notSet.length === 0) { |
|
return true; |
|
} |
|
|
|
return ( |
|
_.every(notSet, (prop) => { |
|
return !_.get(parsed_text, prop, false); |
|
}) |
|
); |
|
} |
|
|
|
function areSet(parsed_text, areSet) { |
|
if (areSet.length === 0) { |
|
logger.warn('Expected properties in fallbackRules should never be empty'); |
|
return true; |
|
} |
|
|
|
return ( |
|
_.every(areSet, (prop) => { |
|
return _.get(parsed_text, prop, false); |
|
}) |
|
); |
|
} |
|
|
|
module.exports = setup;
|
|
|