Browse Source

drastically revised comments

pull/912/head
Stephen Hess 8 years ago
parent
commit
f176b86657
  1. 96
      query/address_search_using_ids.js

96
query/address_search_using_ids.js

@ -20,65 +20,80 @@ addressUsingIdsQuery.filter( peliasQuery.view.boundary_rect );
addressUsingIdsQuery.filter( peliasQuery.view.sources ); addressUsingIdsQuery.filter( peliasQuery.view.sources );
// -------------------------------- // --------------------------------
// This query is a departure from traditional Pelias queries where textual
// Red Lion, PA -- parsed as locality/state, localadmin/state, and neighbourhood/state // names of admin areas were looked up. This query uses the ids returned by
// Chelsea -- parsed as neighbourhood, localadmin, and locality // placeholder for lookups which dramatically reduces the amount of information
// Manhattan -- parsed as borough, locality, and localadmin // that ES has to store and allows us to have placeholder handle altnames on
// Luxembourg -- parsed as country, locality, and region // behalf of Pelias.
//
// if any placeholder results are at neighbourhood, borough, locality, or localadmin layers, filter by those ids at those layers // For the happy path, an input like '30 West 26th Street, Manhattan' would result
// fallback to county // in:
// if any placeholder results are at county or macrocounty layers, filter by those ids at those layers
// fallback to region
// if any placeholder results are at region or macroregion layers, filter by those ids at those layers
// fallback to dependency/country
// if any placeholder results are at dependency or country layers, filter by those ids at those layers
// address in Red Lion, PA -- find results at layer=address
// neighbourhood_id in [85844063, 85844067]
// locality_id in [101717221]
// localadmin_id in [404487867]
// search all of the above
// address in Chelsea
// neighbourhood_id in [85786511, 85810589, 85769021, 85890029, 85810579, 85810591, 85810575, 85772883, 420514219]
// locality_id in [85950359, 85914491, 101932747, 85951865, 101715289, 85943049, 101733697, 101722101, 101738587]
// localadmin_id in [404476575, 404508239, 404474971, 404527169, 404494675, 404503811, 404519887, 404488679, 404538119]
// address in Manhattan
// neighbourhood_id in [] // neighbourhood_id in []
// borough_id in [421205771] // borough_id in [421205771]
// locality_id in [85945171, 85940551, 85972655] // locality_id in [85945171, 85940551, 85972655]
// localadmin_id in [404502889, 404499147, 404502891, 85972655] // localadmin_id in [404502889, 404499147, 404502891, 85972655]
// search all of the above //
// Where the ids are for all the various Manhattans. Each of those could
// address in Luxembourg // conceivably be the Manhattan that the user was referring to, so all must be
// country_id in [85633275] // queried for at the same time.
// region_id in [85681727, 85673875] //
// locality_id in [101751765] // A counter example for this is '1 West Market Street, York, PA' where York, PA
// search locality first, then region perhaps // can be interpreted as a locality OR county. From experience, when there's
// ambiguity between locality and county for an input, the user is, with complete
// metaphysical certitude, referring to the city. If they were referring to the
// if there are locality/localadmin layers, return ['locality', 'localadmin'] // county, they would have entered 'York County, PA'. The point is that it's
// if there are region/macroregion layers, return ['region', 'macroregion'] // insufficient to just query for all ids because, in this case, '1 West Market Street'
// in other cities in York County, PA would be returned and would be both jarring
// to the user and almost certainly lead to incorrect results. For example,
// the following could be returned (all are towns in York County, PA):
// - 1 West Market Street, Dallastown, PA
// - 1 West Market Street, Fawn Grove, PA
// - 1 West Market Street, Shrewsbury, PA
// etc.
//
// To avoid this calamitous response, this query takes the approach of
// "granularity bands". That is, if there are any ids in the first set of any
// of these granularities:
// - neighbourhood
// - borough
// - locality
// - localadmin
// - region
// - macroregion
// - dependency
// - country
//
// then query for all ids in only those layers. Falling back, if there are
// no ids in those layers, query for the county/macrocounty layers.
//
// This methodology ensures that no happened-to-match-on-county results are returned.
//
// The decision was made to include all other layers in one band to solve the issue
// where a country and city share a name, such as Mexico, which could be
// interpreted as a country AND city (in Missouri). The data itself will sort
// out which is correct. That is, it's unlikely that "11 Rock Springs Dr" exists
// in Mexico the country due to naming conventions and would be filtered out
// (though it could, but that's good because it's legitimate)
const granularity_bands = [ const granularity_bands = [
['neighbourhood', 'borough', 'locality', 'localadmin', 'region', 'macroregion', 'dependency', 'country'], ['neighbourhood', 'borough', 'locality', 'localadmin', 'region', 'macroregion', 'dependency', 'country'],
['county', 'macrocounty'] ['county', 'macrocounty']
]; ];
// returns true IFF there are *any* results in the granularity band
function anyResultsAtGranularityBand(results, band) { function anyResultsAtGranularityBand(results, band) {
return results.some(result => _.includes(band, result.layer)); return results.some(result => _.includes(band, result.layer));
} }
// returns the ids of results at the requested layer
function getIdsAtLayer(results, layer) { function getIdsAtLayer(results, layer) {
return results.filter(result => result.layer === layer).map(_.property('source_id')); return results.filter(result => result.layer === layer).map(_.property('source_id'));
} }
/** /**
map request variables to query variables for all inputs map request variables to query variables for all inputs
provided by this HTTP request. provided by this HTTP request. This function operates on res.data which is the
Document-ified placeholder response.
**/ **/
function generateQuery( clean, res ){ function generateQuery( clean, res ){
const vs = new peliasQuery.Vars( defaults ); const vs = new peliasQuery.Vars( defaults );
@ -103,8 +118,11 @@ function generateQuery( clean, res ){
} }
vs.var( 'input:street', clean.parsed_text.street ); vs.var( 'input:street', clean.parsed_text.street );
// find the first granularity band for which there are results
const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band)); const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band));
// if there's a granularity band, accumulate the ids from each layer in the band
// into an object mapping layer->ids of those layers
if (granularity_band) { if (granularity_band) {
const layers_to_ids = granularity_band.reduce((acc, layer) => { const layers_to_ids = granularity_band.reduce((acc, layer) => {
acc[layer] = getIdsAtLayer(res.data, layer); acc[layer] = getIdsAtLayer(res.data, layer);

Loading…
Cancel
Save