diff --git a/query/address_search_using_ids.js b/query/address_search_using_ids.js index 413ab3ce..d7e2d1fd 100644 --- a/query/address_search_using_ids.js +++ b/query/address_search_using_ids.js @@ -20,65 +20,80 @@ addressUsingIdsQuery.filter( peliasQuery.view.boundary_rect ); addressUsingIdsQuery.filter( peliasQuery.view.sources ); // -------------------------------- - -// Red Lion, PA -- parsed as locality/state, localadmin/state, and neighbourhood/state -// Chelsea -- parsed as neighbourhood, localadmin, and locality -// Manhattan -- parsed as borough, locality, and localadmin -// Luxembourg -- parsed as country, locality, and region - -// if any placeholder results are at neighbourhood, borough, locality, or localadmin layers, filter by those ids at those layers -// fallback to county -// if any placeholder results are at county or macrocounty layers, filter by those ids at those layers -// fallback to region -// if any placeholder results are at region or macroregion layers, filter by those ids at those layers -// fallback to dependency/country -// if any placeholder results are at dependency or country layers, filter by those ids at those layers - - -// address in Red Lion, PA -- find results at layer=address -// neighbourhood_id in [85844063, 85844067] -// locality_id in [101717221] -// localadmin_id in [404487867] -// search all of the above - -// address in Chelsea -// neighbourhood_id in [85786511, 85810589, 85769021, 85890029, 85810579, 85810591, 85810575, 85772883, 420514219] -// locality_id in [85950359, 85914491, 101932747, 85951865, 101715289, 85943049, 101733697, 101722101, 101738587] -// localadmin_id in [404476575, 404508239, 404474971, 404527169, 404494675, 404503811, 404519887, 404488679, 404538119] - -// address in Manhattan +// This query is a departure from traditional Pelias queries where textual +// names of admin areas were looked up. 
This query uses the ids returned by +// placeholder for lookups which dramatically reduces the amount of information +// that ES has to store and allows us to have placeholder handle altnames on +// behalf of Pelias. +// +// For the happy path, an input like '30 West 26th Street, Manhattan' would result +// in: // neighbourhood_id in [] // borough_id in [421205771] // locality_id in [85945171, 85940551, 85972655] // localadmin_id in [404502889, 404499147, 404502891, 85972655] -// search all of the above - -// address in Luxembourg -// country_id in [85633275] -// region_id in [85681727, 85673875] -// locality_id in [101751765] -// search locality first, then region perhaps - - -// if there are locality/localadmin layers, return ['locality', 'localadmin'] -// if there are region/macroregion layers, return ['region', 'macroregion'] +// +// Where the ids are for all the various Manhattans. Each of those could +// conceivably be the Manhattan that the user was referring to, so all must be +// queried for at the same time. +// +// A counter example for this is '1 West Market Street, York, PA' where York, PA +// can be interpreted as a locality OR county. From experience, when there's +// ambiguity between locality and county for an input, the user is, with complete +// metaphysical certitude, referring to the city. If they were referring to the +// county, they would have entered 'York County, PA'. The point is that it's +// insufficient to just query for all ids because, in this case, '1 West Market Street' +// in other cities in York County, PA would be returned and would be both jarring +// to the user and almost certainly incorrect. For example, +// the following could be returned (all are towns in York County, PA): +// - 1 West Market Street, Dallastown, PA +// - 1 West Market Street, Fawn Grove, PA +// - 1 West Market Street, Shrewsbury, PA +// etc. 
+// +// To avoid this calamitous response, this query takes the approach of +// "granularity bands". That is, if there are any ids in the first set of any +// of these granularities: +// - neighbourhood +// - borough +// - locality +// - localadmin +// - region +// - macroregion +// - dependency +// - country +// +// then query for all ids in only those layers. Falling back, if there are +// no ids in those layers, query for the county/macrocounty layers. +// +// This methodology ensures that no happened-to-match-on-county results are returned. +// +// The decision was made to include all other layers in one band to solve the issue +// where a country and city share a name, such as Mexico, which could be +// interpreted as a country AND city (in Missouri). The data itself will sort +// out which is correct. That is, it's unlikely that "11 Rock Springs Dr" exists +// in Mexico the country due to naming conventions and would be filtered out +// (though it could, but that's good because it's legitimate) const granularity_bands = [ ['neighbourhood', 'borough', 'locality', 'localadmin', 'region', 'macroregion', 'dependency', 'country'], ['county', 'macrocounty'] ]; +// returns true IFF there are *any* results in the granularity band function anyResultsAtGranularityBand(results, band) { return results.some(result => _.includes(band, result.layer)); } +// returns the source_ids of results at the requested layer function getIdsAtLayer(results, layer) { return results.filter(result => result.layer === layer).map(_.property('source_id')); } /** map request variables to query variables for all inputs - provided by this HTTP request. + provided by this HTTP request. This function operates on res.data which is the + Document-ified placeholder response. 
**/ function generateQuery( clean, res ){ const vs = new peliasQuery.Vars( defaults ); @@ -103,8 +118,11 @@ function generateQuery( clean, res ){ } vs.var( 'input:street', clean.parsed_text.street ); + // find the first granularity band for which there are results const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band)); + // if there's a granularity band, accumulate the ids from each layer in the band + // into an object mapping layer->ids of those layers if (granularity_band) { const layers_to_ids = granularity_band.reduce((acc, layer) => { acc[layer] = getIdsAtLayer(res.data, layer);