// api/query/address_search_using_ids.js

const peliasQuery = require('pelias-query');
const defaults = require('./search_defaults');
const logger = require('pelias-logger').get('api');
const _ = require('lodash');
const check = require('check-types');

//------------------------------
// general-purpose search query
//------------------------------
const addressUsingIdsQuery = new peliasQuery.layout.AddressesUsingIdsQuery();

// scoring boost: the phrase view wrapped by the focus-only function view
addressUsingIdsQuery.score(
  peliasQuery.view.focus_only_function( peliasQuery.view.phrase )
);
// --------------------------------

// non-scoring hard filters, registered in the order listed
[
  peliasQuery.view.boundary_country,
  peliasQuery.view.boundary_circle,
  peliasQuery.view.boundary_rect,
  peliasQuery.view.sources
].forEach( view => addressUsingIdsQuery.filter( view ) );
// --------------------------------

// This query is a departure from traditional Pelias queries where textual
// names of admin areas were looked up.  This query uses the ids returned by
// placeholder for lookups which dramatically reduces the amount of information
// that ES has to store and allows us to have placeholder handle altnames on
// behalf of Pelias.
//
// For the happy path, an input like '30 West 26th Street, Manhattan' would result
// in:
// neighbourhood_id in []
// borough_id in [421205771]
// locality_id in [85945171, 85940551, 85972655]
// localadmin_id in [404502889, 404499147, 404502891, 85972655]
//
// Where the ids are for all the various Manhattans.  Each of those could
// conceivably be the Manhattan that the user was referring to, so all must be
// queried for at the same time.
//
// A counter example for this is '1 West Market Street, York, PA' where York, PA
// can be interpreted as a locality OR county.  From experience, when there's
// ambiguity between locality and county for an input, the user is, with complete
// metaphysical certitude, referring to the city.  If they were referring to the
// county, they would have entered 'York County, PA'.  The point is that it's
// insufficient to just query for all ids because, in this case, '1 West Market Street'
// in other cities in York County, PA would be returned, which would be both jarring
// to the user and almost certainly incorrect.  For example,
// the following could be returned (all are towns in York County, PA):
// - 1 West Market Street, Dallastown, PA
// - 1 West Market Street, Fawn Grove, PA
// - 1 West Market Street, Shrewsbury, PA
// etc.
//
// To avoid this calamitous response, this query takes the approach of
// "granularity bands".  That is, if there are any ids at any of the
// granularities in the first band:
// - neighbourhood
// - borough
// - locality
// - localadmin
// - region
// - macroregion
// - dependency
// - country
//
// then query for all ids in only those layers.  Falling back, if there are
// no ids in those layers, query for the county/macrocounty layers.
//
// This methodology ensures that no happened-to-match-on-county results are returned.
//
// The decision was made to combine all non-county layers into one band to solve
// the issue where a country and city share a name, such as Mexico, which could be
// interpreted as a country AND city (in Missouri).  The data itself will sort
// out which is correct.  That is, it's unlikely that "11 Rock Springs Dr" exists
// in Mexico the country due to naming conventions, so that interpretation would
// be filtered out (though it could exist, which is fine because it's legitimate)

// bands are listed in order of preference: generateQuery uses the first band
// for which the placeholder results contain at least one matching layer
const granularity_bands = [
  // preferred band: every non-county layer
  [
    'neighbourhood',
    'borough',
    'locality',
    'localadmin',
    'region',
    'macroregion',
    'dependency',
    'country'
  ],
  // fallback band: county-level layers
  [
    'county',
    'macrocounty'
  ]
];

// returns true IFF at least one of the results has a `layer` that is a member
// of the supplied granularity band (a list of layer names), false otherwise
function anyResultsAtGranularityBand(results, band) {
  const isInBand = ({ layer }) => _.includes(band, layer);

  return results.some(isInBand);
}

// returns the `source_id` of every result whose `layer` strictly equals the
// requested layer, in the same order as they appear in `results`
function getIdsAtLayer(results, layer) {
  const resultsAtLayer = results.filter(result => result.layer === layer);
  const sourceIdOf = _.property('source_id');

  return resultsAtLayer.map(result => sourceIdOf(result));
}

/**
  translate the sanitized request variables in `clean` into query variables and
  render the address-search-using-ids query.  The admin ids are taken from
  res.data, which is the Document-ified placeholder response; when res.data is
  null or undefined no layer ids are added to the query.

  returns an object holding the query `type` and the rendered query `body`.
**/
function generateQuery( clean, res ){
  const vs = new peliasQuery.Vars( defaults );
  const results = _.defaultTo(res.data, []);

  // true only when every one of the named request parameters holds a number
  const allNumbers = (...params) => params.every(param => check.number(clean[param]));

  // sources
  if( !_.isEmpty(clean.sources) ) {
    vs.var( 'sources', clean.sources );
  }

  // size
  if( clean.querySize ) {
    vs.var( 'size', clean.querySize );
  }

  // the housenumber is only passed along when present, the street always is
  const parsed_text = clean.parsed_text;
  if( !_.isEmpty(parsed_text.number) ){
    vs.var( 'input:housenumber', parsed_text.number );
  }
  vs.var( 'input:street', parsed_text.street );

  // pick the first granularity band that has at least one result ...
  const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band));

  // ... and, when there is one, map every layer in that band to the ids found
  // at that layer (a band being found implies `results` is res.data itself)
  if (granularity_band) {
    const layers_to_ids = {};
    granularity_band.forEach(layer => {
      layers_to_ids[layer] = getIdsAtLayer(results, layer);
    });

    // hand the mapping over as one object via `var`; calling `set` would flatten
    // it into key/value pairs and make identifying layers harder in query module
    vs.var('input:layers', layers_to_ids);
  }

  // focus point
  if( allNumbers('focus.point.lat', 'focus.point.lon') ){
    vs.set({
      'focus:point:lat': clean['focus.point.lat'],
      'focus:point:lon': clean['focus.point.lon']
    });
  }

  // boundary rect: all four edges are required
  if( allNumbers(
    'boundary.rect.min_lat',
    'boundary.rect.max_lat',
    'boundary.rect.min_lon',
    'boundary.rect.max_lon'
  ) ){
    vs.set({
      'boundary:rect:top': clean['boundary.rect.max_lat'],
      'boundary:rect:right': clean['boundary.rect.max_lon'],
      'boundary:rect:bottom': clean['boundary.rect.min_lat'],
      'boundary:rect:left': clean['boundary.rect.min_lon']
    });
  }

  // boundary circle: the centre is required, the radius is optional
  if( allNumbers('boundary.circle.lat', 'boundary.circle.lon') ){
    vs.set({
      'boundary:circle:lat': clean['boundary.circle.lat'],
      'boundary:circle:lon': clean['boundary.circle.lon']
    });

    // the radius is rounded to a whole number and suffixed with 'km'
    if( allNumbers('boundary.circle.radius') ){
      vs.set({
        'boundary:circle:radius': `${Math.round( clean['boundary.circle.radius'] )}km`
      });
    }
  }

  // boundary country
  if( check.string(clean['boundary.country']) ){
    vs.set({
      'boundary:country': clean['boundary.country']
    });
  }

  const body = addressUsingIdsQuery.render(vs);

  return {
    type: 'address_search_using_ids',
    body
  };

}

// the query generator function is this module's only export
module.exports = generateQuery;
added query for address search with ids 7 years ago			`const peliasQuery = require('pelias-query');`
			`const defaults = require('./search_defaults');`
			`const logger = require('pelias-logger').get('api');`
			`const _ = require('lodash');`
			`const check = require('check-types');`

			`//------------------------------`
			`// general-purpose search query`
			`//------------------------------`
			`const addressUsingIdsQuery = new peliasQuery.layout.AddressesUsingIdsQuery();`

			`// scoring boost`
added support for focus-only function 7 years ago			`addressUsingIdsQuery.score( peliasQuery.view.focus_only_function( peliasQuery.view.phrase ) );`
added query for address search with ids 7 years ago			`// --------------------------------`

			`// non-scoring hard filters`
			`addressUsingIdsQuery.filter( peliasQuery.view.boundary_country );`
			`addressUsingIdsQuery.filter( peliasQuery.view.boundary_circle );`
			`addressUsingIdsQuery.filter( peliasQuery.view.boundary_rect );`
			`addressUsingIdsQuery.filter( peliasQuery.view.sources );`
			`// --------------------------------`

drastically revised comments 7 years ago			`// This query is a departure from traditional Pelias queries where textual`
			`// names of admin areas were looked up. This query uses the ids returned by`
			`// placeholder for lookups which dramatically reduces the amount of information`
			`// that ES has to store and allows us to have placeholder handle altnames on`
			`// behalf of Pelias.`
			`//`
			`// For the happy path, an input like '30 West 26th Street, Manhattan' would result`
			`// in:`
added query for address search with ids 7 years ago			`// neighbourhood_id in []`
			`// borough_id in [421205771]`
			`// locality_id in [85945171, 85940551, 85972655]`
			`// localadmin_id in [404502889, 404499147, 404502891, 85972655]`
drastically revised comments 7 years ago			`//`
			`// Where the ids are for all the various Manhattans. Each of those could`
			`// conceivably be the Manhattan that the user was referring to so so all must be`
			`// queried for at the same time.`
			`//`
			`// A counter example for this is '1 West Market Street, York, PA' where York, PA`
			`// can be interpreted as a locality OR county. From experience, when there's`
			`// ambiguity between locality and county for an input, the user is, with complete`
			`// metaphysical certitude, referring to the city. If they were referring to the`
			`// county, they would have entered 'York County, PA'. The point is that it's`
			`// insufficient to just query for all ids because, in this case, '1 West Market Street'`
			`// in other cities in York County, PA would be returned and would be both jarring`
			`// to the user and almost certainly leads to incorrect results. For example,`
			`// the following could be returned (all are towns in York County, PA):`
			`// - 1 West Market Street, Dallastown, PA`
			`// - 1 West Market Street, Fawn Grove, PA`
			`// - 1 West Market Street, Shrewsbury, PA`
			`// etc.`
			`//`
			`// To avoid this calamitous response, this query takes the approach of`
			`// "granularity bands". That is, if there are any ids in the first set of any`
			`// of these granularities:`
			`// - neighbourhood`
			`// - borough`
			`// - locality`
			`// - localadmin`
			`// - region`
			`// - macroregion`
			`// - dependency`
			`// - country`
			`//`
			`// then query for all ids in only those layers. Falling back, if there are`
			`// no ids in those layers, query for the county/macrocounty layers.`
			`//`
			`// This methodology ensures that no happened-to-match-on-county results are returned.`
			`//`
			`// The decision was made to include all other layers in one to solve the issue`
			`// where a country and city share a name, such as Mexico, which could be`
			`// interpreted as a country AND city (in Missouri). The data itself will sort`
			`// out which is correct. That is, it's unlikely that "11 Rock Springs Dr" exists`
			`// in Mexico the country due to naming conventions and would be filtered out`
			`// (though it could, but that's good because it's legitimate)`
added query for address search with ids 7 years ago
			`const granularity_bands = [`
combined non-county layers into one 7 years ago			`['neighbourhood', 'borough', 'locality', 'localadmin', 'region', 'macroregion', 'dependency', 'country'],`
			`['county', 'macrocounty']`
added query for address search with ids 7 years ago			`];`

drastically revised comments 7 years ago			`// returns IFF there are any results in the granularity band`
added query for address search with ids 7 years ago			`function anyResultsAtGranularityBand(results, band) {`
code cleanup, shorter function syntax 7 years ago			`return results.some(result => _.includes(band, result.layer));`
added query for address search with ids 7 years ago			`}`

drastically revised comments 7 years ago			`// returns the ids of results at the requested layer`
added query for address search with ids 7 years ago			`function getIdsAtLayer(results, layer) {`
code cleanup, shorter function syntax 7 years ago			`return results.filter(result => result.layer === layer).map(_.property('source_id'));`
added query for address search with ids 7 years ago			`}`

			`/**`
			`map request variables to query variables for all inputs`
drastically revised comments 7 years ago			`provided by this HTTP request. This function operates on res.data which is the`
			`Document-ified placeholder repsonse.`
added query for address search with ids 7 years ago			`**/`
			`function generateQuery( clean, res ){`
			`const vs = new peliasQuery.Vars( defaults );`
			`const results = _.defaultTo(res.data, []);`

			`// sources`
			`if( !_.isEmpty(clean.sources) ) {`
			`vs.var( 'sources', clean.sources);`
			`}`

			`// size`
			`if( clean.querySize ) {`
			`vs.var( 'size', clean.querySize );`
			`}`

			`if( ! _.isEmpty(clean.parsed_text.number) ){`
			`vs.var( 'input:housenumber', clean.parsed_text.number );`
			`}`
			`vs.var( 'input:street', clean.parsed_text.street );`

drastically revised comments 7 years ago			`// find the first granularity band for which there are results`
code cleanup, shorter function syntax 7 years ago			`const granularity_band = granularity_bands.find(band => anyResultsAtGranularityBand(results, band));`
added query for address search with ids 7 years ago
drastically revised comments 7 years ago			`// if there's a granularity band, accumulate the ids from each layer in the band`
			`// into an object mapping layer->ids of those layers`
added query for address search with ids 7 years ago			`if (granularity_band) {`
			`const layers_to_ids = granularity_band.reduce((acc, layer) => {`
			`acc[layer] = getIdsAtLayer(res.data, layer);`
			`return acc;`
			`}, {});`

code cleanup, shorter function syntax 7 years ago			// use an object here instead of calling `set` since that flattens out an
			`// object into key/value pairs and makes identifying layers harder in query module`
don't serialize input:layers when passing to query 7 years ago			`vs.var('input:layers', layers_to_ids);`
added query for address search with ids 7 years ago
			`}`

			`// focus point`
			`if( check.number(clean['focus.point.lat']) &&`
			`check.number(clean['focus.point.lon']) ){`
			`vs.set({`
			`'focus:point:lat': clean['focus.point.lat'],`
			`'focus:point:lon': clean['focus.point.lon']`
			`});`
			`}`

			`// boundary rect`
			`if( check.number(clean['boundary.rect.min_lat']) &&`
			`check.number(clean['boundary.rect.max_lat']) &&`
			`check.number(clean['boundary.rect.min_lon']) &&`
			`check.number(clean['boundary.rect.max_lon']) ){`
			`vs.set({`
			`'boundary:rect:top': clean['boundary.rect.max_lat'],`
			`'boundary:rect:right': clean['boundary.rect.max_lon'],`
			`'boundary:rect:bottom': clean['boundary.rect.min_lat'],`
			`'boundary:rect:left': clean['boundary.rect.min_lon']`
			`});`
			`}`

			`// boundary circle`
			`if( check.number(clean['boundary.circle.lat']) &&`
			`check.number(clean['boundary.circle.lon']) ){`
			`vs.set({`
			`'boundary:circle:lat': clean['boundary.circle.lat'],`
			`'boundary:circle:lon': clean['boundary.circle.lon']`
			`});`

			`if( check.number(clean['boundary.circle.radius']) ){`
			`vs.set({`
			`'boundary:circle:radius': Math.round( clean['boundary.circle.radius'] ) + 'km'`
			`});`
			`}`
			`}`

			`// boundary country`
			`if( check.string(clean['boundary.country']) ){`
			`vs.set({`
			`'boundary:country': clean['boundary.country']`
			`});`
			`}`

			`return {`
feat(queries): Normalize all query names They should start with the endpoint (ideally), and address_search_using_ids should not have the same query name as 'search_fallback'. 6 years ago			`type: 'address_search_using_ids',`
added query for address search with ids 7 years ago			`body: addressUsingIdsQuery.render(vs)`
			`};`

			`}`

			`module.exports = generateQuery;`