mirror of https://github.com/pelias/api.git
Short autocomplete inputs are very difficult to serve in a performant, low-latency way. With shorter inputs, many more documents match just about any input string: in our testing, one- to three-character input texts commonly match up to 100 million documents out of a 560 million document full planet build. There's no way to make scoring 100 million documents fast, so to achieve acceptable performance (ideally, <100ms P99 latency), it's worth looking at ways to either avoid querying Elasticsearch or reduce the scope of autocomplete queries.

Short autocomplete queries without a `focus.point` parameter can be cached: there are only about 47,000 possible 1-3 character alphanumeric inputs. At this time, caching is outside the scope of Pelias itself, but it can easily be implemented with Varnish, Nginx, Fastly, CloudFront, and many other tools and services. Queries with a `focus.point`, however, are effectively uncachable, since the coordinate chosen will often be unique.

This PR uses the `focus.point` coordinate to build a hard filter limiting the search to documents within a certain radius of that coordinate. This can reduce the number of documents searched and improve performance, while still returning useful results.

It takes two parameters, driven by `pelias-config`:

- `api.autocomplete.focusHardLimitTextLength`: the maximum length of text for which a hard distance filter will be constructed
- `api.autocomplete.focusHardLimitMultiplier`: the length of the input text is multiplied by this number to get the total hard filter radius in kilometers

For example, with `focusHardLimitTextLength` 4 and `focusHardLimitMultiplier` 50, the following hard filters would be constructed:

| text length | max distance (km) |
| ----------- | ----------------- |
| 1           | 50                |
| 2           | 100               |
| 3           | 150               |
| 4+          | unlimited         |
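The radius computation described above can be sketched as follows. This is an illustrative example only, not the actual Pelias implementation; the function name `hardFilterRadiusKm` is hypothetical, and the behavior follows the table (inputs at or beyond the length threshold get no hard filter):

```javascript
// Hypothetical sketch of the hard filter radius logic described above.
// Returns a radius in kilometers, or null when no hard filter applies.
function hardFilterRadiusKm(text, focusHardLimitTextLength, focusHardLimitMultiplier) {
  // per the table, inputs at or above the threshold length are unlimited
  if (text.length >= focusHardLimitTextLength) {
    return null;
  }
  // radius grows linearly with input length
  return text.length * focusHardLimitMultiplier;
}

// With focusHardLimitTextLength = 4 and focusHardLimitMultiplier = 50:
console.log(hardFilterRadiusKm('a', 4, 50));    // 50
console.log(hardFilterRadiusKm('abc', 4, 50));  // 150
console.log(hardFilterRadiusKm('abcd', 4, 50)); // null (unlimited)
```

In Elasticsearch terms, the resulting radius would presumably feed a `geo_distance` filter centered on the `focus.point` coordinate, so documents outside the radius are excluded before scoring.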
Julian Simioni
6 years ago
2 changed files with 15 additions and 1 deletion