From 495c3251a438ee91801121cda6d326e43f780a85 Mon Sep 17 00:00:00 2001 From: Harish Krishna Date: Fri, 17 Jul 2015 17:28:04 -0400 Subject: [PATCH] moving all query parsing logic into its own helper. Tests to come --- helper/query_parser.js | 81 ++++++++++++++++++++++++++++++++++++++++++ query/search.js | 20 ++++++----- sanitiser/_input.js | 78 ++-------------------------------------- 3 files changed, 96 insertions(+), 83 deletions(-) create mode 100644 helper/query_parser.js diff --git a/helper/query_parser.js b/helper/query_parser.js new file mode 100644 index 00000000..7c20e9fd --- /dev/null +++ b/helper/query_parser.js @@ -0,0 +1,81 @@ + +var parser = require('addressit'); +var extend = require('extend'); +var get_layers = require('../helper/layers'); +var delim = ','; + +module.exports = function(query) { + var parsedAddress0 = {}; + var parsedAddress1 = {}; + var parsedAddress2 = {}; + + // naive approach + // for admin matching during query time + // split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny' + var delimIndex = query.indexOf(delim); + if ( delimIndex !== -1 ) { + parsedAddress0.name = query.substring(0, delimIndex); + parsedAddress0.admin_parts = query.substring(delimIndex + 1).trim(); + } + + var tokenized = query.split(/[ ,]+/); + var hasNumber = /\d/.test(query); + + // set target_layer if input length <= 3 characters + if (query.length <= 3 ) { + // no address parsing required + parsedAddress2.target_layer = get_layers(['admin']); + } else if (tokenized.length === 1 || (tokenized.length < 3 && !hasNumber)) { + // no need to hit address layers if there's only one (or two) token(s) + parsedAddress2.target_layer = get_layers(['admin', 'poi']); + } else { + // address parsing + parsedAddress1 = parser( query ); + // set target_layer if input suggests no address + if (parsedAddress1.text === parsedAddress1.regions.join(' ') && !hasNumber) { + parsedAddress2.target_layer = get_layers(['admin', 'poi']); + } // else { + // this might be an overkill - you'd want to search for poi and admin + // even if an address is being queried. TBD + // parsedAddress2.target_layer = get_layers(['address']); + // } + } + + var parsedAddress = extend(parsedAddress0, parsedAddress1, parsedAddress2); + + var address_parts = [ 'name', + 'number', + 'street', + 'city', + 'state', + 'country', + 'postalcode', + 'regions', + 'admin_parts', + 'target_layer' + ]; + + var parsed_input = {}; + + address_parts.forEach(function(part){ + if (parsedAddress[part]) { + parsed_input[part] = parsedAddress[part]; + } + }); + + return parsed_input; +}; + + +// parsed_input = { +// name : parsedAddress.name, +// number : parsedAddress.number, +// street : parsedAddress.street, +// city : parsedAddress.city, +// state : parsedAddress.state, +// country: parsedAddress.country, +// postalcode : parsedAddress.postalcode, +// regions: parsedAddress.regions, +// admin_parts: parsedAddress.admin_parts, +// target_layer: parsedAddress.target_layer +// } \ No newline at end of file diff --git a/query/search.js b/query/search.js index 2faefafb..2f1cff6c 100644 --- a/query/search.js +++ b/query/search.js @@ -32,13 +32,15 @@ function generate( params ){ var admin_fields = []; var qb = function(admin_fields, value) { - admin_fields.forEach(function(admin_field) { - var match = {}; - match[admin_field] = value; - query.query.filtered.query.bool.should.push({ - 'match': match - }); - }); + if (value) { + admin_fields.forEach(function(admin_field) { + var match = {}; + match[admin_field] = value; + query.query.filtered.query.bool.should.push({ + 'match': match + }); + }); + } }; // update input @@ -84,7 +86,7 @@ function generate( params ){ admin_fields.push('admin0', 'alpha3'); } - var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : ''; + var input_regions = params.parsed_input.regions ? params.parsed_input.regions.join(' ') : undefined; if (admin_fields.length === 5 && input_regions !== params.input) { if (params.parsed_input.admin_parts) { qb(admin_fields, params.parsed_input.admin_parts); @@ -111,6 +113,8 @@ function generate( params ){ }); query.sort = query.sort.concat( sort( params ) ); + + console.log(JSON.stringify(query, null, 2)); return query; } diff --git a/sanitiser/_input.js b/sanitiser/_input.js index b85d03a0..6be54b98 100644 --- a/sanitiser/_input.js +++ b/sanitiser/_input.js @@ -1,15 +1,12 @@ var isObject = require('is-object'); -var parser = require('addressit'); -var extend = require('extend'); -var get_layers = require('../helper/layers'); +var query_parse= require('../helper/query_parser'); // validate inputs, convert types and apply defaults function sanitize( req ){ req.clean = req.clean || {}; var params= req.query; - var delim = ','; - + // ensure the input params are a valid object if( !isObject( params ) ){ params = {}; @@ -25,76 +22,7 @@ function sanitize( req ){ req.clean.input = params.input; - var parsedAddress0 = {}; - var parsedAddress1 = {}; - var parsedAddress2 = {}; - - // naive approach - // for admin matching during query time - // split 'flatiron, new york, ny' into 'flatiron' and 'new york, ny' - var delimIndex = params.input.indexOf(delim); - if ( delimIndex !== -1 ) { - parsedAddress0.name = params.input.substring(0, delimIndex); - parsedAddress0.admin_parts = params.input.substring(delimIndex + 1).trim(); - } - - var tokenized = params.input.split(/[ ,]+/); - var hasNumber = /\d/.test(params.input); - - // set target_layer if input length <= 3 characters - if (params.input.length <= 3 ) { - // no address parsing required - parsedAddress2.target_layer = get_layers(['admin']); - } else if (tokenized.length === 1 || (tokenized.length < 3 && !hasNumber)) { - // no need to hit address layers if there's only one (or two) token(s) - parsedAddress2.target_layer = get_layers(['admin', 'poi']); - } else { - // address parsing - parsedAddress1 = parser( params.input ); - // set target_layer if input suggests no address - if (parsedAddress1.text === parsedAddress1.regions.join(' ') && !hasNumber) { - parsedAddress2.target_layer = get_layers(['admin', 'poi']); - } // else { - // this might be an overkill - you'd want to search for poi and admin - // even if an address is being queried. TBD - // parsedAddress2.target_layer = get_layers(['address']); - // } - } - - var parsedAddress = extend(parsedAddress0, parsedAddress1, parsedAddress2); - - var address_parts = [ 'name', - 'number', - 'street', - 'city', - 'state', - 'country', - 'postalcode', - 'regions', - 'admin_parts', - 'target_layer' - ]; - - req.clean.parsed_input = {}; - - address_parts.forEach(function(part){ - if (parsedAddress[part]) { - req.clean.parsed_input[part] = parsedAddress[part]; - } - }); - - // req.clean.parsed_input = { - // name : parsedAddress.name, - // number : parsedAddress.number, - // street : parsedAddress.street, - // city : parsedAddress.city, - // state : parsedAddress.state, - // country: parsedAddress.country, - // postalcode : parsedAddress.postalcode, - // regions: parsedAddress.regions, - // admin_parts: parsedAddress.admin_parts, - // target_layer: parsedAddress.target_layer - // } + req.clean.parsed_input = query_parse(params.input); return { 'error': false };