diff --git a/src/options.c b/src/options.c
index 3cfc6dccf3..6c0c493ab2 100644
--- a/src/options.c
+++ b/src/options.c
@@ -137,6 +137,8 @@ struct option long_opts[] = {
{"real-time-html" , no_argument , 0 , 0 } ,
{"restore" , no_argument , 0 , 0 } ,
{"sort-panel" , required_argument , 0 , 0 } ,
+ {"site-search" , required_argument , 0 , 0 } ,
+ {"site-search-lower" , no_argument , 0 , 0 } ,
{"static-file" , required_argument , 0 , 0 } ,
{"user-name" , required_argument , 0 , 0 } ,
#ifdef HAVE_LIBSSL
@@ -260,6 +262,8 @@ cmd_help (void)
" --process-and-exit - Parse log and exit without outputting data.\n"
" --real-os - Display real OS names. e.g, Windows XP, Snow Leopard.\n"
" --restore - Restore data from disk from the given --db-path or from /tmp.\n"
+ " --site-search= - Parse search terms for local site search with query param (usually q)\n"
+ " --site-search-lower - Lower case search keyphrases from local site search\n"
" --sort-panel=PANEL,METRIC,ORDER - Sort panel on initial load. e.g., --sort-panel=VISITORS,BY_HITS,ASC.\n"
" See manpage for a list of panels/fields.\n"
" --static-file= - Add static file extension. e.g.: .mp3. Extensions are case sensitive.\n"
@@ -600,6 +604,24 @@ parse_long_opt (const char *name, const char *oarg) {
set_array_opt (oarg, conf.static_files, &conf.static_file_idx, MAX_EXTENSIONS);
}
+ /* local site search */
+ if (!strcmp ("site-search", name)) {
+ char *first_param = xstrdup ("?");
+ char *other_param = xstrdup ("&");
+
+ append_str(&first_param, oarg);
+ append_str(&first_param, "=");
+ append_str(&other_param, oarg);
+ append_str(&other_param, "=");
+
+ conf.site_search = first_param;
+ conf.site_search_other = other_param;
+ }
+
+ /* lowercase local site search */
+ if (!strcmp ("site-search-lower", name))
+ conf.site_search_lower = 1;
+
/* GEOIP OPTIONS
* ========================= */
/* specifies the path of the GeoIP City database file */
diff --git a/src/parser.c b/src/parser.c
index c5fc3feb23..c20a3a93ff 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -332,6 +332,50 @@ decode_url (char *url) {
return trim_str (char_replace (out, '+', ' '));
}
+/* Extract the keyphrase from a local site-search request.
+ *
+ * Looks for the configured query parameter in `ref` (conf.site_search is
+ * "?<param>=", conf.site_search_other is "&<param>="), URL-decodes its value
+ * and stores the decoded string in *keyphrase. The value is lowercased when
+ * conf.site_search_lower is set.
+ *
+ * `ref` is still raw (not decoded), so '&' reliably separates parameters. It
+ * is left unmodified on return: the caller goes on to parse the same buffer.
+ *
+ * On error (parameter missing or empty value), 1 is returned.
+ * On success, the extracted keyphrase is assigned and 0 is returned. */
+static int
+extract_sitesearch_keyphrase (char *ref, char **keyphrase) {
+  char *start, *end, *decoded, saved;
+
+  if (ref == NULL || conf.site_search == NULL || conf.site_search_other == NULL)
+    return 1;
+
+  /* Find start of keyword; both prefixes are built with the same length */
+  if ((start = strstr (ref, conf.site_search)) == NULL &&
+      (start = strstr (ref, conf.site_search_other)) == NULL)
+    return 1;
+  start += strlen (conf.site_search);
+
+  /* The keyword ends at the next parameter, at a space (nothing else after
+   * the param), or at the end of the string, whichever comes first. Terminate
+   * it only while decoding; decode_url() is expected to hand back its own
+   * heap copy (it is free()d below), so the byte can be restored after. */
+  end = start + strcspn (start, "& ");
+  saved = *end;
+  *end = '\0';
+  decoded = decode_url (start);
+  *end = saved;
+
+  /* decode_url() already maps '+' to ' ' and trims what it returns */
+  if (decoded == NULL || *decoded == '\0') {
+    free (decoded);
+    return 1;
+  }
+  *keyphrase = conf.site_search_lower ? strtolower (decoded) : decoded;
+  return 0;
+}
+
/* Process keyphrases from Google search, cache, and translate.
* Note that the referer hasn't been decoded at the entry point
* since there could be '&' within the search query.
@@ -1023,6 +1067,9 @@ parse_specifier (GLogItem * logitem, char **str, const char *p, const char *end)
if (!(tkn = parse_string (&(*str), end, 1)))
return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);
+ if(conf.site_search)
+ extract_sitesearch_keyphrase (tkn, &logitem->keyphrase);
+
logitem->req = parse_req (tkn, &logitem->method, &logitem->protocol);
free (tkn);
break;
diff --git a/src/settings.h b/src/settings.h
index e5202bc8db..b12187ce16 100644
--- a/src/settings.h
+++ b/src/settings.h
@@ -178,6 +178,9 @@ typedef struct GConf_
int real_time_html; /* enable real-time HTML output */
int restore; /* reload data from db-path */
int skip_term_resolver; /* no terminal resolver */
+ const char *site_search; /* enable local site search keywords parsing */
+ const char *site_search_other; /* alternate matching for site search param */
+ int site_search_lower; /* lower case local site search keywords */
int is_json_log_format; /* is a json log format */
uint32_t keep_last; /* number of days to keep in storage */
uint32_t num_tests; /* number of lines to test */
diff --git a/src/util.c b/src/util.c
index 0da412bb59..d88b63fa16 100644
--- a/src/util.c
+++ b/src/util.c
@@ -925,6 +925,24 @@ strtoupper (char *str) {
return str;
}
+/* Make a string lowercase, in place.
+ *
+ * A NULL or empty string is returned as is.
+ * On success, the lowercased string (same pointer) is returned. */
+char *
+strtolower (char *str) {
+  char *p;
+  if (str == NULL)
+    return str;
+
+  /* <ctype.h> functions take an int that must be representable as unsigned
+   * char (or EOF); a plain, possibly negative, char is undefined behavior */
+  for (p = str; *p != '\0'; p++)
+    *p = (char) tolower ((unsigned char) *p);
+
+  return str;
+}
+
/* Left-pad a string with n amount of spaces.
*
* On success, a left-padded string is returned. */
diff --git a/src/util.h b/src/util.h
index 12ba21694f..eadbc062f1 100644
--- a/src/util.h
+++ b/src/util.h
@@ -76,6 +76,7 @@ char *replace_str (const char *str, const char *old, const char *new);
char *rtrim (char *s);
char *secs_to_str (int secs);
char *strtoupper(char *str);
+char *strtolower(char *str);
char *substring (const char *str, int begin, int len);
char *trim_str (char *str);
char *u322str (uint32_t d, int width);