diff --git a/src/options.c b/src/options.c index 3cfc6dccf3..6c0c493ab2 100644 --- a/src/options.c +++ b/src/options.c @@ -137,6 +137,8 @@ struct option long_opts[] = { {"real-time-html" , no_argument , 0 , 0 } , {"restore" , no_argument , 0 , 0 } , {"sort-panel" , required_argument , 0 , 0 } , + {"site-search" , required_argument , 0 , 0 } , + {"site-search-lower" , no_argument , 0 , 0 } , {"static-file" , required_argument , 0 , 0 } , {"user-name" , required_argument , 0 , 0 } , #ifdef HAVE_LIBSSL @@ -260,6 +262,8 @@ cmd_help (void) " --process-and-exit - Parse log and exit without outputting data.\n" " --real-os - Display real OS names. e.g, Windows XP, Snow Leopard.\n" " --restore - Restore data from disk from the given --db-path or from /tmp.\n" + " --site-search= - Parse search terms for local site search with query param (usually q)\n" + " --site-search-lower - Lower case search keyphrases from local site search\n" " --sort-panel=PANEL,METRIC,ORDER - Sort panel on initial load. e.g., --sort-panel=VISITORS,BY_HITS,ASC.\n" " See manpage for a list of panels/fields.\n" " --static-file= - Add static file extension. e.g.: .mp3. 
Extensions are case sensitive.\n" @@ -600,6 +604,24 @@ parse_long_opt (const char *name, const char *oarg) { set_array_opt (oarg, conf.static_files, &conf.static_file_idx, MAX_EXTENSIONS); } + /* local site search */ + if (!strcmp ("site-search", name)) { + char *first_param = xstrdup ("?"); + char *other_param = xstrdup ("&"); + + append_str(&first_param, oarg); + append_str(&first_param, "="); + append_str(&other_param, oarg); + append_str(&other_param, "="); + + conf.site_search = first_param; + conf.site_search_other = other_param; + } + + /* lowercase local site search */ + if (!strcmp ("site-search-lower", name)) + conf.site_search_lower = 1; + /* GEOIP OPTIONS * ========================= */ /* specifies the path of the GeoIP City database file */ diff --git a/src/parser.c b/src/parser.c index c5fc3feb23..c20a3a93ff 100644 --- a/src/parser.c +++ b/src/parser.c @@ -332,6 +332,50 @@ decode_url (char *url) { return trim_str (char_replace (out, '+', ' ')); } +/* Process keyphrases from local site search. + * Note that the referer hasn't been decoded at the entry point + * since there could be '&' within the search query. + * + * On error, 1 is returned. + * On success, the extracted keyphrase is assigned and 0 is returned. 
*/
+static int
+extract_sitesearch_keyphrase (char *ref, char **keyphrase) {
+  char *start, *end, *decoded;
+  char saved;
+
+  if (ref == NULL || keyphrase == NULL || conf.site_search == NULL)
+    return 1;
+
+  /* Find start of keyword: "?param=" first, then "&param=" */
+  if ((start = strstr (ref, conf.site_search)) != NULL)
+    start += strlen (conf.site_search);
+  else if (conf.site_search_other != NULL &&
+           (start = strstr (ref, conf.site_search_other)) != NULL)
+    start += strlen (conf.site_search_other);
+  else
+    return 1;
+
+  /* The keyword ends at the next parameter, or at the space that precedes
+   * the protocol when the search param is the last one. Terminate the string
+   * there only while decoding and then restore the byte, since the caller
+   * still hands the full request over to parse_req(). */
+  end = start + strcspn (start, "& ");
+  saved = *end;
+  *end = '\0';
+  decoded = decode_url (start);
+  *end = saved;
+
+  /* decode_url() already turns '+' into spaces and trims the result */
+  if (decoded == NULL || *decoded == '\0') {
+    free (decoded);
+    return 1;
+  }
+
+  *keyphrase = conf.site_search_lower ? strtolower (decoded) : decoded;
+
+  return 0;
+}
+
 /* Process keyphrases from Google search, cache, and translate.
  * Note that the referer hasn't been decoded at the entry point
  * since there could be '&' within the search query.
@@ -1023,6 +1067,9 @@ parse_specifier (GLogItem * logitem, char **str, const char *p, const char *end)
     if (!(tkn = parse_string (&(*str), end, 1)))
       return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);
 
+    if(conf.site_search)
+      extract_sitesearch_keyphrase (tkn, &logitem->keyphrase);
+
     logitem->req = parse_req (tkn, &logitem->method, &logitem->protocol);
     free (tkn);
     break;
diff --git a/src/settings.h b/src/settings.h
index e5202bc8db..b12187ce16 100644
--- a/src/settings.h
+++ b/src/settings.h
@@ -178,6 +178,9 @@ typedef struct GConf_
   int real_time_html; /* enable real-time HTML output */
   int restore; /* reload data from db-path */
   int skip_term_resolver; /* no terminal resolver */
+  const char *site_search; /* enable local site search keywords parsing */
+  const char *site_search_other; /* alternate matching for site search param */
+  int site_search_lower; /* lower case local site search keywords */
   int is_json_log_format; /* is a json log format */
   uint32_t keep_last; /* number of days to keep in storage */
   uint32_t num_tests; /* number of lines to test */
diff --git a/src/util.c b/src/util.c
index 0da412bb59..d88b63fa16 100644
--- a/src/util.c
+++ b/src/util.c
@@ -925,6 +925,27 @@ strtoupper (char *str) {
   return str;
 }
 
+/* Make a string lowercase.
+ *
+ * The conversion is done in place, one byte at a time. A NULL or empty
+ * string is returned untouched.
+ *
+ * On success, the same pointer, now lowercased, is returned. */
+char *
+strtolower (char *str) {
+  char *p;
+
+  if (str == NULL)
+    return str;
+
+  /* <ctype.h> functions take an int that must be representable as an
+   * unsigned char (or EOF); passing a negative plain char is undefined. */
+  for (p = str; *p != '\0'; p++)
+    *p = (char) tolower ((unsigned char) *p);
+
+  return str;
+}
+
 /* Left-pad a string with n amount of spaces.
  *
  * On success, a left-padded string is returned. 
*/ diff --git a/src/util.h b/src/util.h index 12ba21694f..eadbc062f1 100644 --- a/src/util.h +++ b/src/util.h @@ -76,6 +76,7 @@ char *replace_str (const char *str, const char *old, const char *new); char *rtrim (char *s); char *secs_to_str (int secs); char *strtoupper(char *str); +char *strtolower(char *str); char *substring (const char *str, int begin, int len); char *trim_str (char *str); char *u322str (uint32_t d, int width);