From 9a9a2c8da853dc776750b085cab157839620974e Mon Sep 17 00:00:00 2001 From: Raj Patel <57837394+rajc242@users.noreply.github.com> Date: Thu, 18 May 2023 08:59:52 -0500 Subject: [PATCH] Transfer description in comments to OPTIONS (#358) --- udfs/community/csv_to_struct.sqlx | 5 +++++ udfs/community/cw_array_compact.sqlx | 6 +++++- udfs/community/cw_array_distinct.sqlx | 6 +++++- udfs/community/cw_array_max.sqlx | 6 +++++- udfs/community/cw_array_median.sqlx | 6 +++++- udfs/community/cw_array_min.sqlx | 6 +++++- udfs/community/cw_array_overlap.sqlx | 6 +++++- .../cw_comparable_format_bigint_t.sqlx | 3 +++ .../cw_comparable_format_varchar_t.sqlx | 3 +++ udfs/community/cw_editdistance.sqlx | 6 +++++- udfs/community/cw_find_in_list.sqlx | 3 +++ udfs/community/cw_from_base.sqlx | 6 +++++- .../community/cw_json_array_contains_bool.sqlx | 6 +++++- udfs/community/cw_json_array_contains_num.sqlx | 6 +++++- udfs/community/cw_json_array_contains_str.sqlx | 6 +++++- udfs/community/cw_json_array_get.sqlx | 6 +++++- udfs/community/cw_json_array_length.sqlx | 6 +++++- udfs/community/cw_lower_case_ascii_only.sqlx | 6 +++++- udfs/community/cw_map_create.sqlx | 6 +++++- udfs/community/cw_map_get.sqlx | 6 +++++- udfs/community/cw_map_parse.sqlx | 6 +++++- udfs/community/cw_next_day.sqlx | 6 +++++- udfs/community/cw_regexp_extract.sqlx | 6 +++++- udfs/community/cw_regexp_extract_all.sqlx | 6 +++++- udfs/community/cw_regexp_extract_all_n.sqlx | 6 +++++- udfs/community/cw_regexp_extract_n.sqlx | 6 +++++- .../cw_runtime_parse_interval_seconds.sqlx | 6 +++++- udfs/community/cw_stringify_interval.sqlx | 6 +++++- udfs/community/cw_substrb.sqlx | 6 +++++- udfs/community/cw_substring_index.sqlx | 6 +++++- udfs/community/cw_td_nvp.sqlx | 6 +++++- udfs/community/cw_to_base.sqlx | 6 +++++- udfs/community/cw_ts_overlap_buckets.sqlx | 6 +++++- udfs/community/cw_ts_pattern_match.sqlx | 6 +++++- udfs/community/cw_url_decode.sqlx | 6 +++++- udfs/community/cw_url_encode.sqlx | 6 +++++- 
udfs/community/cw_url_extract_authority.sqlx | 6 +++++- udfs/community/cw_url_extract_file.sqlx | 6 +++++- udfs/community/cw_url_extract_fragment.sqlx | 6 +++++- udfs/community/cw_url_extract_host.sqlx | 6 +++++- udfs/community/cw_url_extract_parameter.sqlx | 6 +++++- udfs/community/cw_url_extract_path.sqlx | 6 +++++- udfs/community/cw_url_extract_port.sqlx | 6 +++++- udfs/community/cw_url_extract_protocol.sqlx | 6 +++++- udfs/community/cw_url_extract_query.sqlx | 6 +++++- udfs/community/day_occurrence_of_month.sqlx | 8 +++++++- udfs/community/find_in_set.sqlx | 3 +++ udfs/community/from_binary.sqlx | 6 +++++- udfs/community/from_hex.sqlx | 6 +++++- udfs/community/get_array_value.sqlx | 6 +++++- udfs/community/get_value.sqlx | 6 +++++- udfs/community/getbit.sqlx | 8 +++++++- udfs/community/knots_to_mph.sqlx | 6 +++++- udfs/community/mannwhitneyu.sqlx | 5 +++++ udfs/community/mph_to_knots.sqlx | 6 +++++- udfs/community/multiply_full_scale.sqlx | 6 +++++- udfs/community/p_fisherexact.sqlx | 7 ++++++- udfs/community/pi.sqlx | 6 +++++- udfs/community/random_string.sqlx | 9 ++++++++- udfs/community/random_value.sqlx | 3 +++ udfs/community/string_to_struct.sqlx | 8 ++++++++ udfs/community/to_binary.sqlx | 6 +++++- udfs/community/to_hex.sqlx | 6 +++++- udfs/community/ts_gen_keyed_timestamps.sqlx | 18 +++++++++++++++++- udfs/community/url_parse.sqlx | 5 +++++ udfs/community/week_of_month.sqlx | 8 +++++++- udfs/migration/snowflake/factorial.sqlx | 6 +++++- 67 files changed, 350 insertions(+), 58 deletions(-) diff --git a/udfs/community/csv_to_struct.sqlx b/udfs/community/csv_to_struct.sqlx index b5bdc99a6..b927be2aa 100644 --- a/udfs/community/csv_to_struct.sqlx +++ b/udfs/community/csv_to_struct.sqlx @@ -20,6 +20,11 @@ config { hasOutput: true } -- strList: string that has map in the format a:b,c:d.... -- Output: struct for the above map. 
CREATE OR REPLACE FUNCTION ${self()}(strList STRING) +OPTIONS ( + description="""Prepare struct for the csv with string that has map in the format a:b,c:d,... +Input: string that has map in the format a:b,c:d.... +Output: struct for the above map.""" +) AS ( ${ref("string_to_struct")}(strList, ',', ':') ); diff --git a/udfs/community/cw_array_compact.sqlx b/udfs/community/cw_array_compact.sqlx index 11c504999..bb611abee 100644 --- a/udfs/community/cw_array_compact.sqlx +++ b/udfs/community/cw_array_compact.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Similar to Snowflake ARRAY_COMPACT */ -CREATE OR REPLACE FUNCTION ${self()}(a ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION ${self()}(a ANY TYPE) +OPTIONS ( + description="Similar to Snowflake ARRAY_COMPACT" +) +AS ( ARRAY(SELECT v FROM UNNEST(a) v WHERE v IS NOT NULL) ); diff --git a/udfs/community/cw_array_distinct.sqlx b/udfs/community/cw_array_distinct.sqlx index 5420eb1f7..b51444187 100644 --- a/udfs/community/cw_array_distinct.sqlx +++ b/udfs/community/cw_array_distinct.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Similar to presto ARRAY_DISTINCT */ -CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) +OPTIONS ( + description="Similar to presto ARRAY_DISTINCT" +) +AS ( ARRAY( SELECT DISTINCT x FROM UNNEST(arr) AS x ) ); diff --git a/udfs/community/cw_array_max.sqlx b/udfs/community/cw_array_max.sqlx index d324d2d27..9e058a625 100644 --- a/udfs/community/cw_array_max.sqlx +++ b/udfs/community/cw_array_max.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Similar to presto ARRAY_MAX */ -CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) +OPTIONS ( + description="Similar to presto ARRAY_MAX" +) +AS ( ( SELECT MAX(x) FROM UNNEST(arr) AS x ) ); diff --git a/udfs/community/cw_array_median.sqlx b/udfs/community/cw_array_median.sqlx index fa409c3b2..c6ad73842 100644 --- 
a/udfs/community/cw_array_median.sqlx +++ b/udfs/community/cw_array_median.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Similar to MEDIAN in Teradata */ -CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) +OPTIONS ( + description="Similar to MEDIAN in Teradata" +) +AS ( ( SELECT PERCENTILE_CONT(x, 0.5) OVER() FROM UNNEST(arr) AS x LIMIT 1 ) ); diff --git a/udfs/community/cw_array_min.sqlx b/udfs/community/cw_array_min.sqlx index 971654070..5f8d157d7 100644 --- a/udfs/community/cw_array_min.sqlx +++ b/udfs/community/cw_array_min.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Similar to presto ARRAY_MIN */ -CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) +OPTIONS ( + description="Similar to presto ARRAY_MIN" +) +AS ( ( SELECT MIN(x) FROM UNNEST(arr) AS x ) ); diff --git a/udfs/community/cw_array_overlap.sqlx b/udfs/community/cw_array_overlap.sqlx index 3d5db642b..e9415bd66 100644 --- a/udfs/community/cw_array_overlap.sqlx +++ b/udfs/community/cw_array_overlap.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Similar to Presto ARRAYS_OVERLAP */ -CREATE OR REPLACE FUNCTION ${self()}(x ANY TYPE, y ANY TYPE) RETURNS BOOL AS( +CREATE OR REPLACE FUNCTION ${self()}(x ANY TYPE, y ANY TYPE) RETURNS BOOL +OPTIONS ( + description="Similar to Presto ARRAYS_OVERLAP" +) +AS( CASE WHEN EXISTS(SELECT 1 FROM UNNEST(ARRAY_CONCAT(x,y)) as z WHERE z IS NULL) THEN NULL ELSE EXISTS(SELECT 1 FROM UNNEST(x) as u JOIN UNNEST(y) as v ON u=v) END diff --git a/udfs/community/cw_comparable_format_bigint_t.sqlx b/udfs/community/cw_comparable_format_bigint_t.sqlx index d6c553067..00b9aaea3 100644 --- a/udfs/community/cw_comparable_format_bigint_t.sqlx +++ b/udfs/community/cw_comparable_format_bigint_t.sqlx @@ -17,6 +17,9 @@ config { hasOutput: true } /* Lexicographically '+' comes before '-' so we replace p(lus) and m(inus) and subtract LONG_MIN on 
negative values */ CREATE OR REPLACE FUNCTION ${self()}(part INT64) RETURNS STRING +OPTIONS ( + description="Lexicographically '+' comes before '-' so we replace p(lus) and m(inus) and subtract LONG_MIN on negative values" +) AS ( FORMAT( CASE WHEN part < 0 THEN 'm' ELSE 'p' END || '%19d', -- 'm' < 'p' lexicographically diff --git a/udfs/community/cw_comparable_format_varchar_t.sqlx b/udfs/community/cw_comparable_format_varchar_t.sqlx index 84a6b8afc..c585c9d77 100644 --- a/udfs/community/cw_comparable_format_varchar_t.sqlx +++ b/udfs/community/cw_comparable_format_varchar_t.sqlx @@ -17,6 +17,9 @@ config { hasOutput: true } /* Use hex to work around the separator problem (e.g. if separator = '-' then ['-', ''] and ['', '-'] both produce '--') */ CREATE OR REPLACE FUNCTION ${self()}(part STRING) RETURNS STRING +OPTIONS ( + description="Use hex to work around the separator problem (e.g. if separator = '-' then ['-', ''] and ['', '-'] both produce '--')" +) AS ( TO_HEX(CAST(part as BYTES)) ); diff --git a/udfs/community/cw_editdistance.sqlx b/udfs/community/cw_editdistance.sqlx index 4bf442b39..0fc485fd8 100644 --- a/udfs/community/cw_editdistance.sqlx +++ b/udfs/community/cw_editdistance.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Similar to teradata's editdistance without weightages */ CREATE OR REPLACE FUNCTION ${self()}( a STRING, b STRING ) RETURNS INT64 -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Similar to teradata's editdistance without weightages" +) +AS """ if ( a == null || b == null ) { return null; diff --git a/udfs/community/cw_find_in_list.sqlx b/udfs/community/cw_find_in_list.sqlx index cce0c51b5..e067d36c2 100644 --- a/udfs/community/cw_find_in_list.sqlx +++ b/udfs/community/cw_find_in_list.sqlx @@ -17,6 +17,9 @@ config { hasOutput: true } /* Similar to Hive "find_in_set" */ CREATE OR REPLACE FUNCTION ${self()}(needle STRING, list STRING) RETURNS INT64 +OPTIONS ( + description="Similar to Hive find_in_set" +) AS ( CASE WHEN 
needle IS NULL OR list IS NULL THEN NULL ELSE COALESCE((SELECT o + 1 FROM unnest(split(list, ',')) straw WITH OFFSET o WHERE straw = needle LIMIT 1), 0) END diff --git a/udfs/community/cw_from_base.sqlx b/udfs/community/cw_from_base.sqlx index 6decb9ed1..62d3f1658 100644 --- a/udfs/community/cw_from_base.sqlx +++ b/udfs/community/cw_from_base.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Similar to Presto from_base function - convert string from given base to decimal */ -CREATE OR REPLACE FUNCTION ${self()}(number STRING, base INT64) RETURNS INT64 AS ( +CREATE OR REPLACE FUNCTION ${self()}(number STRING, base INT64) RETURNS INT64 +OPTIONS ( + description="Similar to Presto from_base function - convert string from given base to decimal" +) +AS ( (WITH chars AS ( SELECT IF(ch >= 48 AND ch <= 57, ch - 48, IF(ch >= 65 AND ch <= 90, ch - 65 + 10, ch - 97 + 10)) pos, offset + 1 AS idx FROM UNNEST(TO_CODE_POINTS(number)) AS ch WITH OFFSET diff --git a/udfs/community/cw_json_array_contains_bool.sqlx b/udfs/community/cw_json_array_contains_bool.sqlx index 1233d994a..29c0197e2 100644 --- a/udfs/community/cw_json_array_contains_bool.sqlx +++ b/udfs/community/cw_json_array_contains_bool.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Same as cw_json_array_contains_str(STRING, STRING) UDF but with needle = boolean */ CREATE OR REPLACE FUNCTION ${self()}(json STRING, needle BOOL) RETURNS BOOL -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Same as cw_json_array_contains_str(STRING, STRING) UDF but with needle = boolean" +) +AS """ if (json == null || needle == null) return null; var parsedJson = JSON.parse(json); diff --git a/udfs/community/cw_json_array_contains_num.sqlx b/udfs/community/cw_json_array_contains_num.sqlx index bd845a4ab..8129d2ea1 100644 --- a/udfs/community/cw_json_array_contains_num.sqlx +++ b/udfs/community/cw_json_array_contains_num.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Same as cw_json_array_contains_str(STRING, 
STRING) UDF but with needle = number */ CREATE OR REPLACE FUNCTION ${self()}(json STRING, needle FLOAT64) RETURNS BOOL -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Same as cw_json_array_contains_str(STRING, STRING) UDF but with needle = number" +) +AS """ if (json == null || needle == null) return null; var parsedJson = JSON.parse(json); diff --git a/udfs/community/cw_json_array_contains_str.sqlx b/udfs/community/cw_json_array_contains_str.sqlx index 5bbda548b..243b959c6 100644 --- a/udfs/community/cw_json_array_contains_str.sqlx +++ b/udfs/community/cw_json_array_contains_str.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Determine if value exists in json (a string containing a JSON array). */ CREATE OR REPLACE FUNCTION ${self()}(json STRING, needle STRING) RETURNS BOOL -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Determine if value exists in json (a string containing a JSON array)." +) +AS """ if (json == null || needle == null) return null; var parsedJson = JSON.parse(json); diff --git a/udfs/community/cw_json_array_get.sqlx b/udfs/community/cw_json_array_get.sqlx index cb3ebfa7f..0d0681dd6 100644 --- a/udfs/community/cw_json_array_get.sqlx +++ b/udfs/community/cw_json_array_get.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Returns the element at the specified index into the json_array. The index is zero-based */ CREATE OR REPLACE FUNCTION ${self()}(json STRING, loc FLOAT64) RETURNS STRING -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Returns the element at the specified index into the json_array. 
The index is zero-based" +) +AS """ if (json == null || loc == null) return null; var parsedJson = JSON.parse(json); diff --git a/udfs/community/cw_json_array_length.sqlx b/udfs/community/cw_json_array_length.sqlx index cc7ae1949..7575f786e 100644 --- a/udfs/community/cw_json_array_length.sqlx +++ b/udfs/community/cw_json_array_length.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Returns the array length of json (a string containing a JSON array) */ CREATE OR REPLACE FUNCTION ${self()}(json STRING) RETURNS INT64 -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Returns the array length of json (a string containing a JSON array)" +) +AS """ if (json == null) return null; var parsedJson = JSON.parse(json); diff --git a/udfs/community/cw_lower_case_ascii_only.sqlx b/udfs/community/cw_lower_case_ascii_only.sqlx index f71378687..90992bfbe 100644 --- a/udfs/community/cw_lower_case_ascii_only.sqlx +++ b/udfs/community/cw_lower_case_ascii_only.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /** Similar to Vertica LOWERB function, which lowercases only ASCII characters within a given string. */ -CREATE OR REPLACE FUNCTION ${self()}(str STRING) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(str STRING) RETURNS STRING +OPTIONS ( + description="Similar to Vertica LOWERB function, which lowercases only ASCII characters within a given string." +) +AS ( (WITH chars AS ( SELECT ch FROM UNNEST(TO_CODE_POINTS(str)) AS ch ) diff --git a/udfs/community/cw_map_create.sqlx b/udfs/community/cw_map_create.sqlx index b2c2ea607..b63a6595b 100644 --- a/udfs/community/cw_map_create.sqlx +++ b/udfs/community/cw_map_create.sqlx @@ -20,7 +20,11 @@ config { hasOutput: true } matched from each array. Number of elements in each array should be equal otherwise remaining values will be ignored. 
*/ -CREATE OR REPLACE FUNCTION ${self()}(keys ANY TYPE, vals ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION ${self()}(keys ANY TYPE, vals ANY TYPE) +OPTIONS ( + description="Given an array of keys and values, creates an array of struct containing matched from each array. Number of elements in each array should be equal otherwise remaining values will be ignored." +) +AS ( (SELECT ARRAY( SELECT AS STRUCT keys[SAFE_OFFSET(index)] AS key, vals[SAFE_OFFSET(index)] AS value diff --git a/udfs/community/cw_map_get.sqlx b/udfs/community/cw_map_get.sqlx index e0e1ac69a..365b64c4c 100644 --- a/udfs/community/cw_map_get.sqlx +++ b/udfs/community/cw_map_get.sqlx @@ -19,6 +19,10 @@ config { hasOutput: true } Given an array of struct and needle, searches an array to find struct whose key-field matches needle, then it returns the value-field in the given struct. */ -CREATE OR REPLACE FUNCTION ${self()}(maparray ANY TYPE, inkey ANY TYPE) AS ( +CREATE OR REPLACE FUNCTION ${self()}(maparray ANY TYPE, inkey ANY TYPE) +OPTIONS ( + description="Given an array of struct and needle, searches an array to find struct whose key-field matches needle, then it returns the value-field in the given struct." 
+) +AS ( (SELECT ARRAY_AGG(kv.value)[SAFE_OFFSET(0)] FROM UNNEST(maparray) AS kv WHERE kv.key = inkey) ); diff --git a/udfs/community/cw_map_parse.sqlx b/udfs/community/cw_map_parse.sqlx index 8b985568f..e188626e1 100644 --- a/udfs/community/cw_map_parse.sqlx +++ b/udfs/community/cw_map_parse.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Similar to hive "str_to_map" */ CREATE OR REPLACE FUNCTION ${self()}(m string, pd string, kvd string) - RETURNS ARRAY> AS ( + RETURNS ARRAY> + OPTIONS ( + description="Similar to hive str_to_map" + ) + AS ( ARRAY(SELECT AS STRUCT kv[SAFE_OFFSET(0)] AS key, kv[SAFE_OFFSET(1)] AS value FROM (SELECT SPLIT(kv, kvd) AS kv FROM UNNEST(SPLIT(m, pd)) AS kv) r )); diff --git a/udfs/community/cw_next_day.sqlx b/udfs/community/cw_next_day.sqlx index d5a14ad8d..d86a3de82 100644 --- a/udfs/community/cw_next_day.sqlx +++ b/udfs/community/cw_next_day.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Returns the date of the first weekday (second arugment) that is later than the date specified by the first argument */ -CREATE OR REPLACE FUNCTION ${self()}(date_value DATE, day_name STRING) RETURNS DATE AS ( +CREATE OR REPLACE FUNCTION ${self()}(date_value DATE, day_name STRING) RETURNS DATE +OPTIONS ( + description="Returns the date of the first weekday (second arugment) that is later than the date specified by the first argument" +) +AS ( (WITH t AS (SELECT CASE lower(substr(day_name, 1, 2)) WHEN 'su' THEN 1 diff --git a/udfs/community/cw_regexp_extract.sqlx b/udfs/community/cw_regexp_extract.sqlx index 46bc0f684..f26b3f05e 100644 --- a/udfs/community/cw_regexp_extract.sqlx +++ b/udfs/community/cw_regexp_extract.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Returns the first substring matched by the regular expression `regexp` in `str`. 
*/ CREATE OR REPLACE FUNCTION ${self()}(str STRING, regexp STRING) RETURNS STRING -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Returns the first substring matched by the regular expression `regexp` in `str`." +) +AS """ var r = new RegExp(regexp); var a = str.match(r); return a[0]; diff --git a/udfs/community/cw_regexp_extract_all.sqlx b/udfs/community/cw_regexp_extract_all.sqlx index eaf7f6284..68f7c153e 100644 --- a/udfs/community/cw_regexp_extract_all.sqlx +++ b/udfs/community/cw_regexp_extract_all.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Returns the substring(s) matched by the regular expression `regexp` in `str`. */ CREATE OR REPLACE FUNCTION ${self()}(str STRING, regexp STRING) RETURNS ARRAY -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Returns the substring(s) matched by the regular expression `regexp` in `str`." +) +AS """ var r = new RegExp(regexp, "g"); return str.match(r); """; diff --git a/udfs/community/cw_regexp_extract_all_n.sqlx b/udfs/community/cw_regexp_extract_all_n.sqlx index 2fb7f43af..ec328a998 100644 --- a/udfs/community/cw_regexp_extract_all_n.sqlx +++ b/udfs/community/cw_regexp_extract_all_n.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Finds all occurrences of the regular expression `regexp` in `str` and returns the capturing group number `groupn`. */ CREATE OR REPLACE FUNCTION ${self()}(str STRING, regexp STRING, groupn INT64) RETURNS ARRAY -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Finds all occurrences of the regular expression `regexp` in `str` and returns the capturing group number `groupn`." 
+) +AS """ var r = new RegExp(regexp, 'g'); var o = []; while ((a = r.exec(str)) !== null) { diff --git a/udfs/community/cw_regexp_extract_n.sqlx b/udfs/community/cw_regexp_extract_n.sqlx index 3d66611c0..3c285da63 100644 --- a/udfs/community/cw_regexp_extract_n.sqlx +++ b/udfs/community/cw_regexp_extract_n.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Finds the first occurrence of the regular expression `regexp` in `str` and returns the capturing group number `groupn` */ CREATE OR REPLACE FUNCTION ${self()}(str STRING, regexp STRING, groupn INT64) RETURNS STRING -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Finds the first occurrence of the regular expression `regexp` in `str` and returns the capturing group number `groupn`" +) +AS """ var r = new RegExp(regexp); var a = str.match(r); if (!a) return null; diff --git a/udfs/community/cw_runtime_parse_interval_seconds.sqlx b/udfs/community/cw_runtime_parse_interval_seconds.sqlx index c22b08791..56bd79f09 100644 --- a/udfs/community/cw_runtime_parse_interval_seconds.sqlx +++ b/udfs/community/cw_runtime_parse_interval_seconds.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Kludge for interval translation - for now day->sec only! */ -CREATE OR REPLACE FUNCTION ${self()}(ival STRING) RETURNS INT64 AS ( +CREATE OR REPLACE FUNCTION ${self()}(ival STRING) RETURNS INT64 +OPTIONS ( + description="Kludge for interval translation - for now day->sec only!" 
+) +AS ( CASE WHEN ival IS NULL THEN NULL WHEN ARRAY_LENGTH(SPLIT(ival,' ')) <> 2 THEN NULL WHEN SPLIT(ival,' ')[OFFSET(1)] NOT IN ('day','DAY') THEN NULL diff --git a/udfs/community/cw_stringify_interval.sqlx b/udfs/community/cw_stringify_interval.sqlx index 84e08748f..a249eefac 100644 --- a/udfs/community/cw_stringify_interval.sqlx +++ b/udfs/community/cw_stringify_interval.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Formats the interval as 'day hour:minute:second */ -CREATE OR REPLACE FUNCTION ${self()}(x INT64) RETURNS STRING AS +CREATE OR REPLACE FUNCTION ${self()}(x INT64) RETURNS STRING +OPTIONS ( + description="Formats the interval as 'day hour:minute:second" +) +AS ( concat( CASE WHEN x >= 0 THEN '+' ELSE '-' END, diff --git a/udfs/community/cw_substrb.sqlx b/udfs/community/cw_substrb.sqlx index 7efb6654c..685d00139 100644 --- a/udfs/community/cw_substrb.sqlx +++ b/udfs/community/cw_substrb.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /** Similar to Vertica SUBSTRB function, which treats the multibyte character string as a string of octets (bytes). */ -CREATE OR REPLACE FUNCTION ${self()}(str STRING, startpos INT64 /* 1-based */, extent INT64 /* 1-based */) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(str STRING, startpos INT64 /* 1-based */, extent INT64 /* 1-based */) RETURNS STRING +OPTIONS ( + description="Similar to Vertica SUBSTRB function, which treats the multibyte character string as a string of octets (bytes)." 
+) +AS ( (WITH octets AS ( SELECT oct FROM UNNEST(TO_CODE_POINTS(CAST(str as bytes))) AS oct WITH OFFSET off WHERE (off+1) >= startpos and (off+1) < startpos+extent ORDER BY off ) diff --git a/udfs/community/cw_substring_index.sqlx b/udfs/community/cw_substring_index.sqlx index f333a7b50..92a15bece 100644 --- a/udfs/community/cw_substring_index.sqlx +++ b/udfs/community/cw_substring_index.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Similar to MySQL SUBSTRING_INDEX */ CREATE OR REPLACE FUNCTION ${self()}(str STRING, sep STRING, idx INT64) RETURNS STRING -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Similar to MySQL SUBSTRING_INDEX" +) +AS """ if (str === null || sep === null || idx === null) return null; if (sep == "") return ""; var arr = str.split(sep); diff --git a/udfs/community/cw_td_nvp.sqlx b/udfs/community/cw_td_nvp.sqlx index 63460d432..19ec54523 100644 --- a/udfs/community/cw_td_nvp.sqlx +++ b/udfs/community/cw_td_nvp.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Similar to teradata NVP function - extract a value from a key-value separated string */ -CREATE OR REPLACE FUNCTION ${self()}(haystack STRING, needle STRING, pairsep STRING, valuesep STRING, occurence INT64) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(haystack STRING, needle STRING, pairsep STRING, valuesep STRING, occurence INT64) RETURNS STRING +OPTIONS ( + description="Similar to teradata NVP function - extract a value from a key-value separated string" +) +AS ( NULLIF(ARRAY(SELECT ARRAY_TO_STRING((select ARRAY_AGG(v) from UNNEST(kv) v WITH OFFSET o WHERE o > 0), valuesep) FROM ( SELECT SPLIT(pairs, valuesep) AS kv, o FROM UNNEST(SPLIT(haystack, pairsep)) AS pairs WITH OFFSET o ) t WHERE kv[SAFE_OFFSET(0)] = needle ORDER BY o ASC)[SAFE_ORDINAL(occurence)], '') diff --git a/udfs/community/cw_to_base.sqlx b/udfs/community/cw_to_base.sqlx index 031d8bd9e..2e8fc5274 100644 --- a/udfs/community/cw_to_base.sqlx +++ b/udfs/community/cw_to_base.sqlx @@ 
-16,7 +16,11 @@ config { hasOutput: true } */ /* Similar to Presto to_base function - convert decimal number to number with given base */ -CREATE OR REPLACE FUNCTION ${self()}(number INT64, base INT64) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(number INT64, base INT64) RETURNS STRING +OPTIONS ( + description="Similar to Presto to_base function - convert decimal number to number with given base" +) +AS ( (WITH chars AS ( SELECT MOD(CAST(FLOOR(ABS(number)/POW(base, (FLOOR(LOG(ABS(number))/LOG(base)) + 1) - idx)) AS INT64), base) ch, idx from UNNEST(GENERATE_ARRAY(1, CAST(FLOOR(LOG(ABS(number))/LOG(base)) AS INT64) + 1)) idx diff --git a/udfs/community/cw_ts_overlap_buckets.sqlx b/udfs/community/cw_ts_overlap_buckets.sqlx index d8ce633cd..d3aac3cb5 100644 --- a/udfs/community/cw_ts_overlap_buckets.sqlx +++ b/udfs/community/cw_ts_overlap_buckets.sqlx @@ -21,7 +21,11 @@ config { hasOutput: true } */ CREATE OR REPLACE FUNCTION ${self()}(includeMeets BOOLEAN, inputs ARRAY>) RETURNS array> -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Merges two periods together if they overlap and returns unique id for each merged bucket. Coalesces meeting periods as well (not just overlapping periods) if includeMeets is true." 
+) +AS """ var ret = [ ] var bucketNo = 1; var st = null; diff --git a/udfs/community/cw_ts_pattern_match.sqlx b/udfs/community/cw_ts_pattern_match.sqlx index f1f78e7fd..80de76f6a 100644 --- a/udfs/community/cw_ts_pattern_match.sqlx +++ b/udfs/community/cw_ts_pattern_match.sqlx @@ -19,7 +19,11 @@ config { hasOutput: true } -- in given UID, SID (user session) CREATE OR REPLACE FUNCTION ${self()}(evSeries ARRAY, regexpParts ARRAY) RETURNS ARRAY> -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="ts_pattern_match is function that returns range of matched pattern in given UID, SID (user session)" +) +AS """ var chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'; var charIdx = 0; diff --git a/udfs/community/cw_url_decode.sqlx b/udfs/community/cw_url_decode.sqlx index 6867dde9f..61afd567e 100644 --- a/udfs/community/cw_url_decode.sqlx +++ b/udfs/community/cw_url_decode.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* url decode a string */ CREATE OR REPLACE FUNCTION ${self()}(path STRING) RETURNS STRING -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="URL decode a string" +) +AS """ if (path == null) return null; try { return decodeURIComponent(path); diff --git a/udfs/community/cw_url_encode.sqlx b/udfs/community/cw_url_encode.sqlx index 22e51a8d0..e5b649d7c 100644 --- a/udfs/community/cw_url_encode.sqlx +++ b/udfs/community/cw_url_encode.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* url encode a string */ CREATE OR REPLACE FUNCTION ${self()}(path STRING) RETURNS STRING -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="URL encode a string" +) +AS """ if (path == null) return null; try { return encodeURIComponent(path); diff --git a/udfs/community/cw_url_extract_authority.sqlx b/udfs/community/cw_url_extract_authority.sqlx index 8f65a2a72..b48eceee7 100644 --- a/udfs/community/cw_url_extract_authority.sqlx +++ b/udfs/community/cw_url_extract_authority.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Extract the 
authority from a url, returns "" (empty string) if no authority is found. */ -CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING as ( +CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING +OPTIONS ( + description="""Extract the authority from a url, returns "" (empty string) if no authority is found.""" +) +AS ( (WITH a AS ( SELECT ${ref("cw_url_extract_host")}(url) AS host, ${ref("cw_url_extract_port")}(url) AS port ) SELECT IF(a.port is null, host, concat(host, ":", port)) FROM a) diff --git a/udfs/community/cw_url_extract_file.sqlx b/udfs/community/cw_url_extract_file.sqlx index 0b33990dc..88488bd52 100644 --- a/udfs/community/cw_url_extract_file.sqlx +++ b/udfs/community/cw_url_extract_file.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Extract the file from a url, returns "" (empty string) string if no file is found. */ -CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING as ( +CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING +OPTIONS ( + description="""Extract the file from a url, returns "" (empty string) string if no file is found.""" +) +AS ( (WITH a AS ( SELECT ${ref("cw_url_extract_path")}(url) AS path, ${ref("cw_url_extract_query")}(url) AS query ) SELECT IF(a.query is null or length(a.query) = 0, path, concat(path, "?", query)) FROM a) diff --git a/udfs/community/cw_url_extract_fragment.sqlx b/udfs/community/cw_url_extract_fragment.sqlx index a1f27d7a8..384108b81 100644 --- a/udfs/community/cw_url_extract_fragment.sqlx +++ b/udfs/community/cw_url_extract_fragment.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Extract the fragment from a url, returns "" (empty string) if no fragment is found. 
*/ -CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING +OPTIONS ( + description="""Extract the fragment from a url, returns "" (empty string) if no fragment is found.""" +) +AS ( COALESCE(REGEXP_EXTRACT(url,'#(.+)'),'') ); diff --git a/udfs/community/cw_url_extract_host.sqlx b/udfs/community/cw_url_extract_host.sqlx index baff55da0..2d4cd6b6b 100644 --- a/udfs/community/cw_url_extract_host.sqlx +++ b/udfs/community/cw_url_extract_host.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Extract the host from a url, return "" (empty string) if no host is found. */ -CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING +OPTIONS ( + description="""Extract the host from a url, return "" (empty string) if no host is found.""" +) +AS ( COALESCE(NET.HOST(url), '') ); diff --git a/udfs/community/cw_url_extract_parameter.sqlx b/udfs/community/cw_url_extract_parameter.sqlx index e053d1c3e..fcb01c036 100644 --- a/udfs/community/cw_url_extract_parameter.sqlx +++ b/udfs/community/cw_url_extract_parameter.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Extract the value of a query param from a url, returns null if the parameter isn't found. */ -CREATE OR REPLACE FUNCTION ${self()}(url STRING, pname STRING) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(url STRING, pname STRING) RETURNS STRING +OPTIONS ( + description="Extract the value of a query param from a url, returns null if the parameter isn't found." 
+) +AS ( SPLIT(REGEXP_EXTRACT(url, CONCAT('[?&]',pname,'=([^&]+).*$')),'#')[OFFSET(0)] ); diff --git a/udfs/community/cw_url_extract_path.sqlx b/udfs/community/cw_url_extract_path.sqlx index 1b6dc4874..483cf089c 100644 --- a/udfs/community/cw_url_extract_path.sqlx +++ b/udfs/community/cw_url_extract_path.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Extract the path from a url, returns "" (empty string) if no path is found. */ CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="""Extract the path from a url, returns "" (empty string) if no path is found.""" +) +AS """ var queryPos = url.indexOf('?'); if (queryPos >= 0) url = url.slice(0,queryPos); diff --git a/udfs/community/cw_url_extract_port.sqlx b/udfs/community/cw_url_extract_port.sqlx index 4cce238e6..a16c1fe83 100644 --- a/udfs/community/cw_url_extract_port.sqlx +++ b/udfs/community/cw_url_extract_port.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } /* Extract the port from a url, returns null if no port is found. */ CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS INT64 -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="Extract the port from a url, returns null if no port is found." +) +AS """ var protPos = url.indexOf("//"); if (protPos >= 0) url = url.slice(protPos+2); diff --git a/udfs/community/cw_url_extract_protocol.sqlx b/udfs/community/cw_url_extract_protocol.sqlx index 12abc236d..34f878f55 100644 --- a/udfs/community/cw_url_extract_protocol.sqlx +++ b/udfs/community/cw_url_extract_protocol.sqlx @@ -16,7 +16,11 @@ config { hasOutput: true } */ /* Extract the protocol from a url, return "" (empty string) if no protocol is found. 
*/ -CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING +OPTIONS ( + description="""Extract the protocol from a url, return "" (empty string) if no protocol is found.""" +) +AS ( (WITH a AS ( SELECT STRPOS(url, "://") AS v ) SELECT IF(a.v <= 0, "", SUBSTR(url,1,a.v-1)) FROM a) diff --git a/udfs/community/cw_url_extract_query.sqlx b/udfs/community/cw_url_extract_query.sqlx index 2134d0fd8..7042e9870 100644 --- a/udfs/community/cw_url_extract_query.sqlx +++ b/udfs/community/cw_url_extract_query.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ /* Extract the query from a url, returns "" (empty string) if no query is found. */ -CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING AS ( +CREATE OR REPLACE FUNCTION ${self()}(url STRING) RETURNS STRING +OPTIONS ( + description="""Extract the query from a url, returns "" (empty string) if no query is found.""" +) +AS ( COALESCE(SUBSTR(SPLIT(REGEXP_EXTRACT(url, '[^\\?]+(\\?.*)?'),'#')[OFFSET(0)],2),'') ); diff --git a/udfs/community/day_occurrence_of_month.sqlx b/udfs/community/day_occurrence_of_month.sqlx index 3a40eb43f..c84c08ab8 100644 --- a/udfs/community/day_occurrence_of_month.sqlx +++ b/udfs/community/day_occurrence_of_month.sqlx @@ -18,7 +18,13 @@ config { hasOutput: true } -- day_occurrence_of_month: Returns the n th occurrence of the weekday in the month for the specified date. -- Input: date_expression DATE or TIMESTAMP -- Output: The result is an INTEGER value between 1 and 5, representing the n th occurrence of the weekday in the month. -CREATE OR REPLACE FUNCTION ${self()}(date_expression ANY TYPE) AS +CREATE OR REPLACE FUNCTION ${self()}(date_expression ANY TYPE) +OPTIONS ( + description="""Returns the n th occurrence of the weekday in the month for the specified date. 
+Input: date_expression DATE or TIMESTAMP +Output: The result is an INTEGER value between 1 and 5, representing the n th occurrence of the weekday in the month.""" +) +AS ( ( SELECT diff --git a/udfs/community/find_in_set.sqlx b/udfs/community/find_in_set.sqlx index 42224932e..41e6600c8 100644 --- a/udfs/community/find_in_set.sqlx +++ b/udfs/community/find_in_set.sqlx @@ -25,6 +25,9 @@ config { hasOutput: true } -- strList: string in which to search for. -- Output: Position of str in strList CREATE OR REPLACE FUNCTION ${self()}(str STRING, strList STRING) +OPTIONS ( + description="Returns the first occurrence of str in strList where strList is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument contains any commas." +) AS ( CASE WHEN STRPOS(str, ',') > 0 THEN 0 diff --git a/udfs/community/from_binary.sqlx b/udfs/community/from_binary.sqlx index c84cbd2c1..a5697d37f 100644 --- a/udfs/community/from_binary.sqlx +++ b/udfs/community/from_binary.sqlx @@ -18,7 +18,11 @@ config { hasOutput: true } -- from_binary: -- Input: STRING representing a number in binary form -- Output: INT64 number in decimal form -CREATE OR REPLACE FUNCTION ${self()}(value STRING) AS +CREATE OR REPLACE FUNCTION ${self()}(value STRING) +OPTIONS ( + description="Returns a number in decimal form from its binary representation." +) +AS ( ( SELECT diff --git a/udfs/community/from_hex.sqlx b/udfs/community/from_hex.sqlx index c2d3fb8ef..d10dd47c4 100644 --- a/udfs/community/from_hex.sqlx +++ b/udfs/community/from_hex.sqlx @@ -18,7 +18,11 @@ config { hasOutput: true } -- from_hex: -- Input: STRING representing a number in hexadecimal form -- Output: INT64 number in decimal form -CREATE OR REPLACE FUNCTION ${self()}(value STRING) AS +CREATE OR REPLACE FUNCTION ${self()}(value STRING) +OPTIONS ( + description="Returns a number in decimal form from its hexadecimal representation."
+) +AS ( ( SELECT diff --git a/udfs/community/get_array_value.sqlx b/udfs/community/get_array_value.sqlx index 4f7e6e24e..2858996c0 100644 --- a/udfs/community/get_array_value.sqlx +++ b/udfs/community/get_array_value.sqlx @@ -18,7 +18,11 @@ config { hasOutput: true } -- Given an key and a list of structs, returns the ARRAY type value. -- This can be used to get an array of values for a given key from -- an array of structs. -CREATE OR REPLACE FUNCTION ${self()}(get_key STRING, arr ANY TYPE) AS +CREATE OR REPLACE FUNCTION ${self()}(get_key STRING, arr ANY TYPE) +OPTIONS ( + description="Given a key and a list of structs, returns the ARRAY type value. This can be used to get an array of values for a given key from an array of structs." +) +AS ( ARRAY(SELECT value FROM UNNEST(arr) WHERE key = get_key) ); diff --git a/udfs/community/get_value.sqlx b/udfs/community/get_value.sqlx index 3cd7925df..522022499 100644 --- a/udfs/community/get_value.sqlx +++ b/udfs/community/get_value.sqlx @@ -17,7 +17,11 @@ config { hasOutput: true } -- Given a key and a list of key-value maps of the form [{'key': 'a', 'value': 'b'}, ...] -- returns the SCALAR type value. -CREATE OR REPLACE FUNCTION ${self()}(get_key STRING, arr ANY TYPE) AS +CREATE OR REPLACE FUNCTION ${self()}(get_key STRING, arr ANY TYPE) +OPTIONS ( + description="Given a key and a list of key-value maps of the form [{'key': 'a', 'value': 'b'}, ...] returns the SCALAR type value." +) +AS ( (SELECT value FROM UNNEST(arr) WHERE key = get_key) ); diff --git a/udfs/community/getbit.sqlx b/udfs/community/getbit.sqlx index 7204a16a7..a3c560df7 100644 --- a/udfs/community/getbit.sqlx +++ b/udfs/community/getbit.sqlx @@ -18,7 +18,13 @@ config { hasOutput: true } -- gitbit: Given an INTEGER value, returns the value of a bit at a specified position. -- Input: target_arg value, INT64 target_bit_arg position of the bit, starting at 0 -- Output: value of the bit (0 or 1) at the specified position.
-CREATE OR REPLACE FUNCTION ${self()}(target_arg INT64, target_bit_arg INT64) AS +CREATE OR REPLACE FUNCTION ${self()}(target_arg INT64, target_bit_arg INT64) +OPTIONS ( + description="""Given an INTEGER value, returns the value of a bit at a specified position. +Input: target_arg value, INT64 target_bit_arg position of the bit, starting at 0 +Output: value of the bit (0 or 1) at the specified position.""" +) +AS (( SELECT (target_arg & 1 << target_bit_arg) >> target_bit_arg diff --git a/udfs/community/knots_to_mph.sqlx b/udfs/community/knots_to_mph.sqlx index c9cd5f835..6ffe5c689 100644 --- a/udfs/community/knots_to_mph.sqlx +++ b/udfs/community/knots_to_mph.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ -CREATE OR REPLACE FUNCTION ${self()}(input_knots FLOAT64) AS ( +CREATE OR REPLACE FUNCTION ${self()}(input_knots FLOAT64) +OPTIONS ( + description="Converts knots to miles per hour." +) +AS ( input_knots * 1.15078 ); diff --git a/udfs/community/mannwhitneyu.sqlx b/udfs/community/mannwhitneyu.sqlx index 3460b2165..9de9b29ef 100644 --- a/udfs/community/mannwhitneyu.sqlx +++ b/udfs/community/mannwhitneyu.sqlx @@ -20,6 +20,11 @@ config { hasOutput: true } --inputs: x,y (arrays of samples, both should be one-dimensional, type: ARRAY ) -- alt (Defines the alternative hypothesis. The following options are available: 'two-sided', 'less', and 'greater' CREATE OR REPLACE FUNCTION ${self()}(x ARRAY, y ARRAY, alt STRING) +OPTIONS ( + description="""Computes the U statistics and the p value of the Mann–Whitney U test (also called Mann–Whitney–Wilcoxon). +Inputs: x,y (arrays of samples, both should be one-dimensional, type: ARRAY ) + alt (Defines the alternative hypothesis. 
The following options are available: 'two-sided', 'less', and 'greater')""" +) AS ( ( WITH statistics as ( diff --git a/udfs/community/mph_to_knots.sqlx b/udfs/community/mph_to_knots.sqlx index 11966d9b6..14ec3cd47 100644 --- a/udfs/community/mph_to_knots.sqlx +++ b/udfs/community/mph_to_knots.sqlx @@ -16,6 +16,10 @@ config { hasOutput: true } */ -CREATE OR REPLACE FUNCTION ${self()}(input_mph FLOAT64) AS ( +CREATE OR REPLACE FUNCTION ${self()}(input_mph FLOAT64) +OPTIONS ( + description="Converts miles per hour to knots." +) +AS ( input_mph / 1.15078 ); diff --git a/udfs/community/multiply_full_scale.sqlx b/udfs/community/multiply_full_scale.sqlx index 289795bfe..c0affaf1e 100644 --- a/udfs/community/multiply_full_scale.sqlx +++ b/udfs/community/multiply_full_scale.sqlx @@ -22,7 +22,11 @@ config { hasOutput: true } * in a STRING high_scale_value and multiplies against a multiplier to allow for * high scale multiplication while returning BigQuery's NUMERIC default of (38,9) */ -CREATE OR REPLACE FUNCTION ${self()}(high_scale_value STRING, multiplier NUMERIC) AS ( +CREATE OR REPLACE FUNCTION ${self()}(high_scale_value STRING, multiplier NUMERIC) +OPTIONS ( + description="BigQuery supports a maximum precision / scale of (38,9) for NUMERIC. For some applications (e.g. financial) it may be necessary to multiply a high scale number such as an FX Rate against another NUMERIC value.
This function takes in a STRING high_scale_value and multiplies against a multiplier to allow for high scale multiplication while returning BigQuery's NUMERIC default of (38,9)" +) +AS ( CAST(SPLIT(high_scale_value, '.')[OFFSET(0)] AS NUMERIC) * multiplier + IFNULL(CAST(SPLIT(high_scale_value, '.')[SAFE_OFFSET(1)] AS NUMERIC) * multiplier / POW(NUMERIC '10', LENGTH(SPLIT(high_scale_value, '.')[SAFE_OFFSET(1)])), 0) diff --git a/udfs/community/p_fisherexact.sqlx b/udfs/community/p_fisherexact.sqlx index d310913a2..324c4bd48 100644 --- a/udfs/community/p_fisherexact.sqlx +++ b/udfs/community/p_fisherexact.sqlx @@ -20,7 +20,12 @@ config { hasOutput: true } -- PARAMETERES: a,b,c,d (values of 2x2 contingency table [[a,b];[c,d]] ) CREATE OR REPLACE FUNCTION ${self()}(a FLOAT64, b FLOAT64, c FLOAT64, d FLOAT64) RETURNS FLOAT64 -LANGUAGE js AS """ +LANGUAGE js +OPTIONS ( + description="""Computes the p value of the Fisher exact test. +Inputs: a,b,c,d (values of 2x2 contingency table [[a,b];[c,d]] )""" +) +AS """ var gl_LnF = [0.000000000000, 
0.000000000000,0.693147180560,1.791759469228,3.178053830348,4.787491742782,6.579251212010,8.525161361065,10.604602902745,12.801827480081,15.104412573076,17.502307845874,19.987214495662,22.552163853123,25.191221182739,27.899271383841,30.671860106081,33.505073450137,36.395445208033,39.339884187199,42.335616460753,45.380138898477,48.471181351835,51.606675567764,54.784729398112,58.003605222981,61.261701761002,64.557538627006,67.889743137182,71.257038967168,74.658236348830,78.092223553315,81.557959456115,85.054467017582,88.580827542198,92.136175603687,95.719694542143,99.330612454787,102.968198614514,106.631760260643,110.320639714757,114.034211781462,117.771881399745,121.533081515439,125.317271149357,129.123933639127,132.952575035616,136.802722637326,140.673923648234,144.565743946345,148.477766951773,152.409592584497,156.360836303079,160.331128216631,164.320112263195,168.327445448428,172.352797139163,176.395848406997,180.456291417544,184.533828861450,188.628173423672,192.739047287845,196.866181672890,201.009316399282,205.168199482641,209.342586752537,213.532241494563,217.736934113954,221.956441819130,226.190548323728,230.439043565777,234.701723442818,238.978389561834,243.268849002983,247.572914096187,251.890402209723,256.221135550010,260.564940971863,264.921649798553,269.291097651020,273.673124285694,278.067573440366,282.474292687630,286.893133295427,291.323950094270,295.766601350761,300.220948647014,304.686856765669,309.164193580147,313.652829949879,318.152639620209,322.663499126726,327.185287703775,331.717887196929,336.261181979199,340.815058870799,345.379407062267,349.954118040770,354.539085519441,359.134205369576,363.739375555564,368.354496072405,372.979468885689,377.614197873919,382.258588773060,386.912549123218,391.575988217330,396.248817051792,400.930948278916,405.622296161145,410.322776526937,415.032306728250,419.750805599545,424.478193418257,429.214391866652,433.959323995015,438.712914186121,443.475088120919,448.245772745385,453.024896238496,457.812387981278,462.
608178526875,467.412199571608,472.224383926981,477.044665492586,481.872979229888,486.709261136840,491.553448223298,496.405478487218,501.265290891579,506.132825342035,511.008022665236,515.890824587823,520.781173716044,525.679013515995,530.584288294434,535.496943180170,540.416924105998,545.344177791155,550.278651724286,555.220294146895,560.169054037273,565.124881094874,570.087725725134,575.057539024710,580.034272767131,585.017879388839,590.008311975618,595.005524249382,600.009470555328,605.020105849424,610.037385686239,615.061266207085,620.091704128478,625.128656730891,630.172081847810,635.221937855060,640.278183660408,645.340778693435,650.409682895655,655.484856710889,660.566261075874,665.653857411106,670.747607611913,675.847474039737,680.953419513638,686.065407301994,691.183401114411,696.307365093814,701.437263808737,706.573062245788,711.714725802290,716.862220279104,722.015511873601,727.174567172816,732.339353146739,737.509837141778,742.685986874351,747.867770424643,753.055156230484,758.248113081374,763.446610112640,768.650616799717,773.860102952558,779.075038710167,784.295394535246,789.521141208959,794.752249825813,799.988691788643,805.230438803703,810.477462875864,815.729736303910,820.987231675938,826.249921864843,831.517780023906,836.790779582470,842.068894241700,847.352097970438,852.640365001133,857.933669825857,863.231987192405,868.535292100465,873.843559797866,879.156765776907,884.474885770752,889.797895749890,895.125771918680,900.458490711945,905.796028791646,911.138363043611,916.485470574329,921.837328707805,927.193914982477,932.555207148186,937.921183163208,943.291821191336,948.667099599020,954.046996952560,959.431492015349,964.820563745166,970.214191291518,975.612353993036,981.015031374908,986.422203146368,991.833849198224,997.249949600428,1002.670484599700,1008.095434617182,1013.524780246136,1018.958502249690,1024.396581558614,1029.838999269135,1035.285736640802,1040.736775094367,1046.192096209725,1051.651681723869,1057.115513528895,1062.583573670030,106
8.055844343702,1073.532307895633,1079.012946818975,1084.497743752466,1089.986681478623,1095.479742921963,1100.976911147256,1106.478169357801,1111.983500893733,1117.492889230361,1123.006317976526,1128.523770872991,1134.045231790853,1139.570684729985,1145.100113817497,1150.633503306224,1156.170837573243,1161.712101118401,1167.257278562881,1172.806354647776,1178.359314232698,1183.916142294397,1189.476823925413,1195.041344332735,1200.609688836497,1206.181842868674,1211.757791971821,1217.337521797807,1222.921018106589,1228.508266764989,1234.099253745500,1239.693965125102,1245.292387084100,1250.894505904980,1256.500307971276,1262.109779766461,1267.722907872849,1273.339678970516,1278.960079836233,1284.584097342420,1290.211718456111,1295.842930237932,1301.477719841101,1307.116074510435,1312.757981581373,1318.403428479016,1324.052402717178,1329.704891897446,1335.360883708266,1341.020365924026,1346.683326404162,1352.349753092274,1358.019634015255,1363.692957282426,1369.369711084694,1375.049883693712,1380.733463461050,1386.420438817390,1392.110798271714,1397.804530410517,1403.501623897022,1409.202067470413,1414.905849945069,1420.612960209818,1426.323387227193,1432.037120032702,1437.754147734109,1443.474459510716,1449.198044612669,1454.924892360256,1460.654992143229,1466.388333420127,1472.124905717606,1477.864698629786,1483.607701817595,1489.353905008135,1495.103297994044,1500.855870632869,1506.611612846456,1512.370514620334,1518.132566003114,1523.897757105899,1529.666078101692,1535.437519224822,1541.212070770367,1546.989723093589,1552.770466609382,1558.554291791712,1564.341189173078,1570.131149343976,1575.924162952360,1581.720220703125,1587.519313357586,1593.321431732963,1599.126566701879,1604.934709191860,1610.745850184837,1616.559980716662,1622.377091876625,1628.197174806977,1634.020220702460,1639.846220809841,1645.675166427451,1651.507048904734,1657.341859641797,1663.179590088963,1669.020231746336,1674.863776163368,1680.710214938425,1686.559539718372,1692.411742198147,1698.
266814120349,1704.124747274832,1709.985533498298,1715.849164673896,1721.715632730830,1727.584929643963,1733.457047433439,1739.331978164291,1745.209713946070,1751.090246932471,1756.973569320959,1762.859673352410,1768.748551310742,1774.640195522568,1780.534598356833,1786.431752224470,1792.331649578052,1798.234282911453,1804.139644759508,1810.047727697677,1815.958524341718,1821.872027347356,1827.788229409963,1833.707123264236,1839.628701683880,1845.552957481295,1851.479883507265,1857.409472650655,1863.341717838103,1869.276612033723,1875.214148238805,1881.154319491525,1887.097118866652,1893.042539475259,1898.990574464439,1904.941217017027,1910.894460351315,1916.850297720780,1922.808722413809,1928.769727753433,1934.733307097051,1940.699453836175,1946.668161396160,1952.639423235951,1958.613232847820,1964.589583757118,1970.568469522019,1976.549883733273,1982.533820013961,1988.520272019245,1994.509233436135,2000.500697983243,2006.494659410550,2012.491111499169,2018.490048061115,2024.491462939077,2030.495350006183,2036.501703165785,2042.510516351228,2048.521783525632,2054.535498681675,2060.551655841373,2066.570249055869,2072.591272405219,2078.614719998180,2084.640585972005,2090.668864492236,2096.699549752497,2102.732635974296,2108.768117406820,2114.805988326742,2120.846243038020,2126.888875871703,2132.933881185739,2138.981253364785,2145.030986820017,2151.083075988941,2157.137515335211,2163.194299348439,2169.253422544021,2175.314879462949,2181.378664671636,2187.444772761740,2193.513198349984,2199.583936077986,2205.656980612087,2211.732326643176,2217.809968886525,2223.889902081621,2229.972120991997,2236.056620405072,2242.143395131985,2248.232440007431,2254.323749889509,2260.417319659554,2266.513144221987,2272.611218504153,2278.711537456173,2284.814096050787,2290.918889283202,2297.025912170944,2303.135159753709,2309.246627093211,2315.360309273044,2321.476201398527,2327.594298596568,2333.714596015519,2339.837088825033,2345.961772215927,2352.088641400042,2358.217691610102,2364.34
8918099585,2370.482316142582,2376.617881033664,2382.755608087750,2388.895492639976,2395.037530045563,2401.181715679689,2407.328044937358,2413.476513233276,2419.627116001722,2425.779848696426,2431.934706790443,2438.091685776028,2444.250781164521,2450.411988486216,2456.575303290250,2462.740721144482,2468.908237635370,2475.077848367861,2481.249548965272,2487.423335069174,2493.599202339280,2499.777146453331,2505.957163106983,2512.139248013700,2518.323396904638,2524.509605528538,2530.697869651621,2536.888185057474,2543.080547546949,2549.274952938054,2555.471397065849,2561.669875782341,2567.870384956384,2574.072920473571,2580.277478236140,2586.484054162865,2592.692644188961,2598.903244265986,2605.115850361738,2611.330458460160]; var A_LNF_SIZE = 500; diff --git a/udfs/community/pi.sqlx b/udfs/community/pi.sqlx index 5e06a7bc8..afed641f8 100644 --- a/udfs/community/pi.sqlx +++ b/udfs/community/pi.sqlx @@ -18,6 +18,10 @@ config { hasOutput: true } /* Returns the value of pi. */ -CREATE OR REPLACE FUNCTION ${self()}() AS ( +CREATE OR REPLACE FUNCTION ${self()}() +OPTIONS ( + description="Returns the value of pi." +) +AS ( ACOS(-1) ); diff --git a/udfs/community/random_string.sqlx b/udfs/community/random_string.sqlx index 9ffebdafb..1e5ffc420 100644 --- a/udfs/community/random_string.sqlx +++ b/udfs/community/random_string.sqlx @@ -19,7 +19,14 @@ config { hasOutput: true } -- Individual characters are chosen uniformly at random from the following pool of characters: 0-9, a-z, A-Z. -- Input: random string length INT64 -- Output: random string of specified length -CREATE OR REPLACE FUNCTION ${self()}(length INT64) AS +CREATE OR REPLACE FUNCTION ${self()}(length INT64) +OPTIONS ( + description="""Returns a random string of specified length. +Individual characters are chosen uniformly at random from the following pool of characters: 0-9, a-z, A-Z. 
+Input: random string length INT64 +Output: random string of specified length""" +) +AS (( SELECT STRING_AGG([ diff --git a/udfs/community/random_value.sqlx b/udfs/community/random_value.sqlx index c5e61d0ca..ac130bea1 100644 --- a/udfs/community/random_value.sqlx +++ b/udfs/community/random_value.sqlx @@ -27,6 +27,9 @@ config { hasOutput: true } * 'tino', 'julie', 'jordan' */ CREATE OR REPLACE FUNCTION ${self()}(arr ANY TYPE) +OPTIONS ( + description="Returns a random value from an array." +) AS ( ( diff --git a/udfs/community/string_to_struct.sqlx b/udfs/community/string_to_struct.sqlx index 0984c1af0..0f9499f4b 100644 --- a/udfs/community/string_to_struct.sqlx +++ b/udfs/community/string_to_struct.sqlx @@ -23,6 +23,14 @@ config { hasOutput: true } -- kv_delimiter: string that has the delimiter between key and value e.g. ':' -- Output: struct for the above map. CREATE OR REPLACE FUNCTION ${self()}(strList STRING, entry_delimiter STRING, kv_delimiter STRING) +OPTIONS ( + description="""Helper function for parsing key-value data from a string to struct. +Inputs: +strList: string that has map in the format 'a:b,c:d....' +entry_delimiter: string that has the delimiter e.g. ',' +kv_delimiter: string that has the delimiter between key and value e.g. ':' +Output: struct for the above map.""" +) AS ( CASE WHEN REGEXP_CONTAINS(strList, entry_delimiter) OR REGEXP_CONTAINS(strList, kv_delimiter) THEN diff --git a/udfs/community/to_binary.sqlx b/udfs/community/to_binary.sqlx index 154160181..f9a297ad1 100644 --- a/udfs/community/to_binary.sqlx +++ b/udfs/community/to_binary.sqlx @@ -18,7 +18,11 @@ config { hasOutput: true } -- to_binary: -- Input: INT64 number -- Output: STRING representing the input number in binary form -CREATE OR REPLACE FUNCTION ${self()}(x INT64) AS +CREATE OR REPLACE FUNCTION ${self()}(x INT64) +OPTIONS ( + description="Returns a binary representation of a number." 
+) +AS ( ( SELECT diff --git a/udfs/community/to_hex.sqlx b/udfs/community/to_hex.sqlx index a53991d0f..77ec5ed0f 100644 --- a/udfs/community/to_hex.sqlx +++ b/udfs/community/to_hex.sqlx @@ -18,7 +18,11 @@ config { hasOutput: true } -- to_hex: -- Input: INT64 number -- Output: STRING representing the input number in hexadecimal form -CREATE OR REPLACE FUNCTION ${self()}(x INT64) AS +CREATE OR REPLACE FUNCTION ${self()}(x INT64) +OPTIONS ( + description="Returns a hexadecimal representation of a number." +) +AS ( ( SELECT diff --git a/udfs/community/ts_gen_keyed_timestamps.sqlx b/udfs/community/ts_gen_keyed_timestamps.sqlx index 37611a15a..5e9b1daa9 100644 --- a/udfs/community/ts_gen_keyed_timestamps.sqlx +++ b/udfs/community/ts_gen_keyed_timestamps.sqlx @@ -31,7 +31,23 @@ config { hasOutput: true } -- max_ts: the inclusive upper bound for the generated timestamps, normalized by the tumble_seconds -- Output: An array of generated timestamps for each key - ARRAY<STRUCT<series_key STRING, tumble_val TIMESTAMP>> CREATE OR REPLACE FUNCTION ${self()}(keys ARRAY<STRING>, tumble_seconds INT64, min_ts TIMESTAMP, max_ts Timestamp) -RETURNS ARRAY<STRUCT<series_key STRING, tumble_val TIMESTAMP>> AS (( +RETURNS ARRAY<STRUCT<series_key STRING, tumble_val TIMESTAMP>> +OPTIONS ( + description="""Generate an array of key-timestamp structs with the specified min, max and interval timeseries. +Example Usage: +SELECT * +FROM UNNEST(bqutil.fn.ts_gen_keyed_timestamps(['abc'], 900, '2020-01-01', '2020-01-02')) a +LEFT JOIN dataset.table b ON a.series_key = b.key AND a.tumble_val = b.timestamp + +Inputs: +keys: strings that are cross joined with the generated timestamps +tumble_seconds: the windowing interval for each generated timestamp +min_ts: the inclusive lower bound for the generated timestamps, normalized by the tumble_seconds +max_ts: the inclusive upper bound for the generated timestamps, normalized by the tumble_seconds + +Output: An array of generated timestamps for each key - ARRAY<STRUCT<series_key STRING, tumble_val TIMESTAMP>>""" +) +AS (( SELECT ARRAY_AGG(x) FROM ( SELECT series_key, tumble_val diff --git a/udfs/community/url_parse.sqlx b/udfs/community/url_parse.sqlx index
d834baf25..5cf0828e8 100644 --- a/udfs/community/url_parse.sqlx +++ b/udfs/community/url_parse.sqlx @@ -19,6 +19,11 @@ config { hasOutput: true } --Returns the specified part from the URL. Valid values for partToExtract include PROTOCOL, HOST, PATH, QUERY, and REF. --For example, parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') returns 'facebook.com'. CREATE OR REPLACE FUNCTION ${self()}(url STRING, part STRING) +OPTIONS ( + description="""Returns the specified part from the URL. +Valid values for part include PROTOCOL, HOST, PATH, QUERY, and REF. +For example, url_parse('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') returns 'facebook.com'.""" +) AS ( CASE WHEN UPPER(part) = 'HOST' THEN REGEXP_EXTRACT(url, r'(?:[a-zA-Z]+://)?([a-zA-Z0-9-.]+)/?') diff --git a/udfs/community/week_of_month.sqlx b/udfs/community/week_of_month.sqlx index 1efa8e0fc..a5837ee75 100644 --- a/udfs/community/week_of_month.sqlx +++ b/udfs/community/week_of_month.sqlx @@ -18,7 +18,13 @@ config { hasOutput: true } -- week_of_month: Returns the number of weeks from the beginning of the month to the specified date -- Input: date_expression DATE or TIMESTAMP -- Output: The result is an INTEGER value between 1 and 5, representing the nth occurrence of the week in the month. The value 0 means the partial week -CREATE OR REPLACE FUNCTION ${self()}(date_expression ANY TYPE) AS +CREATE OR REPLACE FUNCTION ${self()}(date_expression ANY TYPE) +OPTIONS ( + description="""Returns the number of weeks from the beginning of the month to the specified date. +Input: date_expression DATE or TIMESTAMP +Output: The result is an INTEGER value between 1 and 5, representing the nth occurrence of the week in the month.
The value 0 means the partial week.""" +) +AS ( ( SELECT diff --git a/udfs/migration/snowflake/factorial.sqlx b/udfs/migration/snowflake/factorial.sqlx index a4d75b5d6..c2c8f26ee 100644 --- a/udfs/migration/snowflake/factorial.sqlx +++ b/udfs/migration/snowflake/factorial.sqlx @@ -21,7 +21,11 @@ config { hasOutput: true } -- Input: -- integer_expr: INT64 -- Output: NUMERIC -CREATE OR REPLACE FUNCTION ${self()}(integer_expr INT64) AS ( +CREATE OR REPLACE FUNCTION ${self()}(integer_expr INT64) +OPTIONS ( + description="Computes the factorial of its input. The input argument must be an integer expression in the range of 0 to 27." +) +AS ( ( SELECT ARRAY[