-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(cubesql) Implement format / col_description #8947
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3087,10 +3087,17 @@ | |
Some(as_str) => { | ||
match PgType::get_all().iter().find(|e| e.typname == as_str) { | ||
None => { | ||
return Err(DataFusionError::Execution(format!( | ||
"Unable to cast expression to Regclass: Unknown type: {}", | ||
as_str | ||
))) | ||
// If the type name contains a dot, it's a schema-qualified name | ||
// and we should return return the approprate RegClass to be converted to OID | ||
// For now, we'll return 0 so metabase can sync without failing | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add TODO here along the lines of "implement something" |
||
if as_str.contains('.') { | ||
builder.append_value(0)?; | ||
} else { | ||
return Err(DataFusionError::Execution(format!( | ||
"Unable to cast expression to Regclass: Unknown type: {}", | ||
as_str | ||
))); | ||
} | ||
} | ||
Some(ty) => { | ||
builder.append_value(ty.oid as i64)?; | ||
|
@@ -3148,6 +3155,155 @@ | |
) | ||
} | ||
|
||
// Return a NOOP for this so metabase can sync without failing | ||
// TODO: Implement this | ||
pub fn create_col_description_udf() -> ScalarUDF { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please attach links to respective parts of PostgreSQL documentation? Like https://www.postgresql.org/docs/17/functions-info.html#FUNCTIONS-INFO-COMMENT here, and https://www.postgresql.org/docs/17/functions-string.html#FUNCTIONS-STRING-FORMAT for format, maybe other important references. |
||
let fun = make_scalar_function(move |args: &[ArrayRef]| { | ||
// Ensure the output array has the same length as the input | ||
let input_length = args[0].len(); | ||
let mut builder = StringBuilder::new(input_length); | ||
|
||
for _ in 0..input_length { | ||
builder.append_null()?; | ||
} | ||
|
||
Ok(Arc::new(builder.finish()) as ArrayRef) | ||
}); | ||
|
||
let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(Arc::new(DataType::Utf8))); | ||
|
||
ScalarUDF::new( | ||
"col_description", | ||
&Signature::one_of( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the point of this |
||
vec![TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8])], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You can check against First argument would be used to filter |
||
Volatility::Immutable, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
), | ||
&return_type, | ||
&fun, | ||
) | ||
} | ||
|
||
pub fn create_format_udf() -> ScalarUDF { | ||
let fun = make_scalar_function(move |args: &[ArrayRef]| { | ||
// Ensure at least one argument is provided | ||
if args.is_empty() { | ||
return Err(DataFusionError::Execution( | ||
"format() requires at least one argument".to_string(), | ||
)); | ||
} | ||
|
||
// Ensure the first argument is a Utf8 (string) | ||
if args[0].data_type() != &DataType::Utf8 { | ||
return Err(DataFusionError::Execution( | ||
"format() first argument must be a string".to_string(), | ||
)); | ||
} | ||
|
||
let format_strings = downcast_string_arg!(&args[0], "format_str", i32); | ||
let mut builder = StringBuilder::new(format_strings.len()); | ||
|
||
for i in 0..format_strings.len() { | ||
if format_strings.is_null(i) { | ||
builder.append_null()?; | ||
continue; | ||
} | ||
|
||
let format_str = format_strings.value(i); | ||
let mut result = String::new(); | ||
let mut format_chars = format_str.chars().peekable(); | ||
let mut arg_index = 1; // Start from first argument after format string | ||
|
||
while let Some(c) = format_chars.next() { | ||
if c != '%' { | ||
result.push(c); | ||
continue; | ||
} | ||
|
||
match format_chars.next() { | ||
Some('I') => { | ||
// Handle %I - SQL identifier | ||
if arg_index >= args.len() { | ||
return Err(DataFusionError::Execution( | ||
"Not enough arguments for format string".to_string(), | ||
)); | ||
} | ||
|
||
let arg = &args[arg_index]; | ||
let value = match arg.data_type() { | ||
DataType::Utf8 => { | ||
let str_arr = downcast_string_arg!(arg, "arg", i32); | ||
if str_arr.is_null(i) { | ||
return Err(DataFusionError::Execution( | ||
"NULL values cannot be formatted as identifiers" | ||
.to_string(), | ||
)); | ||
} | ||
str_arr.value(i).to_string() | ||
} | ||
_ => { | ||
// For other types, try to convert to string | ||
let str_arr = cast(&arg, &DataType::Utf8)?; | ||
let str_arr = | ||
str_arr.as_any().downcast_ref::<StringArray>().unwrap(); | ||
if str_arr.is_null(i) { | ||
return Err(DataFusionError::Execution( | ||
"NULL values cannot be formatted as identifiers" | ||
.to_string(), | ||
)); | ||
} | ||
str_arr.value(i).to_string() | ||
} | ||
}; | ||
|
||
// Quote identifier if necessary | ||
let needs_quoting = !value | ||
.chars() | ||
.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bit wrong, because identifiers |
||
|| value.is_empty(); | ||
|
||
if needs_quoting { | ||
result.push('"'); | ||
result.push_str(&value.replace('"', "\"\"")); | ||
result.push('"'); | ||
} else { | ||
result.push_str(&value); | ||
} | ||
arg_index += 1; | ||
} | ||
Some('%') => { | ||
// %% is escaped to single % | ||
result.push('%'); | ||
} | ||
Some(c) => { | ||
return Err(DataFusionError::Execution(format!( | ||
"Unsupported format specifier %{}", | ||
c | ||
))); | ||
} | ||
None => { | ||
return Err(DataFusionError::Execution( | ||
"Invalid format string - ends with %".to_string(), | ||
)); | ||
} | ||
} | ||
} | ||
|
||
builder.append_value(result)?; | ||
} | ||
|
||
Ok(Arc::new(builder.finish()) as ArrayRef) | ||
}); | ||
|
||
let return_type: ReturnTypeFunction = Arc::new(move |_| Ok(Arc::new(DataType::Utf8))); | ||
|
||
ScalarUDF::new( | ||
"format", | ||
&Signature::variadic(vec![DataType::Utf8], Volatility::Immutable), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Unfortunately, there's no proper way to represent it in Please comment this part: about casting and TODO about incorrect signature |
||
&return_type, | ||
&fun, | ||
) | ||
} | ||
|
||
pub fn create_json_build_object_udf() -> ScalarUDF { | ||
let fun = make_scalar_function(move |_args: &[ArrayRef]| { | ||
// TODO: Implement | ||
|
@@ -3769,13 +3925,6 @@ | |
rettyp = TimestampTz, | ||
vol = Volatile | ||
); | ||
register_fun_stub!( | ||
udf, | ||
"col_description", | ||
tsig = [Oid, Int32], | ||
rettyp = Utf8, | ||
vol = Stable | ||
); | ||
register_fun_stub!(udf, "convert", tsig = [Binary, Utf8, Utf8], rettyp = Binary); | ||
register_fun_stub!(udf, "convert_from", tsig = [Binary, Utf8], rettyp = Utf8); | ||
register_fun_stub!(udf, "convert_to", tsig = [Utf8, Utf8], rettyp = Binary); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
source: cubesql/src/compile/mod.rs | ||
expression: result | ||
--- | ||
+----------------------+ | ||
| formatted_identifier | | ||
+----------------------+ | ||
| column_name | | ||
+----------------------+ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
source: cubesql/src/compile/mod.rs | ||
expression: result | ||
--- | ||
+-------------------------+ | ||
| formatted_identifiers | | ||
+-------------------------+ | ||
| table_name, column_name | | ||
+-------------------------+ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
source: cubesql/src/compile/mod.rs | ||
expression: result | ||
--- | ||
+-------------------+ | ||
| quoted_identifier | | ||
+-------------------+ | ||
| "column-name" | | ||
+-------------------+ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
--- | ||
source: cubesql/src/compile/mod.rs | ||
expression: result | ||
--- | ||
+----------------+ | ||
| quoted_keyword | | ||
+----------------+ | ||
| select | | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PostgreSQL actually wraps keywords with quotes, and accepts keywords unwrapped only in assigning column alias position, but does not accept those in subquery alias or in column expression. Maybe just wrap any |
||
+----------------+ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo:
return return