Skip to content

Commit a580002

Browse files
jayzhan211findepi
authored andcommitted
Remove Expr::GetIndexedField, replace Expr::{field,index,range} with FieldAccessor, IndexAccessor, and SliceAccessor (apache#10568)
* remove expr Signed-off-by: jayzhan211 <[email protected]> * add expr extension Signed-off-by: jayzhan211 <[email protected]> * doc Signed-off-by: jayzhan211 <[email protected]> * move test that has struct Signed-off-by: jayzhan211 <[email protected]> * fmt Signed-off-by: jayzhan211 <[email protected]> * add foc and fix displayed name Signed-off-by: jayzhan211 <[email protected]> * rm test Signed-off-by: jayzhan211 <[email protected]> * rebase Signed-off-by: jayzhan211 <[email protected]> * move doc Signed-off-by: jayzhan211 <[email protected]> --------- Signed-off-by: jayzhan211 <[email protected]>
1 parent c3345a5 commit a580002

File tree

27 files changed

+365
-498
lines changed

27 files changed

+365
-498
lines changed

datafusion/core/src/datasource/listing/helpers.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool {
8484
| Expr::Exists { .. }
8585
| Expr::InSubquery(_)
8686
| Expr::ScalarSubquery(_)
87-
| Expr::GetIndexedField { .. }
8887
| Expr::GroupingSet(_)
8988
| Expr::Case { .. } => Ok(TreeNodeRecursion::Continue),
9089

datafusion/core/src/physical_planner.rs

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,7 @@ use datafusion_common::{
8383
use datafusion_expr::dml::CopyTo;
8484
use datafusion_expr::expr::{
8585
self, AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr,
86-
Cast, GetFieldAccess, GetIndexedField, GroupingSet, InList, Like, TryCast,
87-
WindowFunction,
86+
Cast, GroupingSet, InList, Like, TryCast, WindowFunction,
8887
};
8988
use datafusion_expr::expr_rewriter::unnormalize_cols;
9089
use datafusion_expr::expr_vec_fmt;
@@ -216,29 +215,6 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result<String> {
216215
let expr = create_physical_name(expr, false)?;
217216
Ok(format!("{expr} IS NOT UNKNOWN"))
218217
}
219-
Expr::GetIndexedField(GetIndexedField { expr: _, field }) => {
220-
match field {
221-
GetFieldAccess::NamedStructField { name: _ } => {
222-
unreachable!(
223-
"NamedStructField should have been rewritten in OperatorToFunction"
224-
)
225-
}
226-
GetFieldAccess::ListIndex { key: _ } => {
227-
unreachable!(
228-
"ListIndex should have been rewritten in OperatorToFunction"
229-
)
230-
}
231-
GetFieldAccess::ListRange {
232-
start: _,
233-
stop: _,
234-
stride: _,
235-
} => {
236-
unreachable!(
237-
"ListRange should have been rewritten in OperatorToFunction"
238-
)
239-
}
240-
};
241-
}
242218
Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args),
243219
Expr::WindowFunction(WindowFunction {
244220
fun,

datafusion/core/tests/expr_api/mod.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ use arrow_array::{ArrayRef, RecordBatch, StringArray, StructArray};
2121
use arrow_schema::{DataType, Field};
2222
use datafusion::prelude::*;
2323
use datafusion_common::DFSchema;
24+
use datafusion_functions::core::expr_ext::FieldAccessor;
25+
use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor};
2426
/// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
2527
use std::sync::{Arc, OnceLock};
2628

@@ -61,7 +63,7 @@ fn test_eq_with_coercion() {
6163
#[test]
6264
fn test_get_field() {
6365
evaluate_expr_test(
64-
get_field(col("props"), "a"),
66+
col("props").field("a"),
6567
vec![
6668
"+------------+",
6769
"| expr |",
@@ -77,7 +79,8 @@ fn test_get_field() {
7779
#[test]
7880
fn test_nested_get_field() {
7981
evaluate_expr_test(
80-
get_field(col("props"), "a")
82+
col("props")
83+
.field("a")
8184
.eq(lit("2021-02-02"))
8285
.or(col("id").eq(lit(1))),
8386
vec![
@@ -95,7 +98,7 @@ fn test_nested_get_field() {
9598
#[test]
9699
fn test_list() {
97100
evaluate_expr_test(
98-
array_element(col("list"), lit(1i64)),
101+
col("list").index(lit(1i64)),
99102
vec![
100103
"+------+", "| expr |", "+------+", "| one |", "| two |", "| five |",
101104
"+------+",
@@ -106,7 +109,7 @@ fn test_list() {
106109
#[test]
107110
fn test_list_range() {
108111
evaluate_expr_test(
109-
array_slice(col("list"), lit(1i64), lit(2i64), None),
112+
col("list").range(lit(1i64), lit(2i64)),
110113
vec![
111114
"+--------------+",
112115
"| expr |",

datafusion/core/tests/optimizer_integration.rs

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,19 @@ use std::collections::HashMap;
2323
use std::sync::Arc;
2424

2525
use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
26+
use arrow_schema::{Fields, SchemaBuilder};
2627
use datafusion_common::config::ConfigOptions;
27-
use datafusion_common::{plan_err, Result};
28-
use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF};
28+
use datafusion_common::tree_node::{TransformedResult, TreeNode};
29+
use datafusion_common::{plan_err, DFSchema, Result, ScalarValue};
30+
use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
31+
use datafusion_expr::{
32+
col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator,
33+
ScalarUDF, TableSource, WindowUDF,
34+
};
35+
use datafusion_functions::core::expr_ext::FieldAccessor;
2936
use datafusion_optimizer::analyzer::Analyzer;
3037
use datafusion_optimizer::optimizer::Optimizer;
38+
use datafusion_optimizer::simplify_expressions::GuaranteeRewriter;
3139
use datafusion_optimizer::{OptimizerConfig, OptimizerContext};
3240
use datafusion_sql::planner::{ContextProvider, SqlToRel};
3341
use datafusion_sql::sqlparser::ast::Statement;
@@ -233,3 +241,120 @@ impl TableSource for MyTableSource {
233241
self.schema.clone()
234242
}
235243
}
244+
245+
#[test]
246+
fn test_nested_schema_nullability() {
247+
let mut builder = SchemaBuilder::new();
248+
builder.push(Field::new("foo", DataType::Int32, true));
249+
builder.push(Field::new(
250+
"parent",
251+
DataType::Struct(Fields::from(vec![Field::new(
252+
"child",
253+
DataType::Int64,
254+
false,
255+
)])),
256+
true,
257+
));
258+
let schema = builder.finish();
259+
260+
let dfschema = DFSchema::from_field_specific_qualified_schema(
261+
vec![Some("table_name".into()), None],
262+
&Arc::new(schema),
263+
)
264+
.unwrap();
265+
266+
let expr = col("parent").field("child");
267+
assert!(expr.nullable(&dfschema).unwrap());
268+
}
269+
270+
#[test]
271+
fn test_inequalities_non_null_bounded() {
272+
let guarantees = vec![
273+
// x ∈ [1, 3] (not null)
274+
(
275+
col("x"),
276+
NullableInterval::NotNull {
277+
values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
278+
},
279+
),
280+
// s.y ∈ [1, 3] (not null)
281+
(
282+
col("s").field("y"),
283+
NullableInterval::NotNull {
284+
values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
285+
},
286+
),
287+
];
288+
289+
let mut rewriter = GuaranteeRewriter::new(guarantees.iter());
290+
291+
// (original_expr, expected_simplification)
292+
let simplified_cases = &[
293+
(col("x").lt(lit(0)), false),
294+
(col("s").field("y").lt(lit(0)), false),
295+
(col("x").lt_eq(lit(3)), true),
296+
(col("x").gt(lit(3)), false),
297+
(col("x").gt(lit(0)), true),
298+
(col("x").eq(lit(0)), false),
299+
(col("x").not_eq(lit(0)), true),
300+
(col("x").between(lit(0), lit(5)), true),
301+
(col("x").between(lit(5), lit(10)), false),
302+
(col("x").not_between(lit(0), lit(5)), false),
303+
(col("x").not_between(lit(5), lit(10)), true),
304+
(
305+
Expr::BinaryExpr(BinaryExpr {
306+
left: Box::new(col("x")),
307+
op: Operator::IsDistinctFrom,
308+
right: Box::new(lit(ScalarValue::Null)),
309+
}),
310+
true,
311+
),
312+
(
313+
Expr::BinaryExpr(BinaryExpr {
314+
left: Box::new(col("x")),
315+
op: Operator::IsDistinctFrom,
316+
right: Box::new(lit(5)),
317+
}),
318+
true,
319+
),
320+
];
321+
322+
validate_simplified_cases(&mut rewriter, simplified_cases);
323+
324+
let unchanged_cases = &[
325+
col("x").gt(lit(2)),
326+
col("x").lt_eq(lit(2)),
327+
col("x").eq(lit(2)),
328+
col("x").not_eq(lit(2)),
329+
col("x").between(lit(3), lit(5)),
330+
col("x").not_between(lit(3), lit(10)),
331+
];
332+
333+
validate_unchanged_cases(&mut rewriter, unchanged_cases);
334+
}
335+
336+
fn validate_simplified_cases<T>(rewriter: &mut GuaranteeRewriter, cases: &[(Expr, T)])
337+
where
338+
ScalarValue: From<T>,
339+
T: Clone,
340+
{
341+
for (expr, expected_value) in cases {
342+
let output = expr.clone().rewrite(rewriter).data().unwrap();
343+
let expected = lit(ScalarValue::from(expected_value.clone()));
344+
assert_eq!(
345+
output, expected,
346+
"{} simplified to {}, but expected {}",
347+
expr, output, expected
348+
);
349+
}
350+
}
351+
fn validate_unchanged_cases(rewriter: &mut GuaranteeRewriter, cases: &[Expr]) {
352+
for expr in cases {
353+
let output = expr.clone().rewrite(rewriter).data().unwrap();
354+
assert_eq!(
355+
&output, expr,
356+
"{} was simplified to {}, but expected it to be unchanged",
357+
expr, output
358+
);
359+
}
360+
}

0 commit comments

Comments
 (0)