Skip to content

Commit 65006b2

Browse files
efredine and Eric Fredine authored
Adds support for Dictionary statistics from parquet data pages. (apache#11195)
Co-authored-by: Eric Fredine <[email protected]>
1 parent e40c8a8 commit 65006b2

File tree

2 files changed

+4
-1
lines changed

2 files changed

+4
-1
lines changed

datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -846,6 +846,9 @@ macro_rules! get_data_page_statistics {
846846
})
847847
}).flatten().collect::<Vec<_>>(),
848848
))),
849+
Some(DataType::Dictionary(_, value_type)) => {
850+
[<$stat_type_prefix:lower _ page_statistics>](Some(value_type), $iterator)
851+
},
849852
Some(DataType::Timestamp(unit, timezone)) => {
850853
let iter = [<$stat_type_prefix Int64DataPageStatsIterator>]::new($iterator).flatten();
851854
Ok(match unit {

datafusion/core/tests/parquet/arrow_statistics.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1752,7 +1752,7 @@ async fn test_dictionary() {
17521752
expected_null_counts: UInt64Array::from(vec![1, 0]),
17531753
expected_row_counts: Some(UInt64Array::from(vec![5, 2])),
17541754
column_name: "string_dict_i32",
1755-
check: Check::RowGroup,
1755+
check: Check::Both,
17561756
}
17571757
.run();
17581758

0 commit comments

Comments
 (0)