Skip to content

Commit 63d3b86

Browse files
authored
chore: patch changes from enterprise (#25776)
- reduce parquet row group size to 100k - add cli option to disable cached parquet loader
1 parent aa9213c commit 63d3b86

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

influxdb3_clap_blocks/src/datafusion.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@ pub struct IoxQueryDatafusionConfig {
3131
)]
3232
pub max_parquet_fanout: usize,
3333

34+
/// Use a cached parquet loader when reading parquet files from object store
35+
///
36+
/// This reduces IO operations to a remote object store as parquet is typically read via
37+
/// multiple read_range requests which would each require an IO operation. This will cache the
38+
/// entire parquet file in memory and serve the read_range requests from the cached data, thus
39+
/// requiring a single IO operation.
40+
#[clap(
41+
long = "datafusion-use-cached-parquet-loader",
42+
env = "INFLUXDB3_DATAFUSION_USE_CACHED_PARQUET_LOADER",
43+
default_value = "true",
44+
action
45+
)]
46+
pub use_cached_parquet_loader: bool,
47+
3448
/// Provide custom configuration to DataFusion as a comma-separated list of key:value pairs.
3549
///
3650
/// # Example
@@ -64,6 +78,13 @@ impl IoxQueryDatafusionConfig {
6478
format!("{prefix}.max_parquet_fanout", prefix = IoxConfigExt::PREFIX),
6579
self.max_parquet_fanout.to_string(),
6680
);
81+
self.datafusion_config.insert(
82+
format!(
83+
"{prefix}.use_cached_parquet_loader",
84+
prefix = IoxConfigExt::PREFIX
85+
),
86+
self.use_cached_parquet_loader.to_string(),
87+
);
6788
self.datafusion_config
6889
}
6990
}

influxdb3_write/src/persister.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ pub struct TrackedMemoryArrowWriter<W: Write + Send> {
392392
}
393393

394394
/// Parquet row group write size
395-
pub const ROW_GROUP_WRITE_SIZE: usize = 1024 * 1024;
395+
pub const ROW_GROUP_WRITE_SIZE: usize = 100_000;
396396

397397
impl<W: Write + Send> TrackedMemoryArrowWriter<W> {
398398
/// create a new `TrackedMemoryArrowWriter<W>`

0 commit comments

Comments
 (0)