Skip to content

Commit a9a3557

Browse files
authored
Add cache options to ArrowReaderProperties (#589)
1 parent 7449f56 commit a9a3557

File tree

5 files changed

+151
-0
lines changed

5 files changed

+151
-0
lines changed

cpp/arrow/ArrowReaderProperties.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,4 +96,46 @@ extern "C"
9696
{
9797
TRYCATCH(properties->set_arrow_extensions_enabled(extensions_enabled);)
9898
}
99+
100+
// Reads the `hole_size_limit` field of the cache options held by `properties` and stores it in `*value`.
// The function has no explicit return statement: the ExceptionInfo* result is produced by the
// TRYCATCH macro (defined outside this diff) that wraps the body.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_HoleSizeLimit(const ArrowReaderProperties* properties, int64_t* value)
101+
{
102+
TRYCATCH(
103+
const auto& opts = properties->cache_options();
104+
*value = opts.hole_size_limit;
105+
)
106+
}
107+
108+
// Reads the `range_size_limit` field of the cache options held by `properties` and stores it in `*value`.
// The function has no explicit return statement: the ExceptionInfo* result is produced by the
// TRYCATCH macro (defined outside this diff) that wraps the body.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_RangeSizeLimit(const ArrowReaderProperties* properties, int64_t* value)
109+
{
110+
TRYCATCH(
111+
const auto& opts = properties->cache_options();
112+
*value = opts.range_size_limit;
113+
)
114+
}
115+
116+
// Reads the `lazy` flag of the cache options held by `properties` and stores it in `*value`.
// Note that `value` points at a C++ `bool`, so exactly sizeof(bool) bytes are written; callers on the
// managed side must marshal the out-parameter accordingly.
// The ExceptionInfo* result is produced by the TRYCATCH macro (defined outside this diff).
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_Lazy(const ArrowReaderProperties* properties, bool* value)
117+
{
118+
TRYCATCH(
119+
const auto& opts = properties->cache_options();
120+
*value = opts.lazy;
121+
)
122+
}
123+
124+
// Reads the `prefetch_limit` field of the cache options held by `properties` and stores it in `*value`.
// The function has no explicit return statement: the ExceptionInfo* result is produced by the
// TRYCATCH macro (defined outside this diff) that wraps the body.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetCacheOptions_PrefetchLimit(const ArrowReaderProperties* properties, int64_t* value)
125+
{
126+
TRYCATCH(
127+
const auto& opts = properties->cache_options();
128+
*value = opts.prefetch_limit;
129+
)
130+
}
131+
132+
// Replaces the cache options on `properties` with a new ::arrow::io::CacheOptions built from the
// four scalar arguments. All four fields are always overwritten together; there is no per-field setter.
// The struct is filled in before TRYCATCH; only the set_cache_options call itself is wrapped by the
// macro, which also supplies the ExceptionInfo* return value (macro defined outside this diff).
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_SetCacheOptions(ArrowReaderProperties* properties, int64_t hole_size_limit, int64_t range_size_limit, bool lazy, int64_t prefetch_limit)
133+
{
134+
::arrow::io::CacheOptions cache_options;
135+
cache_options.hole_size_limit = hole_size_limit;
136+
cache_options.range_size_limit = range_size_limit;
137+
cache_options.lazy = lazy;
138+
cache_options.prefetch_limit = prefetch_limit;
139+
TRYCATCH(properties->set_cache_options(cache_options);)
140+
}
99141
}

csharp.test/Arrow/TestArrowReaderProperties.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ public void TestDefaultProperties()
1919
Assert.That(properties.BinaryType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.Binary));
2020
Assert.That(properties.ListType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.List));
2121
Assert.That(properties.ArrowExtensionEnabled, Is.False);
22+
Assert.That(properties.CacheOptions.hole_size_limit, Is.EqualTo(8192));
23+
Assert.That(properties.CacheOptions.range_size_limit, Is.EqualTo(32 * 1024 * 1024));
24+
Assert.That(properties.CacheOptions.lazy, Is.True);
25+
Assert.That(properties.CacheOptions.prefetch_limit, Is.EqualTo(0));
2226
}
2327

2428
[Test]
@@ -34,6 +38,7 @@ public void TestSetProperties()
3438
properties.BinaryType = Apache.Arrow.Types.ArrowTypeId.LargeBinary;
3539
properties.ListType = Apache.Arrow.Types.ArrowTypeId.LargeList;
3640
properties.ArrowExtensionEnabled = true;
41+
properties.CacheOptions = new CacheOptions(hole_size_limit: 1024, range_size_limit: 2048, lazy: false, prefetch_limit: 4096);
3742

3843
Assert.That(properties.UseThreads, Is.True);
3944
Assert.That(properties.BatchSize, Is.EqualTo(789));
@@ -43,6 +48,10 @@ public void TestSetProperties()
4348
Assert.That(properties.BinaryType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.LargeBinary));
4449
Assert.That(properties.ListType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.LargeList));
4550
Assert.That(properties.ArrowExtensionEnabled, Is.True);
51+
Assert.That(properties.CacheOptions.hole_size_limit, Is.EqualTo(1024));
52+
Assert.That(properties.CacheOptions.range_size_limit, Is.EqualTo(2048));
53+
Assert.That(properties.CacheOptions.lazy, Is.False);
54+
Assert.That(properties.CacheOptions.prefetch_limit, Is.EqualTo(4096));
4655
}
4756
}
4857
}

csharp/Arrow/ArrowReaderProperties.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,37 @@ public bool ArrowExtensionEnabled
171171
}
172172
}
173173

174+
/// <summary>
175+
/// The options for read coalescing.
176+
/// This can be used to tune the
177+
/// implementation for characteristics of different filesystems.
178+
/// </summary>
/// <remarks>
/// The getter returns a snapshot: it issues one native call per field and packs the results into a
/// new <see cref="CacheOptions"/> value, so mutating the returned struct does not affect these
/// properties. To change a setting, assign a complete <see cref="CacheOptions"/> back to this property;
/// the setter passes all four fields to the native side in a single call.
/// </remarks>
179+
public CacheOptions CacheOptions
180+
{
181+
get
182+
{
183+
ExceptionInfo.Check(ArrowReaderProperties_GetCacheOptions_HoleSizeLimit(Handle.IntPtr, out long holeSizeLimit));
184+
ExceptionInfo.Check(ArrowReaderProperties_GetCacheOptions_RangeSizeLimit(Handle.IntPtr, out long rangeSizeLimit));
185+
ExceptionInfo.Check(ArrowReaderProperties_GetCacheOptions_Lazy(Handle.IntPtr, out bool lazy));
186+
ExceptionInfo.Check(ArrowReaderProperties_GetCacheOptions_PrefetchLimit(Handle.IntPtr, out long prefetchLimit));
187+
// Keep the managed handle reachable until every native call using its raw IntPtr has returned.
GC.KeepAlive(Handle);
188+
189+
return new CacheOptions(holeSizeLimit, rangeSizeLimit, lazy, prefetchLimit);
190+
}
191+
192+
set
193+
{
194+
ExceptionInfo.Check(ArrowReaderProperties_SetCacheOptions(
195+
Handle.IntPtr,
196+
value.hole_size_limit,
197+
value.range_size_limit,
198+
value.lazy,
199+
value.prefetch_limit));
200+
201+
// Keep the managed handle reachable until the native call using its raw IntPtr has returned.
GC.KeepAlive(Handle);
202+
}
203+
}
204+
174205
[DllImport(ParquetDll.Name)]
175206
private static extern IntPtr ArrowReaderProperties_GetDefault(out IntPtr readerProperties);
176207

@@ -225,6 +256,21 @@ public bool ArrowExtensionEnabled
225256
[DllImport(ParquetDll.Name)]
226257
private static extern IntPtr ArrowReaderProperties_SetArrowExtensionEnabled(IntPtr readerProperties, bool extensionsEnabled);
227258

259+
// P/Invoke binding for the native getter of the cache options' hole_size_limit (int64 on the native side).
// The returned IntPtr is the native exception-info pointer; callers in this class pass it to ExceptionInfo.Check.
[DllImport(ParquetDll.Name)]
260+
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_HoleSizeLimit(IntPtr readerProperties, out long holeSizeLimit);
261+
262+
// P/Invoke binding for the native getter of the cache options' range_size_limit (int64 on the native side).
// The returned IntPtr is the native exception-info pointer; callers in this class pass it to ExceptionInfo.Check.
[DllImport(ParquetDll.Name)]
263+
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_RangeSizeLimit(IntPtr readerProperties, out long rangeSizeLimit);
264+
265+
// P/Invoke binding for the native getter of the cache options' lazy flag.
// The native function writes a one-byte C++ `bool` through the pointer, whereas the default P/Invoke
// marshalling for System.Boolean is a four-byte Win32 BOOL. UnmanagedType.I1 makes the marshaller
// read back exactly the single byte the native side wrote.
// The returned IntPtr is the native exception-info pointer; callers in this class pass it to ExceptionInfo.Check.
[DllImport(ParquetDll.Name)]
266+
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_Lazy(IntPtr readerProperties, [MarshalAs(UnmanagedType.I1)] out bool lazy);
267+
268+
// P/Invoke binding for the native getter of the cache options' prefetch_limit (int64 on the native side).
// The returned IntPtr is the native exception-info pointer; callers in this class pass it to ExceptionInfo.Check.
[DllImport(ParquetDll.Name)]
269+
private static extern IntPtr ArrowReaderProperties_GetCacheOptions_PrefetchLimit(IntPtr readerProperties, out long prefetchLimit);
270+
271+
// P/Invoke binding for the native setter that replaces all four cache option fields at once.
// The native `lazy` parameter is a one-byte C++ `bool`; UnmanagedType.I1 overrides the default
// four-byte Win32 BOOL marshalling so the managed and native signatures agree.
// The returned IntPtr is the native exception-info pointer; callers in this class pass it to ExceptionInfo.Check.
[DllImport(ParquetDll.Name)]
272+
private static extern IntPtr ArrowReaderProperties_SetCacheOptions(IntPtr readerProperties, long holeSizeLimit, long rangeSizeLimit, [MarshalAs(UnmanagedType.I1)] bool lazy, long prefetchLimit);
273+
228274
internal readonly ParquetHandle Handle;
229275
}
230276
}

csharp/CacheOption.cs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
using System;
2+
using System.Runtime.InteropServices;
3+
4+
namespace ParquetSharp
5+
{
6+
/// <summary>
/// Plain value type carrying the four read-coalescing cache settings
/// (hole size limit, range size limit, lazy flag and prefetch limit).
/// </summary>
/// <remarks>
/// Because this is a struct, the implicit parameterless constructor leaves every field at its
/// zero value (0 / false). Use the four-argument constructor, or start from an existing value,
/// when specific settings are required.
/// </remarks>
public struct CacheOptions
7+
{
8+
/// <summary>
/// Creates a <see cref="CacheOptions"/> by copying each argument into the field of the same name.
/// </summary>
/// <param name="hole_size_limit">Value for <see cref="hole_size_limit"/>.</param>
/// <param name="range_size_limit">Value for <see cref="range_size_limit"/>.</param>
/// <param name="lazy">Value for <see cref="lazy"/>.</param>
/// <param name="prefetch_limit">Value for <see cref="prefetch_limit"/>; defaults to 0.</param>
public CacheOptions(long hole_size_limit, long range_size_limit, bool lazy, long prefetch_limit = 0)
9+
{
10+
this.hole_size_limit = hole_size_limit;
11+
this.range_size_limit = range_size_limit;
12+
this.lazy = lazy;
13+
this.prefetch_limit = prefetch_limit;
14+
}
15+
16+
/// <summary>
17+
/// The maximum distance in bytes between two consecutive
18+
/// ranges; beyond this value, ranges are not combined.
19+
/// </summary>
20+
public long hole_size_limit;
21+
22+
/// <summary>
23+
/// The maximum size in bytes of a combined range; if
24+
/// combining two consecutive ranges would produce a range of a
25+
/// size greater than this, they are not combined.
26+
/// </summary>
27+
public long range_size_limit;
28+
29+
/// <summary>
30+
/// A lazy cache does not perform any I/O until requested.
31+
/// lazy = false: request all byte ranges when PreBuffer or WillNeed is called.
32+
/// lazy = true, prefetch_limit = 0: request merged byte ranges only after the reader
33+
/// needs them.
34+
/// lazy = true, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the
35+
/// range that is currently being read.
36+
/// </summary>
37+
public bool lazy;
38+
39+
/// <summary>
40+
/// The maximum number of ranges to be prefetched. This is only used
41+
/// for a lazy cache, to asynchronously read some ranges after reading the target range.
42+
/// </summary>
43+
public long prefetch_limit;
44+
}
45+
}

csharp/PublicAPI.Unshipped.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,15 @@ ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.get -> Apache.Arrow.Types.Ar
55
ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.set -> void
66
ParquetSharp.Arrow.ArrowReaderProperties.ListType.get -> Apache.Arrow.Types.ArrowTypeId
77
ParquetSharp.Arrow.ArrowReaderProperties.ListType.set -> void
8+
ParquetSharp.Arrow.ArrowReaderProperties.CacheOptions.get -> ParquetSharp.CacheOptions
9+
ParquetSharp.Arrow.ArrowReaderProperties.CacheOptions.set -> void
10+
ParquetSharp.CacheOptions
11+
ParquetSharp.CacheOptions.CacheOptions() -> void
12+
ParquetSharp.CacheOptions.CacheOptions(long hole_size_limit, long range_size_limit, bool lazy, long prefetch_limit = 0) -> void
13+
ParquetSharp.CacheOptions.hole_size_limit -> long
14+
ParquetSharp.CacheOptions.range_size_limit -> long
15+
ParquetSharp.CacheOptions.lazy -> bool
16+
ParquetSharp.CacheOptions.prefetch_limit -> long
817
ParquetSharp.ReaderProperties.ThriftStringSizeLimit.get -> int
918
ParquetSharp.ReaderProperties.SetThriftStringSizeLimit(int size) -> void
1019
ParquetSharp.ReaderProperties.ThriftContainerSizeLimit.get -> int

0 commit comments

Comments
 (0)