Skip to content

Commit 43576f5

Browse files
authored
Add binary_type to ArrowReaderProperties (#569)
1 parent ab4bdc9 commit 43576f5

File tree

7 files changed

+92
-1
lines changed

7 files changed

+92
-1
lines changed

cpp/Enums.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,9 @@ namespace
102102
static_assert((int) ::arrow::StatusCode::OutOfMemory == 1);
103103
static_assert((int) ::arrow::StatusCode::IOError == 5);
104104
static_assert((int) ::arrow::StatusCode::UnknownError == 9);
105+
106+
// Pin the numeric values of the Arrow binary type ids. The managed enum
// ParquetSharp.CppTypeId (csharp/CppTypeId.cs) declares the same three values
// (14, 35, 40); a mismatch with the Arrow headers is reported as a compile error here.
static_assert((int) ::arrow::Type::type::BINARY == 14);
107+
static_assert((int) ::arrow::Type::type::LARGE_BINARY == 35);
108+
static_assert((int) ::arrow::Type::type::BINARY_VIEW == 40);
105109
}
106110
}

cpp/arrow/ArrowReaderProperties.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,14 @@ extern "C"
6666
{
6767
TRYCATCH(properties->set_coerce_int96_timestamp_unit(unit);)
6868
}
69+
70+
// Exported getter: reads properties->binary_type() and stores the result through `value`.
// The statement runs inside the TRYCATCH macro (defined outside this view), which
// presumably turns a thrown C++ exception into the returned ExceptionInfo* - confirm against the macro.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_BinaryType(ArrowReaderProperties* properties, ::arrow::Type::type* value)
71+
{
72+
TRYCATCH(*value = properties->binary_type();)
73+
}
74+
75+
// Exported setter: forwards `value` to properties->set_binary_type().
// The statement runs inside the TRYCATCH macro (defined outside this view), which
// presumably turns a thrown C++ exception into the returned ExceptionInfo* - confirm against the macro.
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_SetBinaryType(ArrowReaderProperties* properties, ::arrow::Type::type value)
76+
{
77+
TRYCATCH(properties->set_binary_type(value);)
78+
}
6979
}

csharp.test/Arrow/TestArrowReaderProperties.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,14 @@ public void TestSetProperties()
2828
properties.SetReadDictionary(0, true);
2929
properties.PreBuffer = false;
3030
properties.CoerceInt96TimestampUnit = Apache.Arrow.Types.TimeUnit.Microsecond;
31+
properties.BinaryType = Apache.Arrow.Types.ArrowTypeId.LargeBinary;
3132

3233
Assert.That(properties.UseThreads, Is.True);
3334
Assert.That(properties.BatchSize, Is.EqualTo(789));
3435
Assert.That(properties.GetReadDictionary(0), Is.True);
3536
Assert.That(properties.PreBuffer, Is.False);
3637
Assert.That(properties.CoerceInt96TimestampUnit, Is.EqualTo(Apache.Arrow.Types.TimeUnit.Microsecond));
38+
Assert.That(properties.BinaryType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.LargeBinary));
3739
}
3840
}
3941
}

csharp/Arrow/ArrowReaderProperties.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,34 @@ public Apache.Arrow.Types.TimeUnit CoerceInt96TimestampUnit
105105
}
106106
}
107107

108+
/// <summary>
/// Gets or sets the Arrow binary type that BYTE_ARRAY columns are read as.
/// <para>
/// Supported values are <c>ArrowTypeId.Binary</c> (the default), <c>ArrowTypeId.LargeBinary</c>
/// and <c>ArrowTypeId.BinaryView</c>.
/// </para>
/// <para>
/// A BYTE_ARRAY column with the STRING logical type is read as the Arrow string type that
/// corresponds to the configured binary type (for example, the large string type when this
/// is set to <c>ArrowTypeId.LargeBinary</c>).
/// </para>
/// <para>
/// If a serialized Arrow schema is found in the Parquet metadata, that schema takes
/// precedence and this setting is ignored.
/// </para>
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown by the setter when the value is not one of the three supported binary types.
/// </exception>
public Apache.Arrow.Types.ArrowTypeId BinaryType
{
    get
    {
        // The native getter reports the C++ type id; translate it to the public Arrow enum.
        var nativeTypeId = ExceptionInfo.Return<ParquetSharp.CppTypeId>(Handle, ArrowReaderProperties_BinaryType);
        return nativeTypeId.toPublicEnum();
    }
    set
    {
        // Convert first so that an unsupported value is rejected before the native call is made.
        var nativeTypeId = value.toCppEnum();
        ExceptionInfo.Check(ArrowReaderProperties_SetBinaryType(Handle.IntPtr, nativeTypeId));
        GC.KeepAlive(Handle);
    }
}
135+
108136
// P/Invoke binding for the native ArrowReaderProperties_GetDefault export; the native side writes
// a pointer into `readerProperties`.
// NOTE(review): the IntPtr return value is presumably an ExceptionInfo pointer, as with the other bindings - confirm.
[DllImport(ParquetDll.Name)]
109137
private static extern IntPtr ArrowReaderProperties_GetDefault(out IntPtr readerProperties);
110138

@@ -141,6 +169,12 @@ public Apache.Arrow.Types.TimeUnit CoerceInt96TimestampUnit
141169
// P/Invoke binding for the native ArrowReaderProperties_SetCoerceInt96TimestampUnit export.
// NOTE(review): presumably forwards `unit` to set_coerce_int96_timestamp_unit on the native
// properties object and returns an ExceptionInfo pointer - confirm against the C++ export.
[DllImport(ParquetDll.Name)]
142170
private static extern IntPtr ArrowReaderProperties_SetCoerceInt96TimestampUnit(IntPtr readerProperties, Apache.Arrow.Types.TimeUnit unit);
143171

172+
// P/Invoke binding for the native ArrowReaderProperties_BinaryType export, which stores the
// configured binary type id in `value`. Used by the BinaryType getter via ExceptionInfo.Return.
[DllImport(ParquetDll.Name)]
173+
private static extern IntPtr ArrowReaderProperties_BinaryType(IntPtr readerProperties, out ParquetSharp.CppTypeId value);
174+
175+
// P/Invoke binding for the native ArrowReaderProperties_SetBinaryType export, which passes
// `value` to set_binary_type on the native properties object. The returned pointer is handed
// to ExceptionInfo.Check by the BinaryType setter.
[DllImport(ParquetDll.Name)]
176+
private static extern IntPtr ArrowReaderProperties_SetBinaryType(IntPtr readerProperties, ParquetSharp.CppTypeId value);
177+
144178
internal readonly ParquetHandle Handle;
145179
}
146180
}

csharp/ArrowTypeIdExtensions.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
using System;
2+
using System.Runtime.InteropServices;
3+
4+
namespace ParquetSharp
5+
{
6+
/// <summary>
/// Conversions from the public Apache.Arrow type ids to the type ids understood by the native library.
/// </summary>
internal static class ArrowTypeIdExtensions
{
    /// <summary>
    /// Maps a binary Arrow type id to the matching <see cref="ParquetSharp.CppTypeId"/>.
    /// </summary>
    /// <param name="arrowTypeId">
    /// The Arrow type id to convert. Only <c>Binary</c>, <c>LargeBinary</c> and <c>BinaryView</c> are supported.
    /// </param>
    /// <returns>The <see cref="ParquetSharp.CppTypeId"/> with the same meaning.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="arrowTypeId"/> is not one of the supported binary types.
    /// </exception>
    public static ParquetSharp.CppTypeId toCppEnum(this Apache.Arrow.Types.ArrowTypeId arrowTypeId) => arrowTypeId switch
    {
        Apache.Arrow.Types.ArrowTypeId.Binary => ParquetSharp.CppTypeId.Binary,
        Apache.Arrow.Types.ArrowTypeId.LargeBinary => ParquetSharp.CppTypeId.LargeBinary,
        Apache.Arrow.Types.ArrowTypeId.BinaryView => ParquetSharp.CppTypeId.BinaryView,
        // Reached with user-supplied values (e.g. through a property setter), so say what is accepted
        // instead of relying on the framework's generic out-of-range text.
        _ => throw new ArgumentOutOfRangeException(
            nameof(arrowTypeId),
            arrowTypeId,
            "Unsupported Arrow type id. Expected ArrowTypeId.Binary, ArrowTypeId.LargeBinary or ArrowTypeId.BinaryView.")
    };
}
16+
}

csharp/CppTypeId.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
using System;
2+
using System.Runtime.InteropServices;
3+
4+
namespace ParquetSharp
5+
{
6+
/// <summary>
/// Managed mirror of the subset of the native <c>::arrow::Type::type</c> ids that are exchanged
/// with the native library. The numeric values match the static_asserts in cpp/Enums.cpp.
/// </summary>
internal enum CppTypeId
7+
{
8+
// ::arrow::Type::type::BINARY
Binary = 14,
9+
// ::arrow::Type::type::LARGE_BINARY
LargeBinary = 35,
10+
// ::arrow::Type::type::BINARY_VIEW
BinaryView = 40,
11+
}
12+
13+
/// <summary>
/// Conversions from the native type ids to the public Apache.Arrow type ids.
/// </summary>
internal static class CppTypeIdExtensions
{
    /// <summary>
    /// Maps a <see cref="CppTypeId"/> to the matching public <see cref="Apache.Arrow.Types.ArrowTypeId"/>.
    /// </summary>
    /// <param name="binaryType">The native type id to convert.</param>
    /// <returns>The Arrow type id with the same meaning.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="binaryType"/> is not a declared <see cref="CppTypeId"/> member.
    /// </exception>
    public static Apache.Arrow.Types.ArrowTypeId toPublicEnum(this CppTypeId binaryType) => binaryType switch
    {
        CppTypeId.Binary => Apache.Arrow.Types.ArrowTypeId.Binary,
        CppTypeId.LargeBinary => Apache.Arrow.Types.ArrowTypeId.LargeBinary,
        CppTypeId.BinaryView => Apache.Arrow.Types.ArrowTypeId.BinaryView,
        // An undeclared numeric value ends up here; include a message so the failure is
        // diagnosable rather than a bare out-of-range error.
        _ => throw new ArgumentOutOfRangeException(
            nameof(binaryType),
            binaryType,
            "Unsupported C++ Arrow type id. Only Binary, LargeBinary and BinaryView can be converted to an ArrowTypeId.")
    };
}
23+
}

csharp/PublicAPI.Unshipped.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#nullable enable
2+
ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.get -> Apache.Arrow.Types.ArrowTypeId
3+
ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.set -> void
24
ParquetSharp.ReaderProperties.ThriftStringSizeLimit.get -> int
35
ParquetSharp.ReaderProperties.SetThriftStringSizeLimit(int size) -> void
46
ParquetSharp.ReaderProperties.ThriftContainerSizeLimit.get -> int
57
ParquetSharp.ReaderProperties.SetThriftContainerSizeLimit(int size) -> void
68
ParquetSharp.ReaderProperties.FooterReadSize.get -> long
7-
ParquetSharp.ReaderProperties.SetFooterReadSize(long size) -> void
9+
ParquetSharp.ReaderProperties.SetFooterReadSize(long size) -> void

0 commit comments

Comments
 (0)