Skip to content

Commit 9cd95c4

Browse files
authored
Add ListType & ArrowExtensionsEnabled to ArrowReaderProperties (#582)
1 parent 39fe42a commit 9cd95c4

File tree

7 files changed

+94
-0
lines changed

7 files changed

+94
-0
lines changed

cpp/Enums.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,5 +106,9 @@ namespace
106106
static_assert((int) ::arrow::Type::type::BINARY == 14);
107107
static_assert((int) ::arrow::Type::type::LARGE_BINARY == 35);
108108
static_assert((int) ::arrow::Type::type::BINARY_VIEW == 40);
109+
110+
static_assert((int) ::arrow::Type::type::LIST == 25);
111+
static_assert((int) ::arrow::Type::type::LARGE_LIST == 36);
112+
static_assert((int) ::arrow::Type::type::LIST_VIEW == 41);
109113
}
110114
}

cpp/arrow/ArrowReaderProperties.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,24 @@ extern "C"
7676
{
7777
TRYCATCH(properties->set_binary_type(value);)
7878
}
79+
80+
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_ListType(ArrowReaderProperties* properties, ::arrow::Type::type* value)
81+
{
82+
TRYCATCH(*value = properties->list_type();)
83+
}
84+
85+
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_SetListType(ArrowReaderProperties* properties, ::arrow::Type::type value)
86+
{
87+
TRYCATCH(properties->set_list_type(value);)
88+
}
89+
90+
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_GetArrowExtensionEnabled(ArrowReaderProperties* properties, bool* extensions_enabled)
91+
{
92+
TRYCATCH(*extensions_enabled = properties->get_arrow_extensions_enabled();)
93+
}
94+
95+
PARQUETSHARP_EXPORT ExceptionInfo* ArrowReaderProperties_SetArrowExtensionEnabled(ArrowReaderProperties* properties, bool extensions_enabled)
96+
{
97+
TRYCATCH(properties->set_arrow_extensions_enabled(extensions_enabled);)
98+
}
7999
}

csharp.test/Arrow/TestArrowReaderProperties.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ public void TestDefaultProperties()
1616
Assert.That(properties.GetReadDictionary(0), Is.False);
1717
Assert.That(properties.PreBuffer, Is.True);
1818
Assert.That(properties.CoerceInt96TimestampUnit, Is.EqualTo(Apache.Arrow.Types.TimeUnit.Nanosecond));
19+
Assert.That(properties.BinaryType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.Binary));
20+
Assert.That(properties.ListType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.List));
21+
Assert.That(properties.ArrowExtensionEnabled, Is.False);
1922
}
2023

2124
[Test]
@@ -29,13 +32,17 @@ public void TestSetProperties()
2932
properties.PreBuffer = false;
3033
properties.CoerceInt96TimestampUnit = Apache.Arrow.Types.TimeUnit.Microsecond;
3134
properties.BinaryType = Apache.Arrow.Types.ArrowTypeId.LargeBinary;
35+
properties.ListType = Apache.Arrow.Types.ArrowTypeId.LargeList;
36+
properties.ArrowExtensionEnabled = true;
3237

3338
Assert.That(properties.UseThreads, Is.True);
3439
Assert.That(properties.BatchSize, Is.EqualTo(789));
3540
Assert.That(properties.GetReadDictionary(0), Is.True);
3641
Assert.That(properties.PreBuffer, Is.False);
3742
Assert.That(properties.CoerceInt96TimestampUnit, Is.EqualTo(Apache.Arrow.Types.TimeUnit.Microsecond));
3843
Assert.That(properties.BinaryType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.LargeBinary));
44+
Assert.That(properties.ListType, Is.EqualTo(Apache.Arrow.Types.ArrowTypeId.LargeList));
45+
Assert.That(properties.ArrowExtensionEnabled, Is.True);
3946
}
4047
}
4148
}

csharp/Arrow/ArrowReaderProperties.cs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,44 @@ public Apache.Arrow.Types.ArrowTypeId BinaryType
133133
}
134134
}
135135

136+
/// <summary>
137+
/// The Arrow list type to read Parquet list columns as.
138+
///
139+
/// Allowed values are ArrowTypeId.List, ArrowTypeId.LargeList and ArrowTypeId.ListView.
140+
/// Default is ArrowTypeId.List.
141+
///
142+
/// If a serialized Arrow schema is found in the Parquet metadata,
143+
/// this setting is ignored and the Arrow schema takes precedence.
144+
/// </summary>
145+
public Apache.Arrow.Types.ArrowTypeId ListType
146+
{
147+
get
148+
{
149+
ParquetSharp.CppTypeId value = ExceptionInfo.Return<ParquetSharp.CppTypeId>(Handle, ArrowReaderProperties_ListType);
150+
return value.toPublicEnum();
151+
}
152+
set
153+
{
154+
ParquetSharp.CppTypeId cppValue = value.toCppEnum();
155+
ExceptionInfo.Check(ArrowReaderProperties_SetListType(Handle.IntPtr, cppValue));
156+
GC.KeepAlive(Handle);
157+
}
158+
}
159+
160+
/// <summary>
161+
/// Whether to enable Parquet-supported Arrow extension types.
162+
/// Default is false.
163+
/// </summary>
164+
public bool ArrowExtensionEnabled
165+
{
166+
get => ExceptionInfo.Return<bool>(Handle, ArrowReaderProperties_GetArrowExtensionEnabled);
167+
set
168+
{
169+
ExceptionInfo.Check(ArrowReaderProperties_SetArrowExtensionEnabled(Handle.IntPtr, value));
170+
GC.KeepAlive(Handle);
171+
}
172+
}
173+
136174
[DllImport(ParquetDll.Name)]
137175
private static extern IntPtr ArrowReaderProperties_GetDefault(out IntPtr readerProperties);
138176

@@ -175,6 +213,18 @@ public Apache.Arrow.Types.ArrowTypeId BinaryType
175213
[DllImport(ParquetDll.Name)]
176214
private static extern IntPtr ArrowReaderProperties_SetBinaryType(IntPtr readerProperties, ParquetSharp.CppTypeId value);
177215

216+
[DllImport(ParquetDll.Name)]
217+
private static extern IntPtr ArrowReaderProperties_ListType(IntPtr readerProperties, out ParquetSharp.CppTypeId value);
218+
219+
[DllImport(ParquetDll.Name)]
220+
private static extern IntPtr ArrowReaderProperties_SetListType(IntPtr readerProperties, ParquetSharp.CppTypeId value);
221+
222+
[DllImport(ParquetDll.Name)]
223+
// The native export writes through a C++ `bool*` (1 byte). Default P/Invoke marshalling treats
// bool as a 4-byte Win32 BOOL, so the size must be stated explicitly with UnmanagedType.I1.
private static extern IntPtr ArrowReaderProperties_GetArrowExtensionEnabled(IntPtr readerProperties, [MarshalAs(UnmanagedType.I1)] out bool extensionsEnabled);
224+
225+
[DllImport(ParquetDll.Name)]
226+
// The native export takes a C++ `bool` (1 byte). Default P/Invoke marshalling treats bool as a
// 4-byte Win32 BOOL, so the size must be stated explicitly with UnmanagedType.I1.
private static extern IntPtr ArrowReaderProperties_SetArrowExtensionEnabled(IntPtr readerProperties, [MarshalAs(UnmanagedType.I1)] bool extensionsEnabled);
227+
178228
internal readonly ParquetHandle Handle;
179229
}
180230
}

csharp/ArrowTypeIdExtensions.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ internal static class ArrowTypeIdExtensions
1010
Apache.Arrow.Types.ArrowTypeId.Binary => ParquetSharp.CppTypeId.Binary,
1111
Apache.Arrow.Types.ArrowTypeId.LargeBinary => ParquetSharp.CppTypeId.LargeBinary,
1212
Apache.Arrow.Types.ArrowTypeId.BinaryView => ParquetSharp.CppTypeId.BinaryView,
13+
Apache.Arrow.Types.ArrowTypeId.List => ParquetSharp.CppTypeId.List,
14+
Apache.Arrow.Types.ArrowTypeId.LargeList => ParquetSharp.CppTypeId.LargeList,
15+
Apache.Arrow.Types.ArrowTypeId.ListView => ParquetSharp.CppTypeId.ListView,
1316
_ => throw new ArgumentOutOfRangeException(nameof(arrowTypeId), arrowTypeId, null)
1417
};
1518
}

csharp/CppTypeId.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ internal enum CppTypeId
88
Binary = 14,
99
LargeBinary = 35,
1010
BinaryView = 40,
11+
List = 25,
12+
LargeList = 36,
13+
ListView = 41
1114
}
1215

1316
internal static class CppTypeIdExtensions
@@ -17,6 +20,9 @@ internal static class CppTypeIdExtensions
1720
CppTypeId.Binary => Apache.Arrow.Types.ArrowTypeId.Binary,
1821
CppTypeId.LargeBinary => Apache.Arrow.Types.ArrowTypeId.LargeBinary,
1922
CppTypeId.BinaryView => Apache.Arrow.Types.ArrowTypeId.BinaryView,
23+
CppTypeId.List => Apache.Arrow.Types.ArrowTypeId.List,
24+
CppTypeId.LargeList => Apache.Arrow.Types.ArrowTypeId.LargeList,
25+
CppTypeId.ListView => Apache.Arrow.Types.ArrowTypeId.ListView,
2026
_ => throw new ArgumentOutOfRangeException(nameof(binaryType), binaryType, null)
2127
};
2228
}

csharp/PublicAPI.Unshipped.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#nullable enable
2+
ParquetSharp.Arrow.ArrowReaderProperties.ArrowExtensionEnabled.get -> bool
3+
ParquetSharp.Arrow.ArrowReaderProperties.ArrowExtensionEnabled.set -> void
24
ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.get -> Apache.Arrow.Types.ArrowTypeId
35
ParquetSharp.Arrow.ArrowReaderProperties.BinaryType.set -> void
6+
ParquetSharp.Arrow.ArrowReaderProperties.ListType.get -> Apache.Arrow.Types.ArrowTypeId
7+
ParquetSharp.Arrow.ArrowReaderProperties.ListType.set -> void
48
ParquetSharp.ReaderProperties.ThriftStringSizeLimit.get -> int
59
ParquetSharp.ReaderProperties.SetThriftStringSizeLimit(int size) -> void
610
ParquetSharp.ReaderProperties.ThriftContainerSizeLimit.get -> int

0 commit comments

Comments
 (0)