Skip to content

Commit d25ccf6

Browse files
authored
Add Thrift string size limit reader property (#564)
1 parent 4087cf0 commit d25ccf6

File tree

4 files changed

+78
-0
lines changed

4 files changed

+78
-0
lines changed

cpp/ReaderProperties.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,14 @@ extern "C"
8484
*memory_pool = reader_properties->memory_pool();
8585
)
8686
}
87+
88+
// Reads the thrift string size limit from `reader_properties` and stores it in `*size`.
// NOTE(review): the returned ExceptionInfo* presumably comes from the TRYCATCH macro,
// which is defined outside this view - confirm its success/failure convention there.
PARQUETSHARP_EXPORT ExceptionInfo *ReaderProperties_Thrift_String_Size_Limit(const ReaderProperties *reader_properties, int *size)
89+
{
90+
TRYCATCH(*size = reader_properties->thrift_string_size_limit();)
91+
}
92+
93+
// Forwards `size` unchanged to `reader_properties->set_thrift_string_size_limit`.
// NOTE(review): the returned ExceptionInfo* presumably comes from the TRYCATCH macro,
// which is defined outside this view - confirm its success/failure convention there.
PARQUETSHARP_EXPORT ExceptionInfo *ReaderProperties_Set_Thrift_String_Size_Limit(ReaderProperties *reader_properties, int size)
94+
{
95+
TRYCATCH(reader_properties->set_thrift_string_size_limit(size);)
96+
}
8797
}

csharp.test/TestReaderProperties.cs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using NUnit.Framework;
2+
using ParquetSharp.IO;
23

34
namespace ParquetSharp.Test
45
{
@@ -35,6 +36,45 @@ public static void TestModifyProperties()
3536
Assert.That(p.IsBufferedStreamEnabled, Is.True);
3637
p.DisableBufferedStream();
3738
Assert.That(p.IsBufferedStreamEnabled, Is.False);
39+
40+
p.SetThriftStringSizeLimit(2048576);
41+
Assert.That(p.ThriftStringSizeLimit, Is.EqualTo(2048576));
42+
}
43+
44+
// Writes a one-column Parquet file whose column name is 100 characters long into an
// in-memory buffer, then opens it with the thrift string size limit set to 10 and
// expects a ParquetException whose message mentions the exceeded size limit.
[Test]
45+
public static void TestSetThriftStringSizeLimit_ReturnException()
46+
{
47+
using var buffer = new ResizableBuffer();
48+
49+
using (var output = new BufferOutputStream(buffer))
50+
{
51+
var longColumnName = new string('X', 100); // 100 chars
52+
var schema = new Column[] { new Column<string>(longColumnName) };
53+
54+
using var writer = new ParquetFileWriter(output, schema);
55+
using (var rowGroup = writer.AppendRowGroup())
56+
{
57+
using var colWriter = rowGroup.NextColumn().LogicalWriter<string>();
58+
colWriter.WriteBatch(new[] { "hello" });
59+
}
60+
writer.Close();
61+
}
62+
63+
// Configure reader with a small thrift string size limit
64+
using var props = ReaderProperties.GetDefaultReaderProperties();
65+
props.SetThriftStringSizeLimit(10);
66+
67+
var ex = Assert.Throws<ParquetException>(() =>
68+
{
69+
using var input = new BufferReader(buffer);
70+
using var reader = new ParquetFileReader(input, props);
71+
var rg = reader.RowGroup(0); // Force metadata read. NOTE(review): the exception may already be raised by the ParquetFileReader constructor above - confirm.
72+
});
73+
74+
// Validate the exception is related to the thrift string size limit
75+
Assert.That(ex?.Message,
76+
Does.Contain("Couldn't deserialize thrift: TProtocolException: Exceeded size limit")
77+
.IgnoreCase);
3878
}
3979

4080
[TestCaseSource(typeof(MemoryPools), nameof(MemoryPools.NonNullTestCases))]

csharp/PublicAPI.Unshipped.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
#nullable enable
2+
ParquetSharp.ReaderProperties.ThriftStringSizeLimit.get -> int
3+
ParquetSharp.ReaderProperties.SetThriftStringSizeLimit(int size) -> void

csharp/ReaderProperties.cs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,26 @@ public MemoryPool MemoryPool
137137
}
138138
}
139139

140+
/// <summary>
141+
/// Return the size limit on thrift strings.
142+
///
143+
/// This limit helps prevent space and time bombs in files,
144+
/// but may need to be increased in order to read files with especially large headers.
145+
/// </summary>
/// <remarks>
/// The value is fetched from the native reader properties on every access via
/// <c>ReaderProperties_Thrift_String_Size_Limit</c>; use <see cref="SetThriftStringSizeLimit"/> to change it.
/// </remarks>
146+
public int ThriftStringSizeLimit
147+
{
148+
get => ExceptionInfo.Return<int>(Handle, ReaderProperties_Thrift_String_Size_Limit);
149+
}
150+
151+
/// <summary>
152+
/// Set the size limit on thrift strings.
153+
/// </summary>
/// <param name="size">The new limit; it is passed unchanged to the native reader properties.</param>
154+
public void SetThriftStringSizeLimit(int size)
155+
{
156+
ExceptionInfo.Check(ReaderProperties_Set_Thrift_String_Size_Limit(Handle.IntPtr, size));
157+
// Keep the managed handle reachable until the native call above has returned.
GC.KeepAlive(Handle);
158+
}
159+
140160
[DllImport(ParquetDll.Name)]
141161
private static extern IntPtr ReaderProperties_Get_Default_Reader_Properties(out IntPtr readerProperties);
142162

@@ -179,6 +199,12 @@ public MemoryPool MemoryPool
179199
[DllImport(ParquetDll.Name)]
180200
private static extern IntPtr ReaderProperties_Get_Memory_Pool(IntPtr readerProperties, out IntPtr memoryPool);
181201

202+
// Native entry point that writes the thrift string size limit of the given reader
// properties into `size`. The IntPtr result is handed to ExceptionInfo by the
// ThriftStringSizeLimit getter.
[DllImport(ParquetDll.Name)]
203+
private static extern IntPtr ReaderProperties_Thrift_String_Size_Limit(IntPtr readerProperties, out int size);
204+
205+
// Native entry point that applies `size` as the thrift string size limit of the given
// reader properties. The IntPtr result is checked by ExceptionInfo.Check in
// SetThriftStringSizeLimit.
[DllImport(ParquetDll.Name)]
206+
private static extern IntPtr ReaderProperties_Set_Thrift_String_Size_Limit(IntPtr readerProperties, int size);
207+
182208
internal readonly ParquetHandle Handle;
183209
}
184210
}

0 commit comments

Comments
 (0)