Skip to content

Commit 3182e68

Browse files
authored
Expose page checksum verification reader and writer properties (#419)
1 parent 3b562b0 commit 3182e68

9 files changed

+154
-0
lines changed

cpp/ReaderProperties.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,19 @@ extern "C"
5656
*file_decryption_properties = p ? new std::shared_ptr(p) : nullptr;
5757
)
5858
}
59+
60+
// Exported getter: stores the result of reader_properties->page_checksum_verification()
// in *verification_enabled.
// NOTE(review): TRYCATCH is defined outside this view; presumably it converts C++ exceptions
// into the returned ExceptionInfo* - confirm against its definition.
PARQUETSHARP_EXPORT ExceptionInfo* ReaderProperties_Page_Checksum_Verification(const ReaderProperties* reader_properties, bool* verification_enabled)
61+
{
62+
TRYCATCH(*verification_enabled = reader_properties->page_checksum_verification();)
63+
}
64+
65+
// Exported setter: calls set_page_checksum_verification(true) on reader_properties.
// NOTE(review): TRYCATCH is defined outside this view; presumably it converts C++ exceptions
// into the returned ExceptionInfo* - confirm against its definition.
PARQUETSHARP_EXPORT ExceptionInfo* ReaderProperties_Enable_Page_Checksum_Verification(ReaderProperties* reader_properties)
66+
{
67+
TRYCATCH(reader_properties->set_page_checksum_verification(true);)
68+
}
69+
70+
// Exported setter: calls set_page_checksum_verification(false) on reader_properties.
// NOTE(review): TRYCATCH is defined outside this view; presumably it converts C++ exceptions
// into the returned ExceptionInfo* - confirm against its definition.
PARQUETSHARP_EXPORT ExceptionInfo* ReaderProperties_Disable_Page_Checksum_Verification(ReaderProperties* reader_properties)
71+
{
72+
TRYCATCH(reader_properties->set_page_checksum_verification(false);)
73+
}
5974
}

cpp/WriterProperties.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ extern "C"
7575
TRYCATCH(*enabled = (*writer_properties)->page_index_enabled(*path);)
7676
}
7777

78+
// Exported getter: dereferences the shared_ptr handle and stores the result of
// page_checksum_enabled() in *enabled. The value written is a C++ bool, so managed
// callers must marshal the out parameter as a single byte.
// NOTE(review): TRYCATCH is defined outside this view; presumably it converts C++ exceptions
// into the returned ExceptionInfo* - confirm against its definition.
PARQUETSHARP_EXPORT ExceptionInfo* WriterProperties_Page_Checksum_Enabled(const std::shared_ptr<WriterProperties>* writer_properties, bool* enabled)
79+
{
80+
TRYCATCH(*enabled = (*writer_properties)->page_checksum_enabled();)
81+
}
82+
7883
// ColumnPath taking methods.
7984

8085
//PARQUETSHARP_EXPORT ExceptionInfo* WriterProperties_Column_Properties(const std::shared_ptr<WriterProperties>* writer_properties, const std::shared_ptr<schema::ColumnPath>* path, const ColumnProperties** columnProperties)

cpp/WriterPropertiesBuilder.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,14 @@ extern "C"
199199
{
200200
TRYCATCH(builder->disable_write_page_index(*path);)
201201
}
202+
203+
// Exported builder call: invokes enable_page_checksum() on the given WriterProperties::Builder.
// NOTE(review): TRYCATCH is defined outside this view; presumably it converts C++ exceptions
// into the returned ExceptionInfo* - confirm against its definition.
PARQUETSHARP_EXPORT ExceptionInfo* WriterPropertiesBuilder_Enable_Page_Checksum(WriterProperties::Builder* builder)
204+
{
205+
TRYCATCH(builder->enable_page_checksum();)
206+
}
207+
208+
// Exported builder call: invokes disable_page_checksum() on the given WriterProperties::Builder.
// NOTE(review): TRYCATCH is defined outside this view; presumably it converts C++ exceptions
// into the returned ExceptionInfo* - confirm against its definition.
PARQUETSHARP_EXPORT ExceptionInfo* WriterPropertiesBuilder_Disable_Page_Checksum(WriterProperties::Builder* builder)
209+
{
210+
TRYCATCH(builder->disable_page_checksum();)
211+
}
202212
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
using System.Linq;
2+
using NUnit.Framework;
3+
using ParquetSharp.IO;
4+
using ParquetSharp.Schema;
5+
6+
namespace ParquetSharp.Test
7+
{
8+
/// <summary>
/// Tests for the values reported by ReaderProperties and for its setters and enable/disable methods
/// </summary>
[TestFixture]
9+
internal static class TestReaderProperties
10+
{
11+
12+
// Checks the values reported by reader properties obtained from GetDefaultReaderProperties.
[Test]
13+
public static void TestDefaultProperties()
14+
{
15+
using var p = ReaderProperties.GetDefaultReaderProperties();
16+
17+
// 1 << 14 == 16384
Assert.That(p.BufferSize, Is.EqualTo(1 << 14));
18+
Assert.That(p.IsBufferedStreamEnabled, Is.False);
19+
Assert.That(p.PageChecksumVerification, Is.False);
20+
}
21+
22+
// Changes each property in turn and checks that the getter reflects the change.
[Test]
23+
public static void TestModifyProperties()
24+
{
25+
using var p = ReaderProperties.GetDefaultReaderProperties();
26+
27+
// 1 << 13 == 8192, different from the 1 << 14 asserted for the defaults.
p.BufferSize = 1 << 13;
28+
Assert.That(p.BufferSize, Is.EqualTo(1 << 13));
29+
30+
// Page checksum verification: toggle on, then off again.
p.EnablePageChecksumVerification();
31+
Assert.That(p.PageChecksumVerification, Is.True);
32+
p.DisablePageChecksumVerification();
33+
Assert.That(p.PageChecksumVerification, Is.False);
34+
35+
// Buffered stream: toggle on, then off again.
p.EnableBufferedStream();
36+
Assert.That(p.IsBufferedStreamEnabled, Is.True);
37+
p.DisableBufferedStream();
38+
Assert.That(p.IsBufferedStreamEnabled, Is.False);
39+
}
40+
}
41+
}

csharp.test/TestWriterProperties.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ public static void TestDefaultProperties()
2525
Assert.AreEqual(ParquetVersion.PARQUET_2_6, p.Version);
2626
Assert.AreEqual(1024, p.WriteBatchSize);
2727
Assert.False(p.WritePageIndex);
28+
Assert.False(p.PageChecksumEnabled);
2829
}
2930

3031
[Test]
@@ -42,6 +43,7 @@ public static void TestPropertiesBuilder()
4243
.WriteBatchSize(666)
4344
.DisableWritePageIndex()
4445
.EnableWritePageIndex()
46+
.EnablePageChecksum()
4547
.Build();
4648

4749
Assert.AreEqual("Meeeee!!!", p.CreatedBy);
@@ -55,6 +57,7 @@ public static void TestPropertiesBuilder()
5557
Assert.AreEqual(ParquetVersion.PARQUET_1_0, p.Version);
5658
Assert.AreEqual(666, p.WriteBatchSize);
5759
Assert.True(p.WritePageIndex);
60+
Assert.True(p.PageChecksumEnabled);
5861
}
5962

6063
[Test]
@@ -110,6 +113,7 @@ public static void TestOverrideDefaults()
110113
DefaultWriterProperties.Version = ParquetVersion.PARQUET_1_0;
111114
DefaultWriterProperties.WriteBatchSize = 666;
112115
DefaultWriterProperties.WritePageIndex = true;
116+
DefaultWriterProperties.PageChecksumEnabled = true;
113117

114118
using var builder = new WriterPropertiesBuilder();
115119
using var p = builder.Build();
@@ -127,6 +131,7 @@ public static void TestOverrideDefaults()
127131
Assert.AreEqual(ParquetVersion.PARQUET_1_0, p.Version);
128132
Assert.AreEqual(666, p.WriteBatchSize);
129133
Assert.True(p.WritePageIndex);
134+
Assert.True(p.PageChecksumEnabled);
130135
}
131136
finally
132137
{
@@ -143,6 +148,7 @@ public static void TestOverrideDefaults()
143148
DefaultWriterProperties.Version = null;
144149
DefaultWriterProperties.WriteBatchSize = null;
145150
DefaultWriterProperties.WritePageIndex = null;
151+
DefaultWriterProperties.PageChecksumEnabled = null;
146152
}
147153
}
148154

csharp/DefaultWriterProperties.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,10 @@ public static class DefaultWriterProperties
7070
/// Write the page index
7171
/// </summary>
7272
public static bool? WritePageIndex { get; set; }
73+
74+
/// <summary>
75+
/// Write CRC page checksums
76+
/// </summary>
77+
public static bool? PageChecksumEnabled { get; set; }
7378
}
7479
}

csharp/ReaderProperties.cs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,23 @@ public void DisableBufferedStream()
5959
GC.KeepAlive(Handle);
6060
}
6161

62+
/// <summary>
63+
/// Whether page checksums are verified during reading to check for data corruption
64+
/// </summary>
65+
public bool PageChecksumVerification => ExceptionInfo.Return<bool>(Handle, ReaderProperties_Page_Checksum_Verification);
66+
67+
/// <summary>
/// Enable verification of page checksums when reading
/// </summary>
public void EnablePageChecksumVerification()
68+
{
69+
ExceptionInfo.Check(ReaderProperties_Enable_Page_Checksum_Verification(Handle.IntPtr));
70+
// Keep the handle reachable until the native call above has returned.
GC.KeepAlive(Handle);
71+
}
72+
73+
/// <summary>
/// Disable verification of page checksums when reading
/// </summary>
public void DisablePageChecksumVerification()
74+
{
75+
ExceptionInfo.Check(ReaderProperties_Disable_Page_Checksum_Verification(Handle.IntPtr));
76+
// Keep the handle reachable until the native call above has returned.
GC.KeepAlive(Handle);
77+
}
78+
6279
[DllImport(ParquetDll.Name)]
6380
private static extern IntPtr ReaderProperties_Get_Default_Reader_Properties(out IntPtr readerProperties);
6481

@@ -86,6 +103,15 @@ public void DisableBufferedStream()
86103
[DllImport(ParquetDll.Name)]
87104
private static extern IntPtr ReaderProperties_Get_File_Decryption_Properties(IntPtr readerProperties, out IntPtr fileDecryptionProperties);
88105

106+
[DllImport(ParquetDll.Name)]
107+
private static extern IntPtr ReaderProperties_Page_Checksum_Verification(IntPtr readerProperties, [MarshalAs(UnmanagedType.I1)] out bool enabled);
108+
109+
[DllImport(ParquetDll.Name)]
110+
private static extern IntPtr ReaderProperties_Enable_Page_Checksum_Verification(IntPtr readerProperties);
111+
112+
[DllImport(ParquetDll.Name)]
113+
private static extern IntPtr ReaderProperties_Disable_Page_Checksum_Verification(IntPtr readerProperties);
114+
89115
internal readonly ParquetHandle Handle;
90116
}
91117
}

csharp/WriterProperties.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ public ulong MaxStatisticsSize(ColumnPath path)
7474
return ExceptionInfo.Return<ulong>(Handle, path.Handle, WriterProperties_Max_Statistics_Size);
7575
}
7676

77+
/// <summary>
78+
/// Whether CRC checksums are written for data pages
79+
/// </summary>
80+
public bool PageChecksumEnabled => ExceptionInfo.Return<bool>(Handle, WriterProperties_Page_Checksum_Enabled);
81+
7782
internal readonly ParquetHandle Handle;
7883

7984
[DllImport(ParquetDll.Name)]
@@ -115,6 +120,9 @@ public ulong MaxStatisticsSize(ColumnPath path)
115120
[DllImport(ParquetDll.Name)]
116121
private static extern IntPtr WriterProperties_Page_Index_Enabled_For_Path(IntPtr writerProperties, IntPtr path, out bool enabled);
117122

123+
[DllImport(ParquetDll.Name)]
124+
// The native function writes a one-byte C++ bool, so marshal the out parameter as I1
// rather than the default 4-byte BOOL.
private static extern IntPtr WriterProperties_Page_Checksum_Enabled(IntPtr writerProperties, [MarshalAs(UnmanagedType.I1)] out bool enabled);
125+
118126
//[DllImport(ParquetDll.Name)]
119127
//private static extern IntPtr WriterProperties_Column_Properties(IntPtr writerProperties, IntPtr path, out IntPtr columnProperties);
120128

csharp/WriterPropertiesBuilder.cs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,26 @@ public WriterPropertiesBuilder DisableWritePageIndex(string path)
302302
return this;
303303
}
304304

305+
/// <summary>
306+
/// Enable writing CRC checksums for data pages
307+
/// </summary>
/// <returns>This builder instance</returns>
308+
public WriterPropertiesBuilder EnablePageChecksum()
309+
{
310+
ExceptionInfo.Check(WriterPropertiesBuilder_Enable_Page_Checksum(_handle.IntPtr));
311+
// Keep the handle reachable until the native call above has returned.
GC.KeepAlive(_handle);
312+
return this;
313+
}
314+
315+
/// <summary>
316+
/// Disable writing CRC checksums for data pages
317+
/// </summary>
/// <returns>This builder instance</returns>
318+
public WriterPropertiesBuilder DisablePageChecksum()
319+
{
320+
ExceptionInfo.Check(WriterPropertiesBuilder_Disable_Page_Checksum(_handle.IntPtr));
321+
// Keep the handle reachable until the native call above has returned.
GC.KeepAlive(_handle);
322+
return this;
323+
}
324+
305325
private void ApplyDefaults()
306326
{
307327
OnDefaultProperty(DefaultWriterProperties.EnableDictionary, enabled =>
@@ -357,6 +377,18 @@ private void ApplyDefaults()
357377
DisableWritePageIndex();
358378
}
359379
});
380+
381+
OnDefaultProperty(DefaultWriterProperties.PageChecksumEnabled, checksumEnabled =>
382+
{
383+
if (checksumEnabled)
384+
{
385+
EnablePageChecksum();
386+
}
387+
else
388+
{
389+
DisablePageChecksum();
390+
}
391+
});
360392
}
361393

362394
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -495,6 +527,12 @@ private static void OnDefaultRefProperty<T>(T? defaultPropertyValue, Action<T> s
495527
[DllImport(ParquetDll.Name)]
496528
private static extern IntPtr WriterPropertiesBuilder_Disable_Write_Page_Index_By_ColumnPath(IntPtr builder, IntPtr path);
497529

530+
[DllImport(ParquetDll.Name)]
531+
private static extern IntPtr WriterPropertiesBuilder_Enable_Page_Checksum(IntPtr builder);
532+
533+
[DllImport(ParquetDll.Name)]
534+
private static extern IntPtr WriterPropertiesBuilder_Disable_Page_Checksum(IntPtr builder);
535+
498536
private readonly ParquetHandle _handle;
499537
}
500538
}

0 commit comments

Comments
 (0)