Skip to content

Commit 2f6dd16

Browse files
authored
Documentation Improvements: Arrow API (#501)
1 parent 52ea5fb commit 2f6dd16

18 files changed

+548
-19
lines changed

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,20 @@ devcontainer exec ./build_unix.sh
262262
devcontainer exec dotnet test csharp.test
263263
```
264264

265+
#### Code Formatting
266+
267+
When formatting for the first time, you'll need to restore the formatter tool:
268+
269+
```bash
270+
dotnet tool restore
271+
```
272+
273+
Then you can format the code at any time with the following command, which is also executed by the CI format checker:
274+
275+
```bash
276+
dotnet jb cleanupcode "csharp" "csharp.test" "csharp.benchmark" --profile="Built-in: Reformat Code" --settings="ParquetSharp.DotSettings" --verbosity=WARN
277+
```
278+
265279
### Native
266280

267281
Building ParquetSharp natively requires the following dependencies:

csharp/Arrow/ArrowReaderProperties.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
namespace ParquetSharp.Arrow
55
{
66
/// <summary>
7-
/// Configures Arrow specific options for reading Parquet files
7+
/// Configures Arrow specific options for reading Parquet files.
88
/// </summary>
99
public sealed class ArrowReaderProperties : IDisposable
1010
{
11+
/// <summary>
12+
/// Create a new <see cref="ArrowReaderProperties"/> with default values.
13+
/// </summary>
14+
/// <returns>A new <see cref="ArrowReaderProperties"/> instance with default values.</returns>
1115
public static ArrowReaderProperties GetDefault()
1216
{
1317
return new ArrowReaderProperties(ExceptionInfo.Return<IntPtr>(ArrowReaderProperties_GetDefault));

csharp/Arrow/ArrowWriterProperties.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ public enum WriterEngineVersion
1717
V2 = 1, // Full support for all nesting combinations
1818
}
1919

20+
/// <summary>
21+
/// Create a new <see cref="ArrowWriterProperties"/> with default values.
22+
/// </summary>
2023
public static ArrowWriterProperties GetDefault()
2124
{
2225
return new ArrowWriterProperties(ExceptionInfo.Return<IntPtr>(ArrowWriterProperties_GetDefault));

csharp/Arrow/FileReader.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,10 @@ public unsafe Apache.Arrow.Schema Schema
122122

123123
/// <summary>
124124
/// Get a record batch reader for the file data
125+
/// </summary>
125126
/// <param name="rowGroups">The indices of row groups to read data from</param>
126127
/// <param name="columns">The indices of columns to read, based on the schema</param>
127-
/// </summary>
128+
/// <returns>An Arrow array stream reader</returns>
128129
public unsafe IArrowArrayStream GetRecordBatchReader(
129130
int[]? rowGroups = null,
130131
int[]? columns = null)

csharp/Arrow/FileWriter.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ public void WriteRecordBatch(RecordBatch recordBatch, long chunkSize = 1024 * 10
185185
/// multiple record batches to be written to the same row group.
186186
///
187187
/// New row groups are started if the data reaches the MaxRowGroupLength configured
188-
/// in the WriterProperties.
188+
/// in the <see cref="WriterProperties"/>.
189189
/// </summary>
190190
/// <param name="recordBatch">The record batch to write</param>
191191
public void WriteBufferedRecordBatch(RecordBatch recordBatch)

csharp/Column.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,29 @@ public class Column
1212
{
1313
#pragma warning disable RS0027
1414

15+
/// <summary>
16+
/// Create a column with the given properties.
17+
/// </summary>
18+
/// <param name="logicalSystemType">The <see cref="Type"/> of the column.</param>
19+
/// <param name="name">The name of the column.</param>
20+
/// <param name="logicalTypeOverride">Optional override for the logical type of the column.</param>
21+
/// <exception cref="ArgumentNullException">Thrown if <paramref name="logicalSystemType"/> or <paramref name="name"/> is null.</exception>
1522
public Column(Type logicalSystemType, string name, LogicalType? logicalTypeOverride = null)
1623
: this(logicalSystemType, name, logicalTypeOverride, GetTypeLength(logicalSystemType, logicalTypeOverride))
1724
{
1825
LogicalSystemType = logicalSystemType ?? throw new ArgumentNullException(nameof(logicalSystemType));
1926
Name = name ?? throw new ArgumentNullException(nameof(name));
2027
}
2128

29+
/// <summary>
30+
/// Create a column with the given properties.
31+
/// </summary>
32+
/// <param name="logicalSystemType">The <see cref="Type"/> of the column.</param>
33+
/// <param name="name">The name of the column.</param>
34+
/// <param name="logicalTypeOverride">Optional override for the logical type of the column.</param>
35+
/// <param name="length">The length of the column for decimal, Guid or Half types.</param>
36+
/// <exception cref="ArgumentNullException">Thrown if any of the required arguments are null.</exception>
37+
/// <exception cref="ArgumentException">Thrown if the length is set with an incompatible type.</exception>
2238
public Column(Type logicalSystemType, string name, LogicalType? logicalTypeOverride, int length)
2339
{
2440
var isDecimal = logicalSystemType == typeof(decimal) || logicalSystemType == typeof(decimal?);

csharp/ColumnWriter.cs

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,28 +53,76 @@ public long Close()
5353
return ExceptionInfo.Return<long>(Handle, ColumnWriter_Close);
5454
}
5555

56+
/// <summary>
57+
/// Get the index of the column within the row group.
58+
/// </summary>
5659
public int ColumnIndex { get; }
60+
/// <summary>
61+
/// Get the <see cref="ParquetSharp.LogicalTypeFactory"/> for the Parquet file writer.
62+
/// </summary>
5763
public LogicalTypeFactory LogicalTypeFactory => RowGroupWriter.ParquetFileWriter.LogicalTypeFactory;
64+
/// <summary>
65+
/// Get the <see cref="ParquetSharp.LogicalWriteConverterFactory"/> for the Parquet file writer.
66+
/// </summary>
5867
public LogicalWriteConverterFactory LogicalWriteConverterFactory => RowGroupWriter.ParquetFileWriter.LogicalWriteConverterFactory;
5968

69+
/// <summary>
70+
/// Get the <see cref="ParquetSharp.ColumnDescriptor"/> for the column.
71+
/// </summary>
6072
public ColumnDescriptor ColumnDescriptor => new(ExceptionInfo.Return<IntPtr>(Handle, ColumnWriter_Descr));
73+
/// <summary>
74+
/// Get the number of rows written to the column so far.
75+
/// </summary>
6176
public long RowWritten => ExceptionInfo.Return<long>(Handle, ColumnWriter_Rows_Written);
77+
/// <summary>
78+
/// Get the physical type of the column.
79+
/// </summary>
6280
public PhysicalType Type => ExceptionInfo.Return<PhysicalType>(Handle, ColumnWriter_Type);
81+
/// <summary>
82+
/// Get the <see cref="ParquetSharp.WriterProperties"/> for the column.
83+
/// </summary>
6384
public WriterProperties WriterProperties => new(ExceptionInfo.Return<IntPtr>(Handle, ColumnWriter_Properties));
6485

86+
/// <summary>
87+
/// Get the element <see cref="Type"/> of the data being written.
88+
/// </summary>
6589
public abstract Type ElementType { get; }
90+
91+
/// <summary>
92+
/// Apply a visitor to the column writer.
93+
/// </summary>
94+
/// <typeparam name="TReturn">The return type of the visitor.</typeparam>
95+
/// <param name="visitor">The visitor instance.</param>
96+
/// <returns>The result of the visitor operation.</returns>
6697
public abstract TReturn Apply<TReturn>(IColumnWriterVisitor<TReturn> visitor);
6798

99+
/// <summary>
100+
/// Create a <see cref="LogicalColumnWriter"/>.
101+
/// </summary>
102+
/// <param name="bufferLength">The buffer length in bytes. Default is 4KB.</param>
103+
/// <returns>A <see cref="LogicalColumnWriter"/> instance.</returns>
68104
public LogicalColumnWriter LogicalWriter(int bufferLength = 4 * 1024)
69105
{
70106
return LogicalColumnWriter.Create(this, bufferLength, elementTypeOverride: null);
71107
}
72108

109+
/// <summary>
110+
/// Create a strongly-typed <see cref="LogicalColumnWriter"/> without an explicit element type override.
111+
/// </summary>
112+
/// <typeparam name="TElement">The type of the data to write.</typeparam>
113+
/// <param name="bufferLength">The buffer length in bytes. Default is 4KB.</param>
114+
/// <returns>A <see cref="LogicalColumnWriter{TElement}"/> instance.</returns>
73115
public LogicalColumnWriter<TElement> LogicalWriter<TElement>(int bufferLength = 4 * 1024)
74116
{
75117
return LogicalColumnWriter.Create<TElement>(this, bufferLength, elementTypeOverride: null);
76118
}
77119

120+
/// <summary>
121+
/// Create a strongly-typed <see cref="LogicalColumnWriter"/> with an explicit element type override.
122+
/// </summary>
123+
/// <typeparam name="TElement">The type of the data to write.</typeparam>
124+
/// <param name="bufferLength">The buffer length in bytes. Default is 4KB.</param>
125+
/// <returns>A <see cref="LogicalColumnWriter{TElement}"/> instance.</returns>
78126
public LogicalColumnWriter<TElement> LogicalWriterOverride<TElement>(int bufferLength = 4 * 1024)
79127
{
80128
return LogicalColumnWriter.Create<TElement>(this, bufferLength, typeof(TElement));
@@ -181,26 +229,48 @@ protected IntPtr Handle
181229
internal readonly RowGroupWriter RowGroupWriter;
182230
}
183231

184-
/// <inheritdoc />
232+
/// <summary>
233+
/// Strongly-typed writer of physical Parquet values to a single column.
234+
/// </summary>
235+
/// <typeparam name="TValue">The data type of the column.</typeparam>
185236
public sealed class ColumnWriter<TValue> : ColumnWriter where TValue : unmanaged
186237
{
187238
internal ColumnWriter(IntPtr handle, RowGroupWriter rowGroupWriter, int columnIndex)
188239
: base(handle, rowGroupWriter, columnIndex)
189240
{
190241
}
191242

243+
/// <inheritdoc />
192244
public override Type ElementType => typeof(TValue);
193245

246+
/// <inheritdoc />
194247
public override TReturn Apply<TReturn>(IColumnWriterVisitor<TReturn> visitor)
195248
{
196249
return visitor.OnColumnWriter(this);
197250
}
198251

252+
/// <summary>
253+
/// Write a batch of values to the column.
254+
/// </summary>
255+
/// <param name="values">The values to write.</param>
199256
public void WriteBatch(ReadOnlySpan<TValue> values)
200257
{
201258
WriteBatch(values.Length, null, null, values);
202259
}
203260

261+
/// <summary>
262+
/// Write a batch of values to the column with optional definition and repetition levels.
263+
/// </summary>
264+
/// <param name="numValues">The number of values to write.</param>
265+
/// <param name="defLevels">The definition levels for the values.</param>
266+
/// <param name="repLevels">The repetition levels for the values.</param>
267+
/// <param name="values">The values to write.</param>
268+
/// <remarks>
269+
/// When provided, <paramref name="defLevels"/> and <paramref name="repLevels"/> must each contain at least <paramref name="numValues"/> entries.
270+
/// </remarks>
271+
/// <exception cref="ArgumentNullException">Thrown if <paramref name="values"/> is null.</exception>
272+
/// <exception cref="ArgumentOutOfRangeException">Thrown if <paramref name="numValues"/> is larger
273+
/// than the length of <paramref name="defLevels"/> or <paramref name="repLevels"/>.</exception>
204274
public unsafe void WriteBatch(int numValues, ReadOnlySpan<short> defLevels, ReadOnlySpan<short> repLevels, ReadOnlySpan<TValue> values)
205275
{
206276
if (values == null) throw new ArgumentNullException(nameof(values));

csharp/FileDecryptionPropertiesBuilder.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ public FileDecryptionPropertiesBuilder PlaintextFilesAllowed()
148148
/// <summary>
149149
/// Build the <see cref="FileDecryptionProperties"/> object.
150150
/// </summary>
151-
/// <returns>A new <see cref="FileDecryptionProperties"/> object with the configured decryption properties.</returns>
151+
/// <returns>The configured <see cref="FileDecryptionProperties"/> object.</returns>
152152
public FileDecryptionProperties Build() => new FileDecryptionProperties(ExceptionInfo.Return<IntPtr>(_handle, FileDecryptionPropertiesBuilder_Build));
153153

154154
[DllImport(ParquetDll.Name)]

csharp/FileMetaData.cs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
namespace ParquetSharp
77
{
8+
/// <summary>
9+
/// Metadata for a Parquet file. Includes information about the schema, row groups, versions, etc.
10+
/// </summary>
811
public sealed class FileMetaData : IEquatable<FileMetaData>, IDisposable
912
{
1013
internal FileMetaData(IntPtr handle)
@@ -17,8 +20,15 @@ public void Dispose()
1720
_handle.Dispose();
1821
}
1922

23+
/// <summary>
24+
/// Get the name of the entity that created the file.
25+
/// </summary>
2026
public string CreatedBy => ExceptionInfo.ReturnString(_handle, FileMetaData_Created_By);
2127

28+
/// <summary>
29+
/// Get the key-value metadata.
30+
/// </summary>
31+
/// <value>An <see cref="IReadOnlyDictionary{TKey,TValue}"/> containing the key-value metadata.</value>
2232
public IReadOnlyDictionary<string, string> KeyValueMetadata
2333
{
2434
get
@@ -34,13 +44,38 @@ public IReadOnlyDictionary<string, string> KeyValueMetadata
3444
}
3545
}
3646

47+
/// <summary>
48+
/// Get the number of columns in the file.
49+
/// </summary>
3750
public int NumColumns => ExceptionInfo.Return<int>(_handle, FileMetaData_Num_Columns);
51+
/// <summary>
52+
/// Get the number of rows in the file.
53+
/// </summary>
3854
public long NumRows => ExceptionInfo.Return<long>(_handle, FileMetaData_Num_Rows);
55+
/// <summary>
56+
/// Get the number of row groups in the file.
57+
/// </summary>
3958
public int NumRowGroups => ExceptionInfo.Return<int>(_handle, FileMetaData_Num_Row_Groups);
59+
/// <summary>
60+
/// Get the number of schema elements in the file.
61+
/// </summary>
4062
public int NumSchemaElements => ExceptionInfo.Return<int>(_handle, FileMetaData_Num_Schema_Elements);
63+
/// <summary>
64+
/// Get the schema descriptor for the file.
65+
/// </summary>
66+
/// <value>A <see cref="SchemaDescriptor"/> object that describes the schema of the file.</value>
4167
public SchemaDescriptor Schema => _schema ??= new SchemaDescriptor(ExceptionInfo.Return<IntPtr>(_handle, FileMetaData_Schema));
68+
/// <summary>
69+
/// Get the size of the serialized file metadata (the Parquet footer) in bytes.
70+
/// </summary>
4271
public int Size => ExceptionInfo.Return<int>(_handle, FileMetaData_Size);
72+
/// <summary>
73+
/// Get the Parquet format version of the file.
74+
/// </summary>
4375
public ParquetVersion Version => ExceptionInfo.Return<ParquetVersion>(_handle, FileMetaData_Version);
76+
/// <summary>
77+
/// Get the version of the writer that created the file.
78+
/// </summary>
4479
public ApplicationVersion WriterVersion => new ApplicationVersion(ExceptionInfo.Return<AppVer>(_handle, FileMetaData_Writer_Version));
4580

4681
public bool Equals(FileMetaData? other)

csharp/LogicalColumnWriter.cs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ internal static LogicalColumnWriter<TElementType> Create<TElementType>(ColumnWri
6565
}
6666
}
6767

68+
/// <summary>
69+
/// Apply a visitor to this logical column writer.
70+
/// </summary>
71+
/// <typeparam name="TReturn">The return type of the visitor.</typeparam>
72+
/// <param name="visitor">The visitor instance.</param>
73+
/// <returns>The result of the visitor.</returns>
6874
public abstract TReturn Apply<TReturn>(ILogicalColumnWriterVisitor<TReturn> visitor);
6975

7076
private sealed class Creator : IColumnDescriptorVisitor<LogicalColumnWriter>
@@ -85,6 +91,7 @@ public LogicalColumnWriter OnColumnDescriptor<TPhysical, TLogical, TElement>() w
8591
}
8692
}
8793

94+
/// <inheritdoc />
8895
public sealed class LogicalColumnWriter<TElement> : LogicalColumnWriter
8996
{
9097
private LogicalColumnWriter(ColumnWriter columnWriter, int bufferLength, ByteBuffer? byteBuffer, ILogicalBatchWriter<TElement> batchWriter)
@@ -130,21 +137,36 @@ public override void Dispose()
130137
base.Dispose();
131138
}
132139

140+
/// <inheritdoc />
133141
public override TReturn Apply<TReturn>(ILogicalColumnWriterVisitor<TReturn> visitor)
134142
{
135143
return visitor.OnLogicalColumnWriter(this);
136144
}
137145

146+
/// <summary>
147+
/// Write an array of values to the column.
148+
/// </summary>
149+
/// <param name="values">An array of values to write.</param>
138150
public void WriteBatch(TElement[] values)
139151
{
140152
WriteBatch(values.AsSpan());
141153
}
142154

155+
/// <summary>
156+
/// Write a range of values to the column.
157+
/// </summary>
158+
/// <param name="values">An array of values to write.</param>
159+
/// <param name="start">The index of the first value to write.</param>
160+
/// <param name="length">The number of values to write.</param>
143161
public void WriteBatch(TElement[] values, int start, int length)
144162
{
145163
WriteBatch(values.AsSpan(start, length));
146164
}
147165

166+
/// <summary>
167+
/// Write a span of values to the column.
168+
/// </summary>
169+
/// <param name="values">A <see cref="ReadOnlySpan{TElement}"/> of values to write.</param>
148170
public void WriteBatch(ReadOnlySpan<TElement> values)
149171
{
150172
_batchWriter.WriteBatch(values);

0 commit comments

Comments
 (0)