From 5b5489621cf431c06d431a910b55673e1a5f408b Mon Sep 17 00:00:00 2001 From: haianhng31 Date: Wed, 19 Nov 2025 09:11:51 +0000 Subject: [PATCH 01/13] Add visitor pattern section --- docs/guides/Reading.md | 25 +-- docs/guides/VisitorPatterns.md | 378 +++++++++++++++++++++++++++++++++ docs/guides/Writing.md | 24 +-- docs/guides/toc.yml | 2 + 4 files changed, 384 insertions(+), 45 deletions(-) create mode 100644 docs/guides/VisitorPatterns.md diff --git a/docs/guides/Reading.md b/docs/guides/Reading.md index 5355d869..c6d1fc91 100644 --- a/docs/guides/Reading.md +++ b/docs/guides/Reading.md @@ -63,29 +63,8 @@ DateTime[] timestamps = rowGroupReader.Column(0).LogicalReader().ReadA ### Reading columns with unknown types -However, if you don't know ahead of time the types for each column, you can implement the -`ILogicalColumnReaderVisitor` interface to handle column data in a type-safe way, for example: - -```csharp -sealed class ColumnPrinter : ILogicalColumnReaderVisitor -{ - public string OnLogicalColumnReader(LogicalColumnReader columnReader) - { - var stringBuilder = new StringBuilder(); - foreach (var value in columnReader) { - stringBuilder.Append(value?.ToString() ?? "null"); - stringBuilder.Append(","); - } - return stringBuilder.ToString(); - } -} - -string columnValues = rowGroupReader.Column(0).LogicalReader().Apply(new ColumnPrinter()); -``` - -There's a similar `IColumnReaderVisitor` interface for working with `ColumnReader` objects -and reading physical values in a type-safe way, but most users will want to work at the logical element level. - +If you don't know ahead of time the types for each column, you can use a visitor to handle column data in a type-safe way. +See [Visitor patterns: reading & writing with unknown column types](VisitorPatterns.md) for examples using `ILogicalColumnReaderVisitor` and related visitor types. 
### Reading data in batches diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md new file mode 100644 index 00000000..44521ba7 --- /dev/null +++ b/docs/guides/VisitorPatterns.md @@ -0,0 +1,378 @@ +# Visitor patterns: reading & writing with unknown column types + +ParquetSharp exposes a number of "visitor" interfaces that make it convenient to read or write columns when you don't know the concrete column types at compile time. These visitors let you write type-safe code that is invoked for the actual column element type at runtime. + +## ILogicalColumnWriterVisitor + +The `ILogicalColumnWriterVisitor` interface is invoked for logical writers (high-level typed writers). Use this when you need to write data to columns but don't know the column types at compile time. + +### Example: Generic column writer + +```csharp +// A visitor that writes arrays of values to any column type +sealed class GenericColumnWriter : ILogicalColumnWriterVisitor +{ + private readonly IDictionary _valuesByColumn; + + public GenericColumnWriter(IDictionary valuesByColumn) + { + _valuesByColumn = valuesByColumn; + } + + public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) + { + // Look up values for this column name + if (!_valuesByColumn.TryGetValue(columnWriter.ColumnDescriptor.Path[0], out var raw)) + return false; + + // Cast through object to TValue[] for WriteBatch + var values = (TValue[])(object)raw; + columnWriter.WriteBatch(values); + return true; + } +} + +// Usage +var valuesByColumn = new Dictionary +{ + { "Id", new[] { 1, 2, 3 } }, + { "Name", new[] { "Alice", "Bob", "Carol" } }, + { "Price", new[] { 9.99, 12.50, 5.75 } } +}; + +using var logicalWriter = columnWriter.LogicalWriter(); +var success = logicalWriter.Apply(new GenericColumnWriter(valuesByColumn)); +``` + +### Example: Conditional writer based on type + +```csharp +// A visitor that only writes numeric columns, skipping others +sealed class NumericOnlyWriter : 
ILogicalColumnWriterVisitor +{ + private readonly double _fillValue; + + public NumericOnlyWriter(double fillValue) => _fillValue = fillValue; + + public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) + { + // Only write if TValue is a numeric type + if (typeof(TValue) == typeof(int) || typeof(TValue) == typeof(double) || + typeof(TValue) == typeof(float) || typeof(TValue) == typeof(long)) + { + var values = new TValue[] { (TValue)(object)_fillValue }; + columnWriter.WriteBatch(values); + return true; + } + return false; + } +} +``` + +## ILogicalColumnReaderVisitor + +The `ILogicalColumnReaderVisitor` interface is invoked for logical readers (high-level typed readers). Use this when you need to read data from columns of unknown types. + +### Example: Convert columns to strings + +```csharp +// A visitor that reads all values and returns them as a comma-separated string +sealed class ColumnToStringReader : ILogicalColumnReaderVisitor +{ + public string OnLogicalColumnReader(LogicalColumnReader columnReader) + { + var sb = new StringBuilder(); + const int bufferSize = 1024; + var buffer = new TElement[bufferSize]; + + while (columnReader.HasNext) + { + var read = columnReader.ReadBatch(buffer); + for (var i = 0; i < read; ++i) + { + var v = buffer[i]; + sb.Append(v?.ToString() ?? 
"null"); + sb.Append(", "); + } + } + + if (sb.Length >= 2) sb.Length -= 2; + return sb.ToString(); + } +} + +// Usage +using var logicalReader = columnReader.LogicalReader(); +var columnString = logicalReader.Apply(new ColumnToStringReader()); +Console.WriteLine($"Column data: {columnString}"); +``` + +### Example: Calculate column statistics + +```csharp +// A visitor that computes row count for any column type +sealed class RowCountReader : ILogicalColumnReaderVisitor +{ + public long OnLogicalColumnReader(LogicalColumnReader columnReader) + { + long count = 0; + const int bufferSize = 1024; + var buffer = new TElement[bufferSize]; + + while (columnReader.HasNext) + { + var read = columnReader.ReadBatch(buffer); + count += read; + } + + return count; + } +} + +// Usage +using var logicalReader = columnReader.LogicalReader(); +var rowCount = logicalReader.Apply(new RowCountReader()); +Console.WriteLine($"Total rows: {rowCount}"); +``` + +## IColumnWriterVisitor + +The `IColumnWriterVisitor` interface provides lower-level access to physical column writers. Use this when you need to work with physical types, definition levels, repetition levels, or encodings. + +### Example: Physical type inspector + +```csharp +// A visitor that reports the physical type being written +sealed class PhysicalTypeWriter : IColumnWriterVisitor +{ + public string OnColumnWriter(ColumnWriter columnWriter) + where TValue : unmanaged + { + var physicalType = typeof(TValue).Name; + Console.WriteLine($"Writing physical type: {physicalType}"); + + // Could perform low-level writes here if needed + // columnWriter.WriteBatch(..., definitionLevels, repetitionLevels); + + return physicalType; + } +} +``` + +## IColumnReaderVisitor + +The `IColumnReaderVisitor` interface provides lower-level access to physical column readers. Use this for low-level operations that require access to definition levels, repetition levels, or physical encodings. 
+ +### Example: Definition level analyzer + +```csharp +// A visitor that counts null values using definition levels +sealed class NullCountReader : IColumnReaderVisitor +{ + public int OnColumnReader(ColumnReader columnReader) + where TValue : unmanaged + { + const int bufferSize = 1024; + var values = new TValue[bufferSize]; + var defLevels = new short[bufferSize]; + var repLevels = new short[bufferSize]; + int nullCount = 0; + + while (columnReader.HasNext) + { + var read = columnReader.ReadBatch(bufferSize, defLevels, repLevels, values, out var valuesRead); + + // Count definition levels that indicate null + for (int i = 0; i < read; i++) + { + if (defLevels[i] < columnReader.ColumnDescriptor.MaxDefinitionLevel) + { + nullCount++; + } + } + } + + return nullCount; + } +} +``` + +## IColumnDescriptorVisitor + +The `IColumnDescriptorVisitor` interface visits column descriptors (schema metadata) without performing any I/O. Use this when you only need to inspect or process schema information. + +### Example: Schema type reporter + +```csharp +// A visitor that generates a human-readable type description +sealed class TypeDescriptionVisitor : IColumnDescriptorVisitor +{ + public string OnColumnDescriptor(ColumnDescriptor descriptor) + where TValue : unmanaged + { + var logicalType = descriptor.LogicalType?.ToString() ?? "none"; + var physicalType = typeof(TValue).Name; + var repetition = descriptor.MaxRepetitionLevel > 0 ? 
"repeated" : "required"; + + return $"{descriptor.Name}: {logicalType} (physical: {physicalType}, {repetition})"; + } +} + +// Usage +var description = columnDescriptor.Apply(new TypeDescriptionVisitor()); +Console.WriteLine(description); +``` + +### Example: Schema validator + +```csharp +// A visitor that checks if a column meets certain requirements +sealed class SchemaValidator : IColumnDescriptorVisitor +{ + private readonly HashSet _allowedNames; + + public SchemaValidator(HashSet allowedNames) + { + _allowedNames = allowedNames; + } + + public bool OnColumnDescriptor(ColumnDescriptor descriptor) + where TValue : unmanaged + { + // Check if column name is in allowed list + if (!_allowedNames.Contains(descriptor.Name)) + return false; + + // Check if nullable when it shouldn't be + if (descriptor.MaxDefinitionLevel > 0) + { + Console.WriteLine($"Warning: Column {descriptor.Name} is nullable"); + } + + return true; + } +} +``` + +## Complete working example + +Here's a full example demonstrating `ILogicalColumnWriterVisitor` and `ILogicalColumnReaderVisitor` together: + +```csharp +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using ParquetSharp; + +namespace ParquetSharp.Examples +{ + sealed class ExampleWriter : ILogicalColumnWriterVisitor + { + private readonly IDictionary _valuesByColumn; + + public ExampleWriter(IDictionary valuesByColumn) + { + _valuesByColumn = valuesByColumn; + } + + public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) + { + if (!_valuesByColumn.TryGetValue(columnWriter.ColumnDescriptor.Path[0], out var raw)) + return false; + + var values = (TValue[])(object)raw; + columnWriter.WriteBatch(values); + return true; + } + } + + sealed class ExampleReader : ILogicalColumnReaderVisitor + { + public string OnLogicalColumnReader(LogicalColumnReader columnReader) + { + var sb = new StringBuilder(); + const int bufferSize = 1024; + var buffer = new TElement[bufferSize]; + + while 
(columnReader.HasNext) + { + var read = columnReader.ReadBatch(buffer); + for (var i = 0; i < read; ++i) + { + var v = buffer[i]; + sb.Append(v?.ToString() ?? "null"); + sb.Append(", "); + } + } + + if (sb.Length >= 2) sb.Length -= 2; + return sb.ToString(); + } + } + + class VisitorReadWriteExample + { + public static void Run(string path) + { + var columns = new Column[] + { + new Column("Id"), + new Column("Name"), + new Column("Timestamp") + }; + + var ids = new[] { 1, 2, 3 }; + var names = new[] { "Alice", "Bob", "Carol" }; + var times = new[] { DateTime.UtcNow, DateTime.UtcNow.AddMinutes(1), DateTime.UtcNow.AddMinutes(2) }; + + using (var writer = new ParquetFileWriter(path, columns)) + { + using var rowGroup = writer.AppendRowGroup(); + for (int i = 0; i < columns.Length; ++i) + { + using var colWriter = rowGroup.NextColumn(); + var valuesByColumn = new Dictionary + { + { "Id", ids }, + { "Name", names }, + { "Timestamp", times } + }; + + using var logicalWriter = colWriter.LogicalWriter(); + logicalWriter.Apply(new ExampleWriter(valuesByColumn)); + } + writer.Close(); + } + + using var fileReader = new ParquetFileReader(path); + using var rg = fileReader.RowGroup(0); + for (int i = 0; i < fileReader.FileMetaData.NumColumns; ++i) + { + using var colReader = rg.Column(i); + using var logicalReader = colReader.LogicalReader(); + var colString = logicalReader.Apply(new ExampleReader()); + Console.WriteLine($"Column {colReader.ColumnDescriptor.Path[0]}: {colString}"); + } + } + } +} +``` + +## Best practices + +### When to use each visitor type + +- **ILogicalColumnWriterVisitor / ILogicalColumnReaderVisitor**: Use for high-level, type-safe reading and writing when column types are unknown at compile time. Ideal for generic tooling, schema-driven processing, and data exporters. + +- **IColumnWriterVisitor / IColumnReaderVisitor**: Use for low-level operations requiring access to definition levels, repetition levels, or physical encodings. 
Needed for nested types and null handling. + +- **IColumnDescriptorVisitor**: Use when you only need to inspect schema metadata without performing I/O. Perfect for schema validation, type checking, and metadata extraction. + +### When to avoid visitors + +If you already know the schema at compile time, prefer the generic `LogicalWriter` / `LogicalReader` APIs — they are simpler, faster, and more maintainable. + +### Casting arrays safely + +The `(TValue[])(object)array` cast pattern is safe when the visitor is invoked with the concrete `TValue` type that matches your stored array element type. Always ensure your stored arrays match the declared column types to avoid runtime exceptions. \ No newline at end of file diff --git a/docs/guides/Writing.md b/docs/guides/Writing.md index 728da5d7..66e246e0 100644 --- a/docs/guides/Writing.md +++ b/docs/guides/Writing.md @@ -121,28 +121,8 @@ There is also a `ColumnWriter.LogicalWriterOverride` method, which supports writ to the default .NET type corresponding to the column's logical type. For more information on how to use this, see the [type factories documentation](TypeFactories.md). 
-If you don't know ahead of time the column types that will be written, -you can implement the `ILogicalColumnWriterVisitor` interface to handle writing data in a type-safe way: - -```csharp -sealed class ExampleWriter : ILogicalColumnWriterVisitor -{ - public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) - { - TValue[] values = GetValues(); - columnWriter.WriteBatch(values); - return true; - } -} - -using RowGroupWriter rowGroup = file.AppendRowGroup(); -for (int columnIndex = 0; columnIndex < file.NumColumns; ++columnIndex) -{ - using var columnWriter = rowGroup.NextColumn(); - using var logicalWriter = columnWriter.LogicalWriter(); - var returnVal = logicalWriter.Apply(new ExampleWriter()); -} -``` +If you don't know ahead of time the column types that will be written, see the visitor-pattern guide: +[Visitor patterns: reading & writing with unknown column types](VisitorPatterns.md) — it includes a full example demonstrating writing and then reading a file with mixed column types using `ILogicalColumnWriterVisitor` and `ILogicalColumnReaderVisitor`. 
### Closing the ParquetFileWriter diff --git a/docs/guides/toc.yml b/docs/guides/toc.yml index bcaa9594..7b4db103 100644 --- a/docs/guides/toc.yml +++ b/docs/guides/toc.yml @@ -16,3 +16,5 @@ href: TimeSpan.md - name: Use from PowerShell href: PowerShell.md +- name: Visitor Patterns + href: VisitorPatterns.md From 000ae91a3bf765910f57d31ff99e0225acb5dc7c Mon Sep 17 00:00:00 2001 From: haianhng31 Date: Wed, 19 Nov 2025 10:00:46 +0000 Subject: [PATCH 02/13] Update doc --- docs/guides/VisitorPatterns.md | 67 ++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index 44521ba7..fb5b550a 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -56,15 +56,24 @@ sealed class NumericOnlyWriter : ILogicalColumnWriterVisitor public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) { - // Only write if TValue is a numeric type - if (typeof(TValue) == typeof(int) || typeof(TValue) == typeof(double) || - typeof(TValue) == typeof(float) || typeof(TValue) == typeof(long)) + TValue val; + if (typeof(TValue) == typeof(int) || + typeof(TValue) == typeof(double) || + typeof(TValue) == typeof(float) || + typeof(TValue) == typeof(long)) { - var values = new TValue[] { (TValue)(object)_fillValue }; - columnWriter.WriteBatch(values); - return true; + // Convert _fillValue to the correct TValue + val = (TValue)Convert.ChangeType(_fillValue, typeof(TValue)); + } + else + { + // write default(TValue) so the row count matches + val = default!; } - return false; + + var arr = new TValue[] { val }; + columnWriter.WriteBatch(arr); + return true; } } ``` @@ -90,8 +99,7 @@ sealed class ColumnToStringReader : ILogicalColumnReaderVisitor var read = columnReader.ReadBatch(buffer); for (var i = 0; i < read; ++i) { - var v = buffer[i]; - sb.Append(v?.ToString() ?? "null"); + sb.Append(buffer[i]?.ToString() ?? 
"null"); sb.Append(", "); } } @@ -121,8 +129,7 @@ sealed class RowCountReader : ILogicalColumnReaderVisitor while (columnReader.HasNext) { - var read = columnReader.ReadBatch(buffer); - count += read; + count += columnReader.ReadBatch(buffer); } return count; @@ -207,14 +214,24 @@ The `IColumnDescriptorVisitor` interface visits column descriptors (sch // A visitor that generates a human-readable type description sealed class TypeDescriptionVisitor : IColumnDescriptorVisitor { - public string OnColumnDescriptor(ColumnDescriptor descriptor) - where TValue : unmanaged + public string OnColumnDescriptor(ColumnDescriptor descriptor) { - var logicalType = descriptor.LogicalType?.ToString() ?? "none"; - var physicalType = typeof(TValue).Name; - var repetition = descriptor.MaxRepetitionLevel > 0 ? "repeated" : "required"; - - return $"{descriptor.Name}: {logicalType} (physical: {physicalType}, {repetition})"; + var logical = descriptor.LogicalType?.ToString() ?? "none"; + var physical = descriptor.PhysicalType.ToString(); + + return $"{descriptor.Name}: logical={logical}, physical={physical}"; + } + + public string OnColumnDescriptor() + where TPhysical : unmanaged + { + // This method is for complex (logical) types. + // You can return something generic, or customize it as needed. 
+ var physical = typeof(TPhysical).Name; + var logical = typeof(TLogical).Name; + var element = typeof(TElement).Name; + + return $"ComplexType: physical={physical}, logical={logical}, element={element}"; } } @@ -236,8 +253,7 @@ sealed class SchemaValidator : IColumnDescriptorVisitor _allowedNames = allowedNames; } - public bool OnColumnDescriptor(ColumnDescriptor descriptor) - where TValue : unmanaged + public bool OnColumnDescriptor(ColumnDescriptor descriptor) { // Check if column name is in allowed list if (!_allowedNames.Contains(descriptor.Name)) @@ -251,6 +267,17 @@ sealed class SchemaValidator : IColumnDescriptorVisitor return true; } + + public bool OnColumnDescriptor() where TPhysical : unmanaged + { + + var physical = typeof(TPhysical).Name; + var logical = typeof(TLogical).Name; + var element = typeof(TElement).Name; + + Console.WriteLine($"ComplexType: physical={physical}, logical={logical}, element={element}"); + throw new NotImplementedException(); + } } ``` From c09483a7d780b80941bcdcada2c3a3e5bbaf958f Mon Sep 17 00:00:00 2001 From: haianhng31 Date: Thu, 20 Nov 2025 10:06:32 +0000 Subject: [PATCH 03/13] Update doc --- docs/guides/VisitorPatterns.md | 178 --------------------------------- 1 file changed, 178 deletions(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index fb5b550a..b3e67d37 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -208,184 +208,6 @@ sealed class NullCountReader : IColumnReaderVisitor The `IColumnDescriptorVisitor` interface visits column descriptors (schema metadata) without performing any I/O. Use this when you only need to inspect or process schema information. -### Example: Schema type reporter - -```csharp -// A visitor that generates a human-readable type description -sealed class TypeDescriptionVisitor : IColumnDescriptorVisitor -{ - public string OnColumnDescriptor(ColumnDescriptor descriptor) - { - var logical = descriptor.LogicalType?.ToString() ?? 
"none"; - var physical = descriptor.PhysicalType.ToString(); - - return $"{descriptor.Name}: logical={logical}, physical={physical}"; - } - - public string OnColumnDescriptor() - where TPhysical : unmanaged - { - // This method is for complex (logical) types. - // You can return something generic, or customize it as needed. - var physical = typeof(TPhysical).Name; - var logical = typeof(TLogical).Name; - var element = typeof(TElement).Name; - - return $"ComplexType: physical={physical}, logical={logical}, element={element}"; - } -} - -// Usage -var description = columnDescriptor.Apply(new TypeDescriptionVisitor()); -Console.WriteLine(description); -``` - -### Example: Schema validator - -```csharp -// A visitor that checks if a column meets certain requirements -sealed class SchemaValidator : IColumnDescriptorVisitor -{ - private readonly HashSet _allowedNames; - - public SchemaValidator(HashSet allowedNames) - { - _allowedNames = allowedNames; - } - - public bool OnColumnDescriptor(ColumnDescriptor descriptor) - { - // Check if column name is in allowed list - if (!_allowedNames.Contains(descriptor.Name)) - return false; - - // Check if nullable when it shouldn't be - if (descriptor.MaxDefinitionLevel > 0) - { - Console.WriteLine($"Warning: Column {descriptor.Name} is nullable"); - } - - return true; - } - - public bool OnColumnDescriptor() where TPhysical : unmanaged - { - - var physical = typeof(TPhysical).Name; - var logical = typeof(TLogical).Name; - var element = typeof(TElement).Name; - - Console.WriteLine($"ComplexType: physical={physical}, logical={logical}, element={element}"); - throw new NotImplementedException(); - } -} -``` - -## Complete working example - -Here's a full example demonstrating `ILogicalColumnWriterVisitor` and `ILogicalColumnReaderVisitor` together: - -```csharp -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using ParquetSharp; - -namespace ParquetSharp.Examples -{ - sealed class ExampleWriter 
: ILogicalColumnWriterVisitor - { - private readonly IDictionary _valuesByColumn; - - public ExampleWriter(IDictionary valuesByColumn) - { - _valuesByColumn = valuesByColumn; - } - - public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) - { - if (!_valuesByColumn.TryGetValue(columnWriter.ColumnDescriptor.Path[0], out var raw)) - return false; - - var values = (TValue[])(object)raw; - columnWriter.WriteBatch(values); - return true; - } - } - - sealed class ExampleReader : ILogicalColumnReaderVisitor - { - public string OnLogicalColumnReader(LogicalColumnReader columnReader) - { - var sb = new StringBuilder(); - const int bufferSize = 1024; - var buffer = new TElement[bufferSize]; - - while (columnReader.HasNext) - { - var read = columnReader.ReadBatch(buffer); - for (var i = 0; i < read; ++i) - { - var v = buffer[i]; - sb.Append(v?.ToString() ?? "null"); - sb.Append(", "); - } - } - - if (sb.Length >= 2) sb.Length -= 2; - return sb.ToString(); - } - } - - class VisitorReadWriteExample - { - public static void Run(string path) - { - var columns = new Column[] - { - new Column("Id"), - new Column("Name"), - new Column("Timestamp") - }; - - var ids = new[] { 1, 2, 3 }; - var names = new[] { "Alice", "Bob", "Carol" }; - var times = new[] { DateTime.UtcNow, DateTime.UtcNow.AddMinutes(1), DateTime.UtcNow.AddMinutes(2) }; - - using (var writer = new ParquetFileWriter(path, columns)) - { - using var rowGroup = writer.AppendRowGroup(); - for (int i = 0; i < columns.Length; ++i) - { - using var colWriter = rowGroup.NextColumn(); - var valuesByColumn = new Dictionary - { - { "Id", ids }, - { "Name", names }, - { "Timestamp", times } - }; - - using var logicalWriter = colWriter.LogicalWriter(); - logicalWriter.Apply(new ExampleWriter(valuesByColumn)); - } - writer.Close(); - } - - using var fileReader = new ParquetFileReader(path); - using var rg = fileReader.RowGroup(0); - for (int i = 0; i < fileReader.FileMetaData.NumColumns; ++i) - { - using var colReader = 
rg.Column(i); - using var logicalReader = colReader.LogicalReader(); - var colString = logicalReader.Apply(new ExampleReader()); - Console.WriteLine($"Column {colReader.ColumnDescriptor.Path[0]}: {colString}"); - } - } - } -} -``` - ## Best practices ### When to use each visitor type From 8acc83bfb0edf3ddf10c041568339407c51a5783 Mon Sep 17 00:00:00 2001 From: "Hai-Anh (Hennessy) Nguyen" <126405175+haianhng31@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:46:57 +0800 Subject: [PATCH 04/13] Update docs/guides/VisitorPatterns.md Co-authored-by: Adam Reeve --- docs/guides/VisitorPatterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index b3e67d37..292a9ee1 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -206,7 +206,7 @@ sealed class NullCountReader : IColumnReaderVisitor ## IColumnDescriptorVisitor -The `IColumnDescriptorVisitor` interface visits column descriptors (schema metadata) without performing any I/O. Use this when you only need to inspect or process schema information. +The @ParquetSharp.IColumnDescriptorVisitor`1 interface visits column descriptors (schema metadata) without performing any I/O. Use this when you only need to inspect or process schema information. 
## Best practices From ee9ec51ef3a190e65f60e5f79a7673ea0c8d0235 Mon Sep 17 00:00:00 2001 From: "Hai-Anh (Hennessy) Nguyen" <126405175+haianhng31@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:47:47 +0800 Subject: [PATCH 05/13] Update docs/guides/VisitorPatterns.md Co-authored-by: Adam Reeve --- docs/guides/VisitorPatterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index 292a9ee1..f800763a 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -168,7 +168,7 @@ sealed class PhysicalTypeWriter : IColumnWriterVisitor ## IColumnReaderVisitor -The `IColumnReaderVisitor` interface provides lower-level access to physical column readers. Use this for low-level operations that require access to definition levels, repetition levels, or physical encodings. +The @ParquetSharp.IColumnReaderVisitor`1 interface provides lower-level access to physical column readers. Use this for low-level operations that require access to definition levels, repetition levels, or physical encodings. ### Example: Definition level analyzer From cdeec256111770684370e990bbdc1da7124281c4 Mon Sep 17 00:00:00 2001 From: "Hai-Anh (Hennessy) Nguyen" <126405175+haianhng31@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:47:53 +0800 Subject: [PATCH 06/13] Update docs/guides/VisitorPatterns.md Co-authored-by: Adam Reeve --- docs/guides/VisitorPatterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index f800763a..c5514504 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -144,7 +144,7 @@ Console.WriteLine($"Total rows: {rowCount}"); ## IColumnWriterVisitor -The `IColumnWriterVisitor` interface provides lower-level access to physical column writers. Use this when you need to work with physical types, definition levels, repetition levels, or encodings. 
+The @ParquetSharp.IColumnWriterVisitor`1 interface provides lower-level access to physical column writers. Use this when you need to work with physical types, definition levels, repetition levels, or encodings. ### Example: Physical type inspector From 1516b4640c906f0d991d2807d9f8316ae05842ea Mon Sep 17 00:00:00 2001 From: "Hai-Anh (Hennessy) Nguyen" <126405175+haianhng31@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:48:01 +0800 Subject: [PATCH 07/13] Update docs/guides/VisitorPatterns.md Co-authored-by: Adam Reeve --- docs/guides/VisitorPatterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index c5514504..c3b4ff6e 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -80,7 +80,7 @@ sealed class NumericOnlyWriter : ILogicalColumnWriterVisitor ## ILogicalColumnReaderVisitor -The `ILogicalColumnReaderVisitor` interface is invoked for logical readers (high-level typed readers). Use this when you need to read data from columns of unknown types. +The @ParquetSharp.ILogicalColumnReaderVisitor`1 interface is invoked for logical readers (high-level typed readers). Use this when you need to read data from columns of unknown types. 
### Example: Convert columns to strings From ad9493efbcda4a51b21722cb487653d35038b88e Mon Sep 17 00:00:00 2001 From: "Hai-Anh (Hennessy) Nguyen" <126405175+haianhng31@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:48:40 +0800 Subject: [PATCH 08/13] Update docs/guides/VisitorPatterns.md Co-authored-by: Adam Reeve --- docs/guides/VisitorPatterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index c3b4ff6e..1f301bd6 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -220,7 +220,7 @@ The @ParquetSharp.IColumnDescriptorVisitor`1 interface visits column descriptors ### When to avoid visitors -If you already know the schema at compile time, prefer the generic `LogicalWriter` / `LogicalReader` APIs — they are simpler, faster, and more maintainable. +If you already know the schema at compile time, prefer the generic `LogicalWriter` / `LogicalReader` APIs — they are simpler and more maintainable. ### Casting arrays safely From 88e6737f3884ec77ddd59b544f8c39ebf000dd14 Mon Sep 17 00:00:00 2001 From: "Hai-Anh (Hennessy) Nguyen" <126405175+haianhng31@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:49:09 +0800 Subject: [PATCH 09/13] Update docs/guides/VisitorPatterns.md Co-authored-by: Adam Reeve --- docs/guides/VisitorPatterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index 1f301bd6..ebc96be2 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -214,7 +214,7 @@ The @ParquetSharp.IColumnDescriptorVisitor`1 interface visits column descriptors - **ILogicalColumnWriterVisitor / ILogicalColumnReaderVisitor**: Use for high-level, type-safe reading and writing when column types are unknown at compile time. Ideal for generic tooling, schema-driven processing, and data exporters. 
-- **IColumnWriterVisitor / IColumnReaderVisitor**: Use for low-level operations requiring access to definition levels, repetition levels, or physical encodings. Needed for nested types and null handling. +- **IColumnWriterVisitor / IColumnReaderVisitor**: Use for low-level operations requiring access to definition levels, repetition levels, or physical encodings. - **IColumnDescriptorVisitor**: Use when you only need to inspect schema metadata without performing I/O. Perfect for schema validation, type checking, and metadata extraction. From 5530ac7bb1e216dc5276959d0ec5a324cd50de23 Mon Sep 17 00:00:00 2001 From: "Hai-Anh (Hennessy) Nguyen" <126405175+haianhng31@users.noreply.github.com> Date: Sat, 22 Nov 2025 21:49:50 +0800 Subject: [PATCH 10/13] Update docs/guides/VisitorPatterns.md Co-authored-by: Adam Reeve --- docs/guides/VisitorPatterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index ebc96be2..f98f87f5 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -4,7 +4,7 @@ ParquetSharp exposes a number of "visitor" interfaces that make it convenient to ## ILogicalColumnWriterVisitor -The `ILogicalColumnWriterVisitor` interface is invoked for logical writers (high-level typed writers). Use this when you need to write data to columns but don't know the column types at compile time. +The @ParquetSharp.ILogicalColumnWriterVisitor`1 interface is invoked for logical writers (high-level typed writers). Use this when you need to write data to columns but don't know the column types at compile time. 
### Example: Generic column writer From 99b6f5c48c607ea5ddfb9abc68c7de13c4e97ffe Mon Sep 17 00:00:00 2001 From: haianhng31 Date: Sat, 22 Nov 2025 14:14:12 +0000 Subject: [PATCH 11/13] Update documentation --- docs/guides/VisitorPatterns.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index f98f87f5..5c629ca0 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -14,7 +14,7 @@ sealed class GenericColumnWriter : ILogicalColumnWriterVisitor { private readonly IDictionary _valuesByColumn; - public GenericColumnWriter(IDictionary valuesByColumn) + public GenericColumnWriter(IDictionary valuesByColumn) { _valuesByColumn = valuesByColumn; } @@ -22,11 +22,11 @@ sealed class GenericColumnWriter : ILogicalColumnWriterVisitor public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) { // Look up values for this column name - if (!_valuesByColumn.TryGetValue(columnWriter.ColumnDescriptor.Path[0], out var raw)) + if (!_valuesByColumn.TryGetValue(columnWriter.ColumnDescriptor.ToDotString(), out var raw)) return false; // Cast through object to TValue[] for WriteBatch - var values = (TValue[])(object)raw; + var values = (TValue[])raw; columnWriter.WriteBatch(values); return true; } From 27dd2efd8e4818a0d11cbf2bc8dc7e4092527c50 Mon Sep 17 00:00:00 2001 From: haianhng31 Date: Sat, 22 Nov 2025 14:18:58 +0000 Subject: [PATCH 12/13] Update documentation --- docs/guides/VisitorPatterns.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index 5c629ca0..c629a662 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -44,6 +44,11 @@ using var logicalWriter = columnWriter.LogicalWriter(); var success = logicalWriter.Apply(new GenericColumnWriter(valuesByColumn)); ``` +#### Casting arrays safely + +The `(TValue[])(object)array` cast pattern 
is safe when the visitor is invoked with the concrete `TValue` type that matches your stored array element type. Always ensure your stored arrays match the declared column types to avoid runtime exceptions. + + ### Example: Conditional writer based on type ```csharp @@ -220,8 +225,4 @@ The @ParquetSharp.IColumnDescriptorVisitor`1 interface visits column descriptors ### When to avoid visitors -If you already know the schema at compile time, prefer the generic `LogicalWriter` / `LogicalReader` APIs — they are simpler and more maintainable. - -### Casting arrays safely - -The `(TValue[])(object)array` cast pattern is safe when the visitor is invoked with the concrete `TValue` type that matches your stored array element type. Always ensure your stored arrays match the declared column types to avoid runtime exceptions. \ No newline at end of file +If you already know the schema at compile time, prefer the generic `LogicalWriter` / `LogicalReader` APIs — they are simpler and more maintainable. 
\ No newline at end of file From 9a4bfe27b7f5f758e10ff81d27f6bcf9785976d2 Mon Sep 17 00:00:00 2001 From: haianhng31 Date: Mon, 24 Nov 2025 02:21:20 +0000 Subject: [PATCH 13/13] Update documentation --- docs/guides/VisitorPatterns.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/guides/VisitorPatterns.md b/docs/guides/VisitorPatterns.md index c629a662..bed8d4dd 100644 --- a/docs/guides/VisitorPatterns.md +++ b/docs/guides/VisitorPatterns.md @@ -12,7 +12,7 @@ The @ParquetSharp.ILogicalColumnWriterVisitor`1 interface is invoked for logical // A visitor that writes arrays of values to any column type sealed class GenericColumnWriter : ILogicalColumnWriterVisitor { - private readonly IDictionary _valuesByColumn; + private readonly IDictionary _valuesByColumn; public GenericColumnWriter(IDictionary valuesByColumn) { @@ -22,10 +22,9 @@ sealed class GenericColumnWriter : ILogicalColumnWriterVisitor public bool OnLogicalColumnWriter(LogicalColumnWriter columnWriter) { // Look up values for this column name - if (!_valuesByColumn.TryGetValue(columnWriter.ColumnDescriptor.ToDotString(), out var raw)) + if (!_valuesByColumn.TryGetValue(columnWriter.ColumnDescriptor.Path.ToDotString(), out var raw)) return false; - // Cast through object to TValue[] for WriteBatch var values = (TValue[])raw; columnWriter.WriteBatch(values); return true; @@ -33,7 +32,7 @@ sealed class GenericColumnWriter : ILogicalColumnWriterVisitor } // Usage -var valuesByColumn = new Dictionary +var valuesByColumn = new Dictionary { { "Id", new[] { 1, 2, 3 } }, { "Name", new[] { "Alice", "Bob", "Carol" } }, @@ -46,7 +45,7 @@ var success = logicalWriter.Apply(new GenericColumnWriter(valuesByColumn)); #### Casting arrays safely -The `(TValue[])(object)array` cast pattern is safe when the visitor is invoked with the concrete `TValue` type that matches your stored array element type. Always ensure your stored arrays match the declared column types to avoid runtime exceptions. 
+The `(TValue[])array` cast succeeds only when the array stored for a column has the same element type as the concrete `TValue` the visitor is invoked with; a mismatched array throws an `InvalidCastException` at runtime. Always ensure your stored arrays match the declared column types to avoid this. ### Example: Conditional writer based on type