|
1 | | - |
| 1 | + |
2 | 2 |
|
3 | 3 | ## Introduction |
4 | 4 |
|
@@ -35,56 +35,126 @@ Supported platforms: |
35 | 35 |
|
36 | 36 | The following examples show how to write and then read a Parquet file with three columns representing a timeseries of object-value pairs. |
37 | 37 | These use the low-level API, which is the recommended API for working with native .NET types and closely maps to the API of Apache Parquet C++. |
38 | | -For reading and writing data in the [Apache Arrow](https://arrow.apache.org/) format, an [Arrow based API](docs/Arrow.md) is also provided. |
| 38 | +For reading and writing data in the [Apache Arrow](https://arrow.apache.org/) format, an [Arrow-based API](docs/Arrow.md) is also provided. |
39 | 39 |
|
40 | | -### How to write a Parquet File: |
| 40 | +### 1. Initialize a new project |
41 | 41 |
|
42 | | -```csharp |
43 | | -var timestamps = new DateTime[] { /* ... */ }; |
44 | | -var objectIds = new int[] { /* ... */ }; |
45 | | -var values = new float[] { /* ... */ }; |
| 42 | +First, let's create a new console application: |
46 | 43 |
|
47 | | -var columns = new Column[] |
48 | | -{ |
49 | | - new Column<DateTime>("Timestamp"), |
50 | | - new Column<int>("ObjectId"), |
51 | | - new Column<float>("Value") |
52 | | -}; |
| 44 | +```bash |
| 45 | +dotnet new console -n ParquetExample |
| 46 | +cd ParquetExample |
| 47 | +``` |
53 | 48 |
|
54 | | -using var file = new ParquetFileWriter("float_timeseries.parquet", columns); |
55 | | -using var rowGroup = file.AppendRowGroup(); |
| 49 | +The new project directory contains a `Program.cs` file, which we'll use to write a Parquet file and then read it back. |
56 | 50 |
|
57 | | -using (var timestampWriter = rowGroup.NextColumn().LogicalWriter<DateTime>()) |
58 | | -{ |
59 | | - timestampWriter.WriteBatch(timestamps); |
60 | | -} |
61 | | -using (var objectIdWriter = rowGroup.NextColumn().LogicalWriter<int>()) |
62 | | -{ |
63 | | - objectIdWriter.WriteBatch(objectIds); |
64 | | -} |
65 | | -using (var valueWriter = rowGroup.NextColumn().LogicalWriter<float>()) |
| 51 | +### 2. Install ParquetSharp |
| 52 | + |
| 53 | +ParquetSharp is available as a [NuGet package](https://www.nuget.org/packages/ParquetSharp/). You can install it using the following command: |
| 54 | + |
| 55 | +```bash |
| 56 | +dotnet add package ParquetSharp |
| 57 | +``` |
| 58 | + |
| 59 | +### 3. Write a Parquet File |
| 60 | + |
| 61 | +This example shows how to write a Parquet file with three columns: `Timestamp`, `ObjectId`, and `Value`. |
| 62 | + |
| 63 | +Replace the contents of `Program.cs` with the following code: |
| 64 | + |
| 65 | +```csharp |
| 66 | +using System; |
| 67 | +using ParquetSharp; |
| 68 | + |
| 69 | +class Program |
66 | 70 | { |
67 | | - valueWriter.WriteBatch(values); |
| 71 | + static void Main() |
| 72 | + { |
| 73 | + var timestamps = new DateTime[] { DateTime.Now, DateTime.Now.AddMinutes(1) }; |
| 74 | + var objectIds = new int[] { 1, 2 }; |
| 75 | + var values = new float[] { 1.23f, 4.56f }; |
| 76 | + |
| 77 | + var columns = new Column[] |
| 78 | + { |
| 79 | + new Column<DateTime>("Timestamp"), |
| 80 | + new Column<int>("ObjectId"), |
| 81 | + new Column<float>("Value") |
| 82 | + }; |
| 83 | + |
| 84 | + using var file = new ParquetFileWriter("float_timeseries.parquet", columns); |
| 85 | + using var rowGroup = file.AppendRowGroup(); |
| 86 | + |
| 87 | + using (var timestampWriter = rowGroup.NextColumn().LogicalWriter<DateTime>()) |
| 88 | + { |
| 89 | + timestampWriter.WriteBatch(timestamps); |
| 90 | + } |
| 91 | + using (var objectIdWriter = rowGroup.NextColumn().LogicalWriter<int>()) |
| 92 | + { |
| 93 | + objectIdWriter.WriteBatch(objectIds); |
| 94 | + } |
| 95 | + using (var valueWriter = rowGroup.NextColumn().LogicalWriter<float>()) |
| 96 | + { |
| 97 | + valueWriter.WriteBatch(values); |
| 98 | + } |
| 99 | + |
| 100 | + file.Close(); |
| 101 | + Console.WriteLine("Parquet file written successfully!"); |
| 102 | + } |
68 | 103 | } |
| 104 | +``` |
69 | 105 |
|
70 | | -file.Close(); |
| 106 | +You can execute it with: |
| 107 | + |
| 108 | +```bash |
| 109 | +dotnet run |
71 | 110 | ``` |
72 | 111 |
|
73 | | -### How to read a Parquet file: |
| 112 | +### 4. Read a Parquet File |
74 | 113 |
|
75 | | -```csharp |
76 | | -using var file = new ParquetFileReader("float_timeseries.parquet"); |
| 114 | +After writing the Parquet file, we can read it back by replacing the contents of `Program.cs` with the following code: |
77 | 115 |
|
78 | | -for (int rowGroup = 0; rowGroup < file.FileMetaData.NumRowGroups; ++rowGroup) { |
79 | | - using var rowGroupReader = file.RowGroup(rowGroup); |
80 | | - var groupNumRows = checked((int) rowGroupReader.MetaData.NumRows); |
| 116 | +```csharp |
| 117 | +using System; |
| 118 | +using ParquetSharp; |
81 | 119 |
|
82 | | - var groupTimestamps = rowGroupReader.Column(0).LogicalReader<DateTime>().ReadAll(groupNumRows); |
83 | | - var groupObjectIds = rowGroupReader.Column(1).LogicalReader<int>().ReadAll(groupNumRows); |
84 | | - var groupValues = rowGroupReader.Column(2).LogicalReader<float>().ReadAll(groupNumRows); |
| 120 | +class Program |
| 121 | +{ |
| 122 | + static void Main() |
| 123 | + { |
| 124 | + using var file = new ParquetFileReader("float_timeseries.parquet"); |
| 125 | + |
| 126 | + for (int rowGroup = 0; rowGroup < file.FileMetaData.NumRowGroups; ++rowGroup) |
| 127 | + { |
| 128 | + using var rowGroupReader = file.RowGroup(rowGroup); |
| 129 | + var groupNumRows = checked((int)rowGroupReader.MetaData.NumRows); |
| 130 | + |
| 131 | + var groupTimestamps = rowGroupReader.Column(0).LogicalReader<DateTime>().ReadAll(groupNumRows); |
| 132 | + var groupObjectIds = rowGroupReader.Column(1).LogicalReader<int>().ReadAll(groupNumRows); |
| 133 | + var groupValues = rowGroupReader.Column(2).LogicalReader<float>().ReadAll(groupNumRows); |
| 134 | + |
| 135 | + Console.WriteLine("Read Parquet file:"); |
| 136 | + for (int i = 0; i < groupNumRows; ++i) |
| 137 | + { |
| 138 | + Console.WriteLine($"Timestamp: {groupTimestamps[i]}, ObjectId: {groupObjectIds[i]}, Value: {groupValues[i]}"); |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + file.Close(); |
| 143 | + } |
85 | 144 | } |
| 145 | +``` |
86 | 146 |
|
87 | | -file.Close(); |
| 147 | +Once again, run the program with: |
| 148 | + |
| 149 | +```bash |
| 150 | +dotnet run |
| 151 | +``` |
| 152 | + |
| 153 | +This should produce output similar to the following (the exact timestamp format depends on your system's culture settings): |
| 154 | +``` |
| 155 | +Read Parquet file: |
| 156 | +Timestamp: 2025-01-25 10:15:25 AM, ObjectId: 1, Value: 1.23 |
| 157 | +Timestamp: 2025-01-25 10:16:25 AM, ObjectId: 2, Value: 4.56 |
88 | 158 | ``` |
89 | 159 |
|
90 | 160 | ## Documentation |
|
0 commit comments