3333import java .util .stream .Collectors ;
3434import java .util .stream .StreamSupport ;
3535import org .apache .iceberg .data .GenericRecord ;
36+ import org .apache .iceberg .expressions .Literal ;
3637import org .apache .iceberg .io .CloseableIterable ;
3738import org .apache .iceberg .io .FileAppender ;
3839import org .apache .iceberg .io .InputFile ;
4243import org .apache .iceberg .relocated .com .google .common .collect .Lists ;
4344import org .apache .iceberg .relocated .com .google .common .collect .Queues ;
4445import org .apache .iceberg .types .Comparators ;
46+ import org .apache .iceberg .types .Types ;
4547import org .apache .iceberg .types .Types .IntegerType ;
4648import org .apache .iceberg .types .Types .LongType ;
4749import org .apache .iceberg .types .Types .NestedField ;
@@ -89,17 +91,57 @@ private PartitionStatsHandler() {}
8991 NestedField .optional (11 , "last_updated_at" , LongType .get ());
9092 public static final NestedField LAST_UPDATED_SNAPSHOT_ID =
9193 NestedField .optional (12 , "last_updated_snapshot_id" , LongType .get ());
94+ // Using default value for v3 field to support v3 reader reading file written by v2
95+ public static final NestedField DV_COUNT =
96+ NestedField .required ("dv_count" )
97+ .withId (13 )
98+ .ofType (Types .IntegerType .get ())
99+ .withInitialDefault (Literal .of (0 ))
100+ .withWriteDefault (Literal .of (0 ))
101+ .build ();
92102
93103 /**
94104 * Generates the partition stats file schema based on a combined partition type which considers
95105 * all specs in a table.
96106 *
107+ * <p>Use this only for format version 1 and 2. For version 3 and above use {@link
108+ * #schema(StructType, int)}
109+ *
97110 * @param unifiedPartitionType unified partition schema type. Could be calculated by {@link
98111 * Partitioning#partitionType(Table)}.
99112 * @return a schema that corresponds to the provided unified partition type.
113+ * @deprecated since 1.10.0, will be removed in 1.11.0. Use {@link #schema(StructType, int)}
114+ * instead.
100115 */
116+ @ Deprecated
101117 public static Schema schema (StructType unifiedPartitionType ) {
102118 Preconditions .checkState (!unifiedPartitionType .fields ().isEmpty (), "Table must be partitioned" );
119+ return v2Schema (unifiedPartitionType );
120+ }
121+
122+ /**
123+ * Generates the partition stats file schema for a given format version based on a combined
124+ * partition type which considers all specs in a table.
125+ *
126+ * @param unifiedPartitionType unified partition schema type. Could be calculated by {@link
127+ * Partitioning#partitionType(Table)}.
128+ * @return a schema that corresponds to the provided unified partition type.
129+ */
130+ public static Schema schema (StructType unifiedPartitionType , int formatVersion ) {
131+ Preconditions .checkState (!unifiedPartitionType .fields ().isEmpty (), "Table must be partitioned" );
132+ Preconditions .checkState (
133+ formatVersion > 0 && formatVersion <= TableMetadata .SUPPORTED_TABLE_FORMAT_VERSION ,
134+ "Invalid format version: %d" ,
135+ formatVersion );
136+
137+ if (formatVersion <= 2 ) {
138+ return v2Schema (unifiedPartitionType );
139+ }
140+
141+ return v3Schema (unifiedPartitionType );
142+ }
143+
144+ private static Schema v2Schema (StructType unifiedPartitionType ) {
103145 return new Schema (
104146 NestedField .required (PARTITION_FIELD_ID , PARTITION_FIELD_NAME , unifiedPartitionType ),
105147 SPEC_ID ,
@@ -115,6 +157,35 @@ public static Schema schema(StructType unifiedPartitionType) {
115157 LAST_UPDATED_SNAPSHOT_ID );
116158 }
117159
160+ private static Schema v3Schema (StructType unifiedPartitionType ) {
161+ return new Schema (
162+ NestedField .required (PARTITION_FIELD_ID , PARTITION_FIELD_NAME , unifiedPartitionType ),
163+ SPEC_ID ,
164+ DATA_RECORD_COUNT ,
165+ DATA_FILE_COUNT ,
166+ TOTAL_DATA_FILE_SIZE_IN_BYTES ,
167+ NestedField .required (
168+ POSITION_DELETE_RECORD_COUNT .fieldId (),
169+ POSITION_DELETE_RECORD_COUNT .name (),
170+ LongType .get ()),
171+ NestedField .required (
172+ POSITION_DELETE_FILE_COUNT .fieldId (),
173+ POSITION_DELETE_FILE_COUNT .name (),
174+ IntegerType .get ()),
175+ NestedField .required (
176+ EQUALITY_DELETE_RECORD_COUNT .fieldId (),
177+ EQUALITY_DELETE_RECORD_COUNT .name (),
178+ LongType .get ()),
179+ NestedField .required (
180+ EQUALITY_DELETE_FILE_COUNT .fieldId (),
181+ EQUALITY_DELETE_FILE_COUNT .name (),
182+ IntegerType .get ()),
183+ TOTAL_RECORD_COUNT ,
184+ LAST_UPDATED_AT ,
185+ LAST_UPDATED_SNAPSHOT_ID ,
186+ DV_COUNT );
187+ }
188+
118189 /**
119190 * Computes the stats incrementally after the snapshot that has partition stats file till the
120191 * current snapshot and writes the combined result into a {@link PartitionStatisticsFile} after
@@ -190,7 +261,10 @@ public static PartitionStatisticsFile computeAndWriteStatsFile(Table table, long
190261
191262 List <PartitionStats > sortedStats = sortStatsByPartition (stats , partitionType );
192263 return writePartitionStatsFile (
193- table , snapshot .snapshotId (), schema (partitionType ), sortedStats );
264+ table ,
265+ snapshot .snapshotId (),
266+ schema (partitionType , TableUtil .formatVersion (table )),
267+ sortedStats );
194268 }
195269
196270 @ VisibleForTesting
@@ -269,13 +343,13 @@ private static Collection<PartitionStats> computeAndMergeStatsIncremental(
269343 Table table ,
270344 Snapshot snapshot ,
271345 StructType partitionType ,
272- PartitionStatisticsFile previousStatsFile )
273- throws IOException {
346+ PartitionStatisticsFile previousStatsFile ) {
274347 PartitionMap <PartitionStats > statsMap = PartitionMap .create (table .specs ());
275348 // read previous stats, note that partition field will be read as GenericRecord
276349 try (CloseableIterable <PartitionStats > oldStats =
277350 readPartitionStatsFile (
278- schema (partitionType ), table .io ().newInputFile (previousStatsFile .path ()))) {
351+ schema (partitionType , TableUtil .formatVersion (table )),
352+ table .io ().newInputFile (previousStatsFile .path ()))) {
279353 oldStats .forEach (
280354 partitionStats ->
281355 statsMap .put (partitionStats .specId (), partitionStats .partition (), partitionStats ));
0 commit comments