@@ -23,12 +23,15 @@
 import static org.apache.iceberg.PlanningMode.DISTRIBUTED;
 import static org.apache.iceberg.PlanningMode.LOCAL;
 import static org.apache.iceberg.SnapshotSummary.ADDED_DELETE_FILES_PROP;
+import static org.apache.iceberg.SnapshotSummary.ADDED_DVS_PROP;
 import static org.apache.iceberg.SnapshotSummary.ADDED_FILES_PROP;
+import static org.apache.iceberg.SnapshotSummary.ADD_POS_DELETE_FILES_PROP;
 import static org.apache.iceberg.SnapshotSummary.CHANGED_PARTITION_COUNT_PROP;
 import static org.apache.iceberg.SnapshotSummary.DELETED_FILES_PROP;
 import static org.apache.iceberg.TableProperties.DATA_PLANNING_MODE;
 import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT;
 import static org.apache.iceberg.TableProperties.DELETE_PLANNING_MODE;
+import static org.apache.iceberg.TableProperties.FORMAT_VERSION;
 import static org.apache.iceberg.TableProperties.ORC_VECTORIZATION_ENABLED;
 import static org.apache.iceberg.TableProperties.PARQUET_VECTORIZATION_ENABLED;
 import static org.apache.iceberg.TableProperties.SPARK_WRITE_PARTITIONED_FANOUT_ENABLED;
@@ -55,8 +58,10 @@
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.SnapshotRef;
 import org.apache.iceberg.Table;
+import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.data.GenericRecord;
 import org.apache.iceberg.data.parquet.GenericParquetWriter;
+import org.apache.iceberg.deletes.DeleteGranularity;
 import org.apache.iceberg.io.DataWriter;
 import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.parquet.Parquet;
@@ -85,6 +90,7 @@ public abstract class SparkRowLevelOperationsTestBase extends SparkExtensionsTestBase {
   protected final boolean fanoutEnabled;
   protected final String branch;
   protected final PlanningMode planningMode;
+  protected final int formatVersion;

   public SparkRowLevelOperationsTestBase(
       String catalogName,
@@ -95,21 +101,23 @@ public SparkRowLevelOperationsTestBase(
       String distributionMode,
       boolean fanoutEnabled,
       String branch,
-      PlanningMode planningMode) {
+      PlanningMode planningMode,
+      int formatVersion) {
     super(catalogName, implementation, config);
     this.fileFormat = fileFormat;
     this.vectorized = vectorized;
     this.distributionMode = distributionMode;
     this.fanoutEnabled = fanoutEnabled;
     this.branch = branch;
     this.planningMode = planningMode;
+    this.formatVersion = formatVersion;
   }

   @Parameters(
       name =
           "catalogName = {0}, implementation = {1}, config = {2},"
               + " format = {3}, vectorized = {4}, distributionMode = {5},"
-              + " fanout = {6}, branch = {7}, planningMode = {8}")
+              + " fanout = {6}, branch = {7}, planningMode = {8}, formatVersion = {9}")
   public static Object[][] parameters() {
     return new Object[][] {
       {
@@ -123,7 +131,8 @@ public static Object[][] parameters() {
         WRITE_DISTRIBUTION_MODE_NONE,
         true,
         SnapshotRef.MAIN_BRANCH,
-        LOCAL
+        LOCAL,
+        2
       },
       {
         "testhive",
@@ -136,7 +145,8 @@ public static Object[][] parameters() {
         WRITE_DISTRIBUTION_MODE_NONE,
         false,
         "test",
-        DISTRIBUTED
+        DISTRIBUTED,
+        2
       },
       {
         "testhadoop",
@@ -147,7 +157,8 @@
         WRITE_DISTRIBUTION_MODE_HASH,
         true,
         null,
-        LOCAL
+        LOCAL,
+        2
       },
       {
         "spark_catalog",
@@ -165,16 +176,52 @@ public static Object[][] parameters() {
         WRITE_DISTRIBUTION_MODE_RANGE,
         false,
         "test",
-        DISTRIBUTED
-      }
+        DISTRIBUTED,
+        2
+      },
+      {
+        "testhadoop",
+        SparkCatalog.class.getName(),
+        ImmutableMap.of("type", "hadoop"),
+        "parquet",
+        RANDOM.nextBoolean(),
+        WRITE_DISTRIBUTION_MODE_HASH,
+        true,
+        null,
+        LOCAL,
+        3
+      },
+      {
+        "spark_catalog",
+        SparkSessionCatalog.class.getName(),
+        ImmutableMap.of(
+            "type",
+            "hive",
+            "default-namespace",
+            "default",
+            "clients",
+            "1",
+            "parquet-enabled",
+            "false",
+            "cache-enabled",
+            "false" // Spark will delete tables using v1, leaving the cache out of sync
+            ),
+        "avro",
+        false,
+        WRITE_DISTRIBUTION_MODE_RANGE,
+        false,
+        "test",
+        DISTRIBUTED,
+        3
+      },
     };
   }

   protected abstract Map<String, String> extraTableProperties();

   protected void initTable() {
     sql(
-        "ALTER TABLE %s SET TBLPROPERTIES('%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s')",
+        "ALTER TABLE %s SET TBLPROPERTIES('%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s', '%s' '%s')",
         tableName,
         DEFAULT_FILE_FORMAT,
         fileFormat,
@@ -185,7 +232,9 @@ protected void initTable() {
         DATA_PLANNING_MODE,
         planningMode.modeName(),
         DELETE_PLANNING_MODE,
-        planningMode.modeName());
+        planningMode.modeName(),
+        FORMAT_VERSION,
+        formatVersion);

     switch (fileFormat) {
       case "parquet":
@@ -310,6 +359,10 @@ protected void validateSnapshot(
     validateProperty(snapshot, DELETED_FILES_PROP, deletedDataFiles);
     validateProperty(snapshot, ADDED_DELETE_FILES_PROP, addedDeleteFiles);
     validateProperty(snapshot, ADDED_FILES_PROP, addedDataFiles);
+    if (formatVersion >= 3) {
+      validateProperty(snapshot, ADDED_DVS_PROP, addedDeleteFiles);
+      assertThat(snapshot.summary()).doesNotContainKey(ADD_POS_DELETE_FILES_PROP);
+    }
   }

   protected void validateProperty(Snapshot snapshot, String property, Set<String> expectedValues) {
@@ -401,4 +454,12 @@ protected void assertAllBatchScansVectorized(SparkPlan plan) {
     List<SparkPlan> batchScans = SparkPlanUtil.collectBatchScans(plan);
     assertThat(batchScans).hasSizeGreaterThan(0).allMatch(SparkPlan::supportsColumnar);
   }
+
+  protected void createTableWithDeleteGranularity(
+      String schema, String partitionedBy, DeleteGranularity deleteGranularity) {
+    createAndInitTable(schema, partitionedBy, null /* empty */);
+    sql(
+        "ALTER TABLE %s SET TBLPROPERTIES ('%s' '%s')",
+        tableName, TableProperties.DELETE_GRANULARITY, deleteGranularity);
+  }
 }
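
With formatVersion added to the base constructor and to the @Parameters name template, every concrete test class built on SparkRowLevelOperationsTestBase must accept and forward the extra argument in the same order as the parameter matrix above. Below is a minimal sketch of that wiring; the subclass name TestRowLevelDelete and its empty extraTableProperties() body are illustrative only and are not part of this change.

// Hypothetical subclass: forwards the new formatVersion argument to the base class.
// Parameter order mirrors the @Parameters template ({0} catalogName ... {9} formatVersion).
public class TestRowLevelDelete extends SparkRowLevelOperationsTestBase {

  public TestRowLevelDelete(
      String catalogName,
      String implementation,
      Map<String, String> config,
      String fileFormat,
      boolean vectorized,
      String distributionMode,
      boolean fanoutEnabled,
      String branch,
      PlanningMode planningMode,
      int formatVersion) {
    super(
        catalogName,
        implementation,
        config,
        fileFormat,
        vectorized,
        distributionMode,
        fanoutEnabled,
        branch,
        planningMode,
        formatVersion);
  }

  @Override
  protected Map<String, String> extraTableProperties() {
    return ImmutableMap.of(); // no extra table properties for this sketch
  }
}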