19
19
package org .apache .iceberg .spark .extensions ;
20
20
21
21
import static org .assertj .core .api .Assertions .assertThat ;
22
+ import static org .assertj .core .api .Assumptions .assumeThat ;
22
23
23
24
import java .util .Map ;
24
25
import org .apache .iceberg .PlanningMode ;
@@ -75,19 +76,82 @@ public void testUpdatePartitionGranularity() {
75
76
checkUpdateFileGranularity (DeleteGranularity .PARTITION );
76
77
}
77
78
78
- private void checkUpdateFileGranularity (DeleteGranularity deleteGranularity ) {
79
- createAndInitTable ("id INT, dep STRING" , "PARTITIONED BY (dep)" , null /* empty */ );
79
+ @ Test
80
+ public void testUpdateFileGranularityMergesDeleteFiles () {
81
+ // Range distribution will produce partition scoped deletes which will not be cleaned up
82
+ assumeThat (distributionMode ).isNotEqualToIgnoringCase ("range" );
80
83
81
- sql (
82
- "ALTER TABLE %s SET TBLPROPERTIES ('%s' '%s')" ,
83
- tableName , TableProperties .DELETE_GRANULARITY , deleteGranularity );
84
+ checkUpdateFileGranularity (DeleteGranularity .FILE );
85
+ sql ("UPDATE %s SET id = id + 1 WHERE id = 4" , commitTarget ());
86
+ Table table = validationCatalog .loadTable (tableIdent );
87
+ Snapshot currentSnapshot = SnapshotUtil .latestSnapshot (table , branch );
88
+ String expectedDeleteFilesCount = "2" ;
89
+ validateMergeOnRead (currentSnapshot , "2" , expectedDeleteFilesCount , "2" );
84
90
85
- append (tableName , "{ \" id\" : 1, \" dep\" : \" hr\" }\n " + "{ \" id\" : 2, \" dep\" : \" hr\" }" );
86
- append (tableName , "{ \" id\" : 3, \" dep\" : \" hr\" }\n " + "{ \" id\" : 4, \" dep\" : \" hr\" }" );
87
- append (tableName , "{ \" id\" : 1, \" dep\" : \" it\" }\n " + "{ \" id\" : 2, \" dep\" : \" it\" }" );
88
- append (tableName , "{ \" id\" : 3, \" dep\" : \" it\" }\n " + "{ \" id\" : 4, \" dep\" : \" it\" }" );
91
+ assertThat (currentSnapshot .removedDeleteFiles (table .io ())).hasSize (2 );
92
+ assertEquals (
93
+ "Should have expected rows" ,
94
+ ImmutableList .of (
95
+ row (0 , "hr" ),
96
+ row (2 , "hr" ),
97
+ row (2 , "hr" ),
98
+ row (5 , "hr" ),
99
+ row (0 , "it" ),
100
+ row (2 , "it" ),
101
+ row (2 , "it" ),
102
+ row (5 , "it" )),
103
+ sql ("SELECT * FROM %s ORDER BY dep ASC, id ASC" , selectTarget ()));
104
+ }
89
105
90
- createBranchIfNeeded ();
106
+ @ Test
107
+ public void testUpdateUnpartitionedFileGranularityMergesDeleteFiles () {
108
+ // Range distribution will produce partition scoped deletes which will not be cleaned up
109
+ assumeThat (distributionMode ).isNotEqualToIgnoringCase ("range" );
110
+ initTable ("" , DeleteGranularity .FILE );
111
+
112
+ sql ("UPDATE %s SET id = id - 1 WHERE id = 1 OR id = 3" , commitTarget ());
113
+
114
+ Table table = validationCatalog .loadTable (tableIdent );
115
+ assertThat (table .snapshots ()).hasSize (5 );
116
+ Snapshot currentSnapshot = SnapshotUtil .latestSnapshot (table , branch );
117
+ String expectedDeleteFilesCount = "4" ;
118
+ validateMergeOnRead (currentSnapshot , "1" , expectedDeleteFilesCount , "1" );
119
+ assertEquals (
120
+ "Should have expected rows" ,
121
+ ImmutableList .of (
122
+ row (0 , "hr" ),
123
+ row (2 , "hr" ),
124
+ row (2 , "hr" ),
125
+ row (4 , "hr" ),
126
+ row (0 , "it" ),
127
+ row (2 , "it" ),
128
+ row (2 , "it" ),
129
+ row (4 , "it" )),
130
+ sql ("SELECT * FROM %s ORDER BY dep ASC, id ASC" , selectTarget ()));
131
+
132
+ sql ("UPDATE %s SET id = id + 1 WHERE id = 4" , commitTarget ());
133
+ table .refresh ();
134
+ currentSnapshot = SnapshotUtil .latestSnapshot (table , branch );
135
+ expectedDeleteFilesCount = "2" ;
136
+
137
+ validateMergeOnRead (currentSnapshot , "1" , expectedDeleteFilesCount , "1" );
138
+ assertThat (currentSnapshot .removedDeleteFiles (table .io ())).hasSize (2 );
139
+ assertEquals (
140
+ "Should have expected rows" ,
141
+ ImmutableList .of (
142
+ row (0 , "hr" ),
143
+ row (2 , "hr" ),
144
+ row (2 , "hr" ),
145
+ row (5 , "hr" ),
146
+ row (0 , "it" ),
147
+ row (2 , "it" ),
148
+ row (2 , "it" ),
149
+ row (5 , "it" )),
150
+ sql ("SELECT * FROM %s ORDER BY dep ASC, id ASC" , selectTarget ()));
151
+ }
152
+
153
+ private void checkUpdateFileGranularity (DeleteGranularity deleteGranularity ) {
154
+ initTable ("PARTITIONED BY (dep)" , deleteGranularity );
91
155
92
156
sql ("UPDATE %s SET id = id - 1 WHERE id = 1 OR id = 3" , commitTarget ());
93
157
@@ -111,4 +175,19 @@ private void checkUpdateFileGranularity(DeleteGranularity deleteGranularity) {
111
175
row (4 , "it" )),
112
176
sql ("SELECT * FROM %s ORDER BY dep ASC, id ASC" , selectTarget ()));
113
177
}
178
+
179
+ private void initTable (String partitionedBy , DeleteGranularity deleteGranularity ) {
180
+ createAndInitTable ("id INT, dep STRING" , partitionedBy , null /* empty */ );
181
+
182
+ sql (
183
+ "ALTER TABLE %s SET TBLPROPERTIES ('%s' '%s')" ,
184
+ tableName , TableProperties .DELETE_GRANULARITY , deleteGranularity );
185
+
186
+ append (tableName , "{ \" id\" : 1, \" dep\" : \" hr\" }\n " + "{ \" id\" : 2, \" dep\" : \" hr\" }" );
187
+ append (tableName , "{ \" id\" : 3, \" dep\" : \" hr\" }\n " + "{ \" id\" : 4, \" dep\" : \" hr\" }" );
188
+ append (tableName , "{ \" id\" : 1, \" dep\" : \" it\" }\n " + "{ \" id\" : 2, \" dep\" : \" it\" }" );
189
+ append (tableName , "{ \" id\" : 3, \" dep\" : \" it\" }\n " + "{ \" id\" : 4, \" dep\" : \" it\" }" );
190
+
191
+ createBranchIfNeeded ();
192
+ }
114
193
}
0 commit comments