Skip to content

Commit 0528ac7

Browse files
committed
[SPARK-51623][BUILD] Remove class files in source releases
### What changes were proposed in this pull request? This PR proposes to remove test class files in source releases during the release process. ### Why are the changes needed? Apache source releases must not contain class files. The issue is discussed on https://lists.apache.org/thread/0ro5yn6lbbpmvmqp2px3s2pf7cwljlc4 ### Does this PR introduce _any_ user-facing change? To end users, no. ### How was this patch tested? Will be tested in CI. ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#50422 from HyukjinKwon/SPARK-51623. Lead-authored-by: Hyukjin Kwon <[email protected]> Co-authored-by: Hyukjin Kwon <[email protected]> Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent ef762a0 commit 0528ac7

File tree

9 files changed

+75
-19
lines changed

9 files changed

+75
-19
lines changed

dev/create-release/release-tag.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ git config user.email "$GIT_EMAIL"
7575

7676
# Remove test jars that do not belong to source releases.
7777
rm $(<dev/test-jars.txt)
78-
git commit -a -m "Removing test jars"
78+
rm $(<dev/test-classes.txt)
79+
git commit -a -m "Removing test jars and class files"
7980
JAR_RM_REF=$(git rev-parse HEAD)
8081

8182
# Create release version

dev/is-changed.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,20 @@ def main():
8080
"please add the file name into dev/test-jars.txt."
8181
)
8282

83+
if any(f.endswith(".class") for f in changed_files):
84+
with open(
85+
os.path.join(os.path.dirname(os.path.realpath(__file__)), "test-classes.txt")
86+
) as clslist:
87+
itrsect = set((line.strip() for line in clslist.readlines())).intersection(
88+
set(changed_files)
89+
)
90+
if len(itrsect) > 0:
91+
raise SystemExit(
92+
f"Cannot include class files in source codes ({', '.join(itrsect)}). "
93+
"If they have to be added temporarily, "
94+
"please add the file name into dev/test-classes.txt."
95+
)
96+
8397
changed_modules = determine_modules_to_test(
8498
determine_modules_for_files(changed_files), deduplicated=False
8599
)

dev/test-classes.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
repl/src/test/resources/IntSumUdf.class
2+
sql/core/src/test/resources/artifact-tests/Hello.class
3+
sql/core/src/test/resources/artifact-tests/IntSumUdf.class
4+
sql/core/src/test/resources/artifact-tests/smallClassFile.class
5+
sql/connect/common/src/test/resources/artifact-tests/Hello.class
6+
sql/core/src/test/resources/artifact-tests/HelloWithPackage.class
7+
sql/connect/common/src/test/resources/artifact-tests/smallClassFile.class
8+
sql/connect/common/src/test/resources/artifact-tests/smallClassFileDup.class

repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ class ReplSuite extends SparkFunSuite {
400400
test("register UDF via SparkSession.addArtifact") {
401401
val artifactPath = new File("src/test/resources").toPath
402402
val intSumUdfPath = artifactPath.resolve("IntSumUdf.class")
403+
assume(intSumUdfPath.toFile.exists)
403404
val output = runInterpreterInPasteMode("local",
404405
s"""
405406
|import org.apache.spark.sql.api.java.UDF2
@@ -438,6 +439,7 @@ class ReplSuite extends SparkFunSuite {
438439
test("register a class via SparkSession.addArtifact") {
439440
val artifactPath = new File("src/test/resources").toPath
440441
val intSumUdfPath = artifactPath.resolve("IntSumUdf.class")
442+
assume(intSumUdfPath.toFile.exists)
441443
val output = runInterpreterInPasteMode("local",
442444
s"""
443445
|import org.apache.spark.sql.functions.udf

sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/ArtifactSuite.scala

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,12 @@ class ArtifactSuite extends ConnectFunSuite with BeforeAndAfterEach {
199199
}
200200

201201
test("Batched SingleChunkArtifacts") {
202-
val file1 = artifactFilePath.resolve("smallClassFile.class").toUri
203-
val path = artifactFilePath.resolve("smallJar.jar")
204-
val file2 = path.toUri
205-
assume(path.toFile.exists)
202+
val path1 = artifactFilePath.resolve("smallClassFile.class")
203+
assume(path1.toFile.exists)
204+
val file1 = path1.toUri
205+
val path2 = artifactFilePath.resolve("smallJar.jar")
206+
assume(path2.toFile.exists)
207+
val file2 = path2.toUri
206208
artifactManager.addArtifacts(Seq(file1, file2))
207209
val receivedRequests = service.getAndClearLatestAddArtifactRequests()
208210
// Single request containing 2 artifacts.
@@ -223,11 +225,15 @@ class ArtifactSuite extends ConnectFunSuite with BeforeAndAfterEach {
223225
}
224226

225227
test("Mix of SingleChunkArtifact and chunked artifact") {
226-
val file1 = artifactFilePath.resolve("smallClassFile.class").toUri
228+
val path1 = artifactFilePath.resolve("smallClassFile.class")
229+
assume(path1.toFile.exists)
230+
val file1 = path1.toUri
227231
val path2 = artifactFilePath.resolve("junitLargeJar.jar")
228232
assume(path2.toFile.exists)
229233
val file2 = path2.toUri
230-
val file3 = artifactFilePath.resolve("smallClassFileDup.class").toUri
234+
val path3 = artifactFilePath.resolve("smallClassFileDup.class")
235+
assume(path3.toFile.exists)
236+
val file3 = path3.toUri
231237
val path4 = artifactFilePath.resolve("smallJar.jar")
232238
assume(path4.toFile.exists)
233239
val file4 = path4.toUri
@@ -298,6 +304,7 @@ class ArtifactSuite extends ConnectFunSuite with BeforeAndAfterEach {
298304

299305
test("artifact with custom target") {
300306
val artifactPath = artifactFilePath.resolve("smallClassFile.class")
307+
assume(artifactPath.toFile.exists)
301308
val target = "sub/package/smallClassFile.class"
302309
artifactManager.addArtifact(artifactPath.toString, target)
303310
val receivedRequests = service.getAndClearLatestAddArtifactRequests()
@@ -318,6 +325,7 @@ class ArtifactSuite extends ConnectFunSuite with BeforeAndAfterEach {
318325

319326
test("in-memory artifact with custom target") {
320327
val artifactPath = artifactFilePath.resolve("smallClassFile.class")
328+
assume(artifactPath.toFile.exists)
321329
val artifactBytes = Files.readAllBytes(artifactPath)
322330
val target = "sub/package/smallClassFile.class"
323331
artifactManager.addArtifact(artifactBytes, target)
@@ -341,6 +349,7 @@ class ArtifactSuite extends ConnectFunSuite with BeforeAndAfterEach {
341349
"When both source and target paths are given, extension conditions are checked " +
342350
"on target path") {
343351
val artifactPath = artifactFilePath.resolve("smallClassFile.class")
352+
assume(artifactPath.toFile.exists)
344353
assertThrows[UnsupportedOperationException] {
345354
artifactManager.addArtifact(artifactPath.toString, "dummy.extension")
346355
}

sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/ClassFinderSuite.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class ClassFinderSuite extends ConnectFunSuite {
3737
"Hello.class",
3838
"smallClassFile.class",
3939
"smallClassFileDup.class") ++ additionalClasses).map(name => Paths.get(name))
40+
expectedClassFiles.foreach(p => assume(p.toFile.exists))
4041

4142
val foundArtifacts = monitor.findClasses().toSeq
4243
assert(expectedClassFiles.forall { classPath =>

sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,9 @@ class SparkConnectClientSuite extends ConnectFunSuite with BeforeAndAfterEach {
487487

488488
val session = SparkSession.builder().client(client).create()
489489
val artifactFilePath = commonResourcePath.resolve("artifact-tests")
490-
session.addArtifact(artifactFilePath.resolve("smallClassFile.class").toString)
490+
val path = artifactFilePath.resolve("smallClassFile.class")
491+
assume(path.toFile.exists)
492+
session.addArtifact(path.toString)
491493
}
492494

493495
private def buildPlan(query: String): proto.Plan = {

sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/AddArtifactsHandlerSuite.scala

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ class AddArtifactsHandlerSuite extends SharedSparkSession with ResourceHelper {
193193
try {
194194
val name = "classes/smallClassFile.class"
195195
val artifactPath = inputFilePath.resolve("smallClassFile.class")
196+
assume(artifactPath.toFile.exists)
196197
addSingleChunkArtifact(handler, name, artifactPath)
197198
handler.onCompleted()
198199
val response = ThreadUtils.awaitResult(promise.future, 5.seconds)
@@ -246,15 +247,12 @@ class AddArtifactsHandlerSuite extends SharedSparkSession with ResourceHelper {
246247
"classes/smallClassFileDup.class",
247248
"jars/smallJar.jar")
248249

249-
val path1 = inputFilePath.resolve("junitLargeJar.jar")
250-
val path2 = inputFilePath.resolve("smallJar.jar")
251250
val artifactPaths = Seq(
252251
inputFilePath.resolve("smallClassFile.class"),
253-
path1,
252+
inputFilePath.resolve("junitLargeJar.jar"),
254253
inputFilePath.resolve("smallClassFileDup.class"),
255-
path2)
256-
assume(path1.toFile.exists)
257-
assume(path2.toFile.exists)
254+
inputFilePath.resolve("smallJar.jar"))
255+
artifactPaths.foreach(p => assume(p.toFile.exists))
258256

259257
addSingleChunkArtifact(handler, names.head, artifactPaths.head)
260258
addChunkedArtifact(handler, names(1), artifactPaths(1))
@@ -286,6 +284,7 @@ class AddArtifactsHandlerSuite extends SharedSparkSession with ResourceHelper {
286284
try {
287285
val name = "classes/smallClassFile.class"
288286
val artifactPath = inputFilePath.resolve("smallClassFile.class")
287+
assume(artifactPath.toFile.exists)
289288
val dataChunks = getDataChunks(artifactPath)
290289
assert(dataChunks.size == 1)
291290
val bytes = dataChunks.head

sql/core/src/test/scala/org/apache/spark/sql/artifact/ArtifactManagerSuite.scala

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,13 @@ class ArtifactManagerSuite extends SharedSparkSession {
5353
}
5454

5555
test("Class artifacts are added to the correct directory.") {
56+
assume(artifactPath.resolve("smallClassFile.class").toFile.exists)
57+
5658
val copyDir = Utils.createTempDir().toPath
5759
FileUtils.copyDirectory(artifactPath.toFile, copyDir.toFile)
5860
val stagingPath = copyDir.resolve("smallClassFile.class")
59-
val remotePath = Paths.get("classes/smallClassFile.class")
6061
assert(stagingPath.toFile.exists())
62+
val remotePath = Paths.get("classes/smallClassFile.class")
6163
artifactManager.addArtifact(remotePath, stagingPath, None)
6264

6365
val movedClassFile = ArtifactManager.artifactRootDirectory
@@ -67,11 +69,13 @@ class ArtifactManagerSuite extends SharedSparkSession {
6769
}
6870

6971
test("Class file artifacts are added to SC classloader") {
72+
assume(artifactPath.resolve("Hello.class").toFile.exists)
73+
7074
val copyDir = Utils.createTempDir().toPath
7175
FileUtils.copyDirectory(artifactPath.toFile, copyDir.toFile)
7276
val stagingPath = copyDir.resolve("Hello.class")
73-
val remotePath = Paths.get("classes/Hello.class")
7477
assert(stagingPath.toFile.exists())
78+
val remotePath = Paths.get("classes/Hello.class")
7579
artifactManager.addArtifact(remotePath, stagingPath, None)
7680

7781
val movedClassFile = ArtifactManager.artifactRootDirectory
@@ -91,11 +95,13 @@ class ArtifactManagerSuite extends SharedSparkSession {
9195
}
9296

9397
test("UDF can reference added class file") {
98+
assume(artifactPath.resolve("Hello.class").toFile.exists)
99+
94100
val copyDir = Utils.createTempDir().toPath
95101
FileUtils.copyDirectory(artifactPath.toFile, copyDir.toFile)
96102
val stagingPath = copyDir.resolve("Hello.class")
97-
val remotePath = Paths.get("classes/Hello.class")
98103
assert(stagingPath.toFile.exists())
104+
val remotePath = Paths.get("classes/Hello.class")
99105

100106
artifactManager.addArtifact(remotePath, stagingPath, None)
101107

@@ -170,6 +176,8 @@ class ArtifactManagerSuite extends SharedSparkSession {
170176
}
171177

172178
test("SPARK-43790: Forward artifact file to cloud storage path") {
179+
assume(artifactPath.resolve("smallClassFile.class").toFile.exists)
180+
173181
val copyDir = Utils.createTempDir().toPath
174182
val destFSDir = Utils.createTempDir().toPath
175183
FileUtils.copyDirectory(artifactPath.toFile, copyDir.toFile)
@@ -184,6 +192,8 @@ class ArtifactManagerSuite extends SharedSparkSession {
184192
}
185193

186194
test("Removal of resources") {
195+
assume(artifactPath.resolve("smallClassFile.class").toFile.exists)
196+
187197
withTempPath { path =>
188198
// Setup cache
189199
val stagingPath = path.toPath
@@ -228,6 +238,8 @@ class ArtifactManagerSuite extends SharedSparkSession {
228238
}
229239

230240
test("Classloaders for spark sessions are isolated") {
241+
assume(artifactPath.resolve("Hello.class").toFile.exists)
242+
231243
val session1 = spark.newSession()
232244
val session2 = spark.newSession()
233245
val session3 = spark.newSession()
@@ -284,6 +296,8 @@ class ArtifactManagerSuite extends SharedSparkSession {
284296
}
285297

286298
test("SPARK-44300: Cleaning up resources only deletes session-specific resources") {
299+
assume(artifactPath.resolve("Hello.class").toFile.exists)
300+
287301
val copyDir = Utils.createTempDir().toPath
288302
FileUtils.copyDirectory(artifactPath.toFile, copyDir.toFile)
289303
val stagingPath = copyDir.resolve("Hello.class")
@@ -335,7 +349,9 @@ class ArtifactManagerSuite extends SharedSparkSession {
335349
}
336350

337351
test("Added artifact can be loaded by the current SparkSession") {
338-
val buffer = Files.readAllBytes(artifactPath.resolve("IntSumUdf.class"))
352+
val path = artifactPath.resolve("IntSumUdf.class")
353+
assume(path.toFile.exists)
354+
val buffer = Files.readAllBytes(path)
339355
spark.addArtifact(buffer, "IntSumUdf.class")
340356

341357
spark.udf.registerJava("intSum", "IntSumUdf", DataTypes.LongType)
@@ -350,6 +366,8 @@ class ArtifactManagerSuite extends SharedSparkSession {
350366
private def testAddArtifactToLocalSession(
351367
classFileToUse: String, binaryName: String)(addFunc: Path => String): Unit = {
352368
val copyDir = Utils.createTempDir().toPath
369+
assume(artifactPath.resolve(classFileToUse).toFile.exists)
370+
353371
FileUtils.copyDirectory(artifactPath.toFile, copyDir.toFile)
354372
val classPath = copyDir.resolve(classFileToUse)
355373
assert(classPath.toFile.exists())
@@ -395,8 +413,10 @@ class ArtifactManagerSuite extends SharedSparkSession {
395413
Files.write(randomFilePath, testBytes)
396414

397415
// Register multiple kinds of artifacts
416+
val clsPath = path.resolve("Hello.class")
417+
assume(clsPath.toFile.exists)
398418
artifactManager.addArtifact( // Class
399-
Paths.get("classes/Hello.class"), path.resolve("Hello.class"), None)
419+
Paths.get("classes/Hello.class"), clsPath, None)
400420
artifactManager.addArtifact( // Python
401421
Paths.get("pyfiles/abc.zip"), randomFilePath, None, deleteStagedFile = false)
402422
val jarPath = Paths.get("jars/udf_noA.jar")

0 commit comments

Comments
 (0)