
Commit b829aea

HyukjinKwon and vrozov committed
[SPARK-51318][BUILD] Remove test jars in source releases
### What changes were proposed in this pull request?

This PR proposes to remove test jars from source releases during the release process.

### Why are the changes needed?

Apache source releases must not contain jar files. The issue is discussed on https://lists.apache.org/thread/0ro5yn6lbbpmvmqp2px3s2pf7cwljlc4

### Does this PR introduce _any_ user-facing change?

To end users, no.

### How was this patch tested?

Manually tested, and I will work together with the release manager to make sure this works.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#50378 from HyukjinKwon/SPARK-51318.

Lead-authored-by: Hyukjin Kwon <[email protected]>
Co-authored-by: Vlad Rozov <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent 07c1ded · commit b829aea

File tree

25 files changed: +118, -33 lines

connect-examples/server-library-example/README.md

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ reading, writing and processing data in the custom format. The plugins (`CustomC
 4. **Copy relevant JARs to the root of the unpacked Spark distribution**:
    ```bash
    cp \
-   <SPARK_HOME>/connect-examples/server-library-example/resources/spark-daria_2.13-1.2.3.jar \
+   <SPARK_HOME>/connect-examples/server-library-example/common/target/spark-daria_2.13-1.2.3.jar \
    <SPARK_HOME>/connect-examples/server-library-example/common/target/spark-server-library-example-common-1.0.0.jar \
    <SPARK_HOME>/connect-examples/server-library-example/server/target/spark-server-library-example-server-extension-1.0.0.jar \
    .
Binary file not shown.

connect-examples/server-library-example/server/pom.xml

Lines changed: 1 addition & 2 deletions
@@ -62,9 +62,8 @@
     <!-- Include Spark Daria for utility Dataframe write methods -->
     <dependency>
       <groupId>com.github.mrpowers</groupId>
-      <artifactId>spark-daria_2.12</artifactId>
+      <artifactId>spark-daria_${scala.binary}</artifactId>
       <version>1.2.3</version>
-      <scope>provided</scope>
     </dependency>
   </dependencies>

core/src/test/scala/org/apache/spark/SparkContextSuite.scala

Lines changed: 6 additions & 3 deletions
@@ -244,10 +244,11 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
   }
 
   test("add and list jar files") {
-    val jarPath = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar")
+    val testJar = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar")
+    assume(testJar != null)
     try {
       sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
-      sc.addJar(jarPath.toString)
+      sc.addJar(testJar.toString)
       assert(sc.listJars().count(_.contains("TestUDTF.jar")) == 1)
     } finally {
       sc.stop()
@@ -396,13 +397,15 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu
     schedulingMode <- Seq("local-mode", "non-local-mode");
     method <- Seq("addJar", "addFile")
   ) {
-    val jarPath = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar").toString
     val master = schedulingMode match {
       case "local-mode" => "local"
       case "non-local-mode" => "local-cluster[1,1,1024]"
     }
     test(s"$method can be called twice with same file in $schedulingMode (SPARK-16787)") {
+      val testJar = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar")
+      assume(testJar != null)
       sc = new SparkContext(master, "test")
+      val jarPath = testJar.toString
      method match {
        case "addJar" =>
          sc.addJar(jarPath)
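The guard added above is the recurring pattern in this PR: resolve the classpath resource first, then `assume` it is non-null so ScalaTest cancels the test instead of failing it when the jar has been stripped from a source release. A minimal standalone sketch of that pattern, assuming ScalaTest's `AnyFunSuite` and a hypothetical `optional-resource.bin` file:

```scala
import org.scalatest.funsuite.AnyFunSuite

class OptionalResourceSuite extends AnyFunSuite {
  test("works with an optional test resource") {
    // getResource returns null when the file is absent from the classpath,
    // e.g. after test jars are removed from a source release.
    val resource =
      Thread.currentThread().getContextClassLoader.getResource("optional-resource.bin")
    // assume() throws TestCanceledException when the condition is false, so
    // the test is reported as canceled rather than failed.
    assume(resource != null)
    // Past this point the resource is known to exist.
    assert(resource.toString.nonEmpty)
  }
}
```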

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 9 additions & 3 deletions
@@ -506,6 +506,8 @@ class SparkSubmitSuite
 
   test("SPARK-47495: Not to add primary resource to jars again" +
     " in k8s client mode & driver runs inside a POD") {
+    val testJar = "src/test/resources/TestUDTF.jar"
+    assume(new File(testJar).exists)
     val clArgs = Seq(
       "--deploy-mode", "client",
       "--proxy-user", "test.user",
@@ -514,7 +516,7 @@ class SparkSubmitSuite
       "--class", "org.SomeClass",
       "--driver-memory", "1g",
       "--conf", "spark.kubernetes.submitInDriver=true",
-      "--jars", "src/test/resources/TestUDTF.jar",
+      "--jars", testJar,
       "/home/jarToIgnore.jar",
       "arg1")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -524,6 +526,8 @@ class SparkSubmitSuite
   }
 
   test("SPARK-33782: handles k8s files download to current directory") {
+    val testJar = "src/test/resources/TestUDTF.jar"
+    assume(new File(testJar).exists)
     val clArgs = Seq(
       "--deploy-mode", "client",
       "--proxy-user", "test.user",
@@ -537,7 +541,7 @@ class SparkSubmitSuite
       "--files", "src/test/resources/test_metrics_config.properties",
       "--py-files", "src/test/resources/test_metrics_system.properties",
       "--archives", "src/test/resources/log4j2.properties",
-      "--jars", "src/test/resources/TestUDTF.jar",
+      "--jars", testJar,
       "/home/thejar.jar",
       "arg1")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -561,6 +565,8 @@ class SparkSubmitSuite
   test("SPARK-47475: Avoid jars download if scheme matches " +
     "spark.kubernetes.jars.avoidDownloadSchemes " +
     "in k8s client mode & driver runs inside a POD") {
+    val testJar = "src/test/resources/TestUDTF.jar"
+    assume(new File(testJar).exists)
     val hadoopConf = new Configuration()
     updateConfWithFakeS3Fs(hadoopConf)
     withTempDir { tmpDir =>
@@ -579,7 +585,7 @@ class SparkSubmitSuite
       "--files", "src/test/resources/test_metrics_config.properties",
       "--py-files", "src/test/resources/test_metrics_system.properties",
       "--archives", "src/test/resources/log4j2.properties",
-      "--jars", s"src/test/resources/TestUDTF.jar,$remoteJarFile",
+      "--jars", s"$testJar,$remoteJarFile",
       "/home/jarToIgnore.jar",
       "arg1")
     val appArgs = new SparkSubmitArguments(clArgs)

core/src/test/scala/org/apache/spark/executor/ClassLoaderIsolationSuite.scala

Lines changed: 12 additions & 5 deletions
@@ -29,21 +29,28 @@ class ClassLoaderIsolationSuite extends SparkFunSuite with LocalSparkContext {
     .take(2)
     .mkString(".")
 
-  val jar1 = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar").toString
+  private val jarURL1 = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar")
+  private lazy val jar1 = jarURL1.toString
 
   // package com.example
   // object Hello { def test(): Int = 2 }
   // case class Hello(x: Int, y: Int)
-  val jar2 = Thread.currentThread().getContextClassLoader
-    .getResource(s"TestHelloV2_$scalaVersion.jar").toString
+  private val jarURL2 = Thread.currentThread().getContextClassLoader
+    .getResource(s"TestHelloV2_$scalaVersion.jar")
+  private lazy val jar2 = jarURL2.toString
 
   // package com.example
   // object Hello { def test(): Int = 3 }
   // case class Hello(x: String)
-  val jar3 = Thread.currentThread().getContextClassLoader
-    .getResource(s"TestHelloV3_$scalaVersion.jar").toString
+  private val jarURL3 = Thread.currentThread().getContextClassLoader
+    .getResource(s"TestHelloV3_$scalaVersion.jar")
+  private lazy val jar3 = jarURL3.toString
 
   test("Executor classloader isolation with JobArtifactSet") {
+    assume(jarURL1 != null)
+    assume(jarURL2 != null)
+    assume(jarURL3 != null)
+
     sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))
     sc.addJar(jar1)
     sc.addJar(jar2)
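A detail worth noting in this change: the URLs are resolved eagerly, but each `.toString` is deferred behind a `lazy val`. Calling `.toString` on a null URL would throw a `NullPointerException` while the suite object is being constructed, before any `assume` can run; the lazy val is only forced after the guards pass. A stripped-down sketch of the idiom, with a hypothetical `optional.jar` resource:

```scala
class LazyResourceHolder {
  // Eager: null when optional.jar is absent from the classpath.
  private val jarURL =
    Thread.currentThread().getContextClassLoader.getResource("optional.jar")
  // Deferred: jarURL.toString would NPE on a null URL, so this is forced
  // only after an assume(jarURL != null) guard has run inside a test body.
  private lazy val jar: String = jarURL.toString
}
```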

core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala

Lines changed: 2 additions & 1 deletion
@@ -1573,6 +1573,8 @@ class TaskSetManagerSuite
   }
 
   test("SPARK-21563 context's added jars shouldn't change mid-TaskSet") {
+    val jarPath = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar")
+    assume(jarPath != null)
     sc = new SparkContext("local", "test")
     val addedJarsPreTaskSet = Map[String, Long](sc.allAddedJars.toSeq: _*)
     assert(addedJarsPreTaskSet.size === 0)
@@ -1588,7 +1590,6 @@ class TaskSetManagerSuite
     assert(taskOption2.get.artifacts.jars === addedJarsPreTaskSet)
 
     // even with a jar added mid-TaskSet
-    val jarPath = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar")
     sc.addJar(jarPath.toString)
     val addedJarsMidTaskSet = Map[String, Long](sc.allAddedJars.toSeq: _*)
     assert(addedJarsPreTaskSet !== addedJarsMidTaskSet)

dev/create-release/release-tag.sh

Lines changed: 2 additions & 0 deletions
@@ -106,6 +106,8 @@ sed -i".tmp7" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$NEXT_VERSION"'/g' docs/_con
 sed -i".tmp8" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$R_NEXT_VERSION"'/g' docs/_config.yml
 # Update the version index of DocSearch as the short version
 sed -i".tmp9" "s/'facetFilters':.*$/'facetFilters': [\"version:$R_NEXT_VERSION\"]/g" docs/_config.yml
+# Remove test jars that do not belong to source releases.
+rm $(<dev/test-jars.txt)
 
 git commit -a -m "Preparing development version $NEXT_VERSION"
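`$(<dev/test-jars.txt)` is bash's file-read expansion: the manifest's newline-separated paths are splatted onto the `rm` command line. For illustration only, a rough Scala equivalent of that one-liner (the object name is made up; the release script itself stays bash):

```scala
import java.nio.file.{Files, Paths}
import scala.jdk.CollectionConverters._

object RemoveTestJars {
  def main(args: Array[String]): Unit = {
    // Read the newline-separated jar paths, mirroring $(<dev/test-jars.txt).
    val jars = Files.readAllLines(Paths.get("dev/test-jars.txt")).asScala
      .map(_.trim).filter(_.nonEmpty)
    // Delete each listed jar; like bare `rm`, fail loudly if one is missing.
    jars.foreach(path => Files.delete(Paths.get(path)))
  }
}
```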

dev/test-jars.txt

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+core/src/test/resources/TestHelloV2_2.13.jar
+core/src/test/resources/TestHelloV3_2.13.jar
+core/src/test/resources/TestUDTF.jar
+data/artifact-tests/junitLargeJar.jar
+data/artifact-tests/smallJar.jar
+sql/connect/client/jvm/src/test/resources/TestHelloV2_2.13.jar
+sql/connect/client/jvm/src/test/resources/udf2.13.jar
+sql/connect/common/src/test/resources/artifact-tests/junitLargeJar.jar
+sql/connect/common/src/test/resources/artifact-tests/smallJar.jar
+sql/core/src/test/resources/SPARK-33084.jar
+sql/core/src/test/resources/artifact-tests/udf_noA.jar
+sql/hive-thriftserver/src/test/resources/TestUDTF.jar
+sql/hive/src/test/noclasspath/hive-test-udfs.jar
+sql/hive/src/test/resources/SPARK-21101-1.0.jar
+sql/hive/src/test/resources/TestUDTF.jar
+sql/hive/src/test/resources/data/files/TestSerDe.jar
+sql/hive/src/test/resources/regression-test-SPARK-8489/test-2.13.jar
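One risk of a hand-maintained manifest is drift: a newly added test jar that never lands in this list would still ship in source releases. A possible consistency check, sketched under the assumption that it runs from the repository root (the `git ls-files` shell-out and the comparison are illustrative, not part of this PR):

```scala
import scala.io.Source
import scala.sys.process._

object CheckTestJarManifest {
  def main(args: Array[String]): Unit = {
    // Jars tracked by git anywhere in the repo (run from the repo root).
    val tracked = Seq("git", "ls-files", "*.jar").!!.linesIterator.map(_.trim).toSet
    // Jars the release script will delete.
    val listed = Source.fromFile("dev/test-jars.txt").getLines()
      .map(_.trim).filter(_.nonEmpty).toSet

    // Tracked jars missing from the manifest would leak into releases.
    val missing = tracked -- listed
    missing.toSeq.sorted.foreach(jar => println(s"not in dev/test-jars.txt: $jar"))
    if (missing.nonEmpty) sys.exit(1)
  }
}
```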

python/pyspark/sql/tests/connect/client/test_artifact.py

Lines changed: 12 additions & 0 deletions
@@ -224,6 +224,8 @@ def conf(cls):
     def test_basic_requests(self):
         file_name = "smallJar"
         small_jar_path = os.path.join(self.artifact_file_path, f"{file_name}.jar")
+        if not os.path.isfile(small_jar_path):
+            raise unittest.SkipTest(f"Skipped as {small_jar_path} does not exist.")
         response = self.artifact_manager._retrieve_responses(
             self.artifact_manager._create_requests(
                 small_jar_path, pyfile=False, archive=False, file=False
@@ -235,6 +237,8 @@ def test_single_chunk_artifact(self):
         file_name = "smallJar"
         small_jar_path = os.path.join(self.artifact_file_path, f"{file_name}.jar")
         small_jar_crc_path = os.path.join(self.artifact_crc_path, f"{file_name}.txt")
+        if not os.path.isfile(small_jar_path):
+            raise unittest.SkipTest(f"Skipped as {small_jar_path} does not exist.")
 
         requests = list(
             self.artifact_manager._create_requests(
@@ -261,6 +265,8 @@ def test_chunked_artifacts(self):
         file_name = "junitLargeJar"
         large_jar_path = os.path.join(self.artifact_file_path, f"{file_name}.jar")
         large_jar_crc_path = os.path.join(self.artifact_crc_path, f"{file_name}.txt")
+        if not os.path.isfile(large_jar_path):
+            raise unittest.SkipTest(f"Skipped as {large_jar_path} does not exist.")
 
         requests = list(
             self.artifact_manager._create_requests(
@@ -296,6 +302,8 @@ def test_batched_artifacts(self):
         file_name = "smallJar"
         small_jar_path = os.path.join(self.artifact_file_path, f"{file_name}.jar")
         small_jar_crc_path = os.path.join(self.artifact_crc_path, f"{file_name}.txt")
+        if not os.path.isfile(small_jar_path):
+            raise unittest.SkipTest(f"Skipped as {small_jar_path} does not exist.")
 
         requests = list(
             self.artifact_manager._create_requests(
@@ -333,6 +341,10 @@ def test_single_chunked_and_chunked_artifact(self):
         large_jar_path = os.path.join(self.artifact_file_path, f"{file_name2}.jar")
         large_jar_crc_path = os.path.join(self.artifact_crc_path, f"{file_name2}.txt")
         large_jar_size = os.path.getsize(large_jar_path)
+        if not os.path.isfile(small_jar_path):
+            raise unittest.SkipTest(f"Skipped as {small_jar_path} does not exist.")
+        if not os.path.isfile(large_jar_path):
+            raise unittest.SkipTest(f"Skipped as {large_jar_path} does not exist.")
 
         requests = list(
             self.artifact_manager._create_requests(
