
Commit eac413a

[SPARK-48392][CORE] Also load spark-defaults.conf when provided --properties-file
### What changes were proposed in this pull request?

Currently, if a property file is provided as an argument to Spark submit, `spark-defaults.conf` is ignored. This PR changes the behavior so that `spark-defaults.conf` is still loaded in this scenario, and any Spark configurations that are in that file but not in the input property file are also loaded.

### Why are the changes needed?

Currently, if a property file is provided as an argument to Spark submit, `spark-defaults.conf` is ignored. This is inconvenient for users who want to split Spark configurations between the two. For example, with Spark on K8S, users may want to store system-wide default settings in `spark-defaults.conf`, while keeping more dynamic, user-specified configurations in a property file passed to Spark applications via the `--properties-file` parameter. Currently this is not possible. See also kubeflow/spark-operator#1321.

### Does this PR introduce _any_ user-facing change?

Yes. Now, when a property file is provided via `--properties-file`, `spark-defaults.conf` is also loaded. However, configurations specified in the former take precedence over the same configurations in the latter.

### How was this patch tested?

Existing tests and a new test.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#46709 from sunchao/SPARK-48392.

Authored-by: Chao Sun <[email protected]>
Signed-off-by: Chao Sun <[email protected]>
1 parent e6236af commit eac413a
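The precedence this change establishes can be summarized with a small, self-contained sketch. This is not the actual `SparkSubmitArguments` code; the object name and the example values below are made up for illustration, but the merge order mirrors the description above: `--conf` entries win over the `--properties-file`, which in turn wins over `spark-defaults.conf`, and a lower-precedence source only fills in keys that are still missing.

```scala
import scala.collection.mutable

// Hypothetical illustration of the merge order described in this commit message;
// it is not the code from SparkSubmitArguments.
object MergeSketch {
  def mergeDefaults(
      confFlags: Map[String, String],       // from repeated --conf k=v flags
      propertiesFile: Map[String, String],  // from --properties-file, if given
      sparkDefaults: Map[String, String]    // from $SPARK_CONF_DIR/spark-defaults.conf
  ): Map[String, String] = {
    val merged = mutable.Map[String, String]()
    // Highest precedence first; a later (lower-precedence) source never
    // overwrites a key that is already present.
    Seq(confFlags, propertiesFile, sparkDefaults).foreach { source =>
      source.foreach { case (k, v) =>
        if (!merged.contains(k)) merged(k) = v
      }
    }
    merged.toMap
  }

  def main(args: Array[String]): Unit = {
    val result = mergeDefaults(
      confFlags = Map("spark.master" -> "yarn"),
      propertiesFile = Map("spark.executor.cores" -> "16"),
      sparkDefaults = Map("spark.executor.cores" -> "4", "spark.executor.memory" -> "3g"))
    // spark.executor.cores resolves to 16 (from the properties file), while
    // spark.executor.memory still falls through from spark-defaults.conf (3g).
    println(result)
  }
}
```

This mirrors the new test added below, where `spark.executor.memory` comes from a `SPARK_CONF_DIR` defaults file and `spark.executor.cores` from the file passed via `--properties-file`.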

3 files changed: 76 additions & 35 deletions

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 17 additions & 3 deletions
@@ -125,14 +125,28 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
    * When this is called, `sparkProperties` is already filled with configs from the latter.
    */
   private def mergeDefaultSparkProperties(): Unit = {
-    // Use common defaults file, if not specified by user
-    propertiesFile = Option(propertiesFile).getOrElse(Utils.getDefaultPropertiesFile(env))
-    // Honor --conf before the defaults file
+    // Honor --conf before the specified properties file and defaults file
     defaultSparkProperties.foreach { case (k, v) =>
       if (!sparkProperties.contains(k)) {
         sparkProperties(k) = v
       }
     }
+
+    // Also load properties from `spark-defaults.conf` if they do not exist in the properties file
+    // and --conf list
+    val defaultSparkConf = Utils.getDefaultPropertiesFile(env)
+    Option(defaultSparkConf).foreach { filename =>
+      val properties = Utils.getPropertiesFromFile(filename)
+      properties.foreach { case (k, v) =>
+        if (!sparkProperties.contains(k)) {
+          sparkProperties(k) = v
+        }
+      }
+    }
+
+    if (propertiesFile == null) {
+      propertiesFile = defaultSparkConf
+    }
   }
 
   /**
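The new branch relies on `Utils.getDefaultPropertiesFile` to locate `spark-defaults.conf` and `Utils.getPropertiesFromFile` to read it. As a rough standalone approximation (the real utilities in `org.apache.spark.util.Utils` may differ in detail), loading such a file can be sketched as follows; `java.util.Properties` accepts keys and values separated by whitespace, which matches the `spark-defaults.conf` format, and values are trimmed, as the whitespace test in `SparkSubmitSuite` expects.

```scala
import java.io.{FileInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets
import java.util.Properties

import scala.jdk.CollectionConverters._

// Hypothetical standalone sketch; not the actual Utils.getPropertiesFromFile.
object LoadPropsSketch {
  def loadSparkProperties(path: String): Map[String, String] = {
    val in = new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8)
    try {
      val props = new Properties()
      // Properties.load handles "key value" lines separated by whitespace.
      props.load(in)
      // Keep string entries and trim whitespace around values.
      props.stringPropertyNames().asScala.map(k => k -> props.getProperty(k).trim).toMap
    } finally {
      in.close()
    }
  }
}
```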

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 52 additions & 28 deletions
@@ -1113,6 +1113,23 @@ class SparkSubmitSuite
     }
   }
 
+  test("SPARK-48392: Allow both spark-defaults.conf and properties file") {
+    forConfDir(Map("spark.executor.memory" -> "3g")) { path =>
+      withPropertyFile("spark-conf.properties", Map("spark.executor.cores" -> "16")) { propsFile =>
+        val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
+        val args = Seq(
+          "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"),
+          "--name", "testApp",
+          "--master", "local",
+          "--properties-file", propsFile,
+          unusedJar.toString)
+        val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path))
+        appArgs.executorMemory should be("3g")
+        appArgs.executorCores should be("16")
+      }
+    }
+  }
+
   test("support glob path") {
     withTempDir { tmpJarDir =>
       withTempDir { tmpFileDir =>
@@ -1623,6 +1640,22 @@ class SparkSubmitSuite
     }
   }
 
+  private def withPropertyFile(fileName: String, conf: Map[String, String])(f: String => Unit) = {
+    withTempDir { tmpDir =>
+      val props = new java.util.Properties()
+      val propsFile = File.createTempFile(fileName, "", tmpDir)
+      val propsOutputStream = new FileOutputStream(propsFile)
+      try {
+        conf.foreach { case (k, v) => props.put(k, v) }
+        props.store(propsOutputStream, "")
+      } finally {
+        propsOutputStream.close()
+      }
+
+      f(propsFile.getPath)
+    }
+  }
+
   private def updateConfWithFakeS3Fs(conf: Configuration): Unit = {
     conf.set("fs.s3a.impl", classOf[TestFileSystem].getCanonicalName)
     conf.set("fs.s3a.impl.disable.cache", "true")
@@ -1694,40 +1727,31 @@ class SparkSubmitSuite
     val infixDelimFromFile = s"${delimKey}infixDelimFromFile" -> s"${CR}blah${LF}"
     val nonDelimSpaceFromFile = s"${delimKey}nonDelimSpaceFromFile" -> " blah\f"
 
-    val testProps = Seq(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile,
+    val testProps = Map(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile,
       nonDelimSpaceFromFile)
 
-    val props = new java.util.Properties()
-    val propsFile = File.createTempFile("test-spark-conf", ".properties",
-      Utils.createTempDir())
-    val propsOutputStream = new FileOutputStream(propsFile)
-    try {
-      testProps.foreach { case (k, v) => props.put(k, v) }
-      props.store(propsOutputStream, "test whitespace")
-    } finally {
-      propsOutputStream.close()
-    }
+    withPropertyFile("test-spark-conf.properties", testProps) { propsFile =>
+      val clArgs = Seq(
+        "--class", "org.SomeClass",
+        "--conf", s"${lineFeedFromCommandLine._1}=${lineFeedFromCommandLine._2}",
+        "--conf", "spark.master=yarn",
+        "--properties-file", propsFile,
+        "thejar.jar")
 
-    val clArgs = Seq(
-      "--class", "org.SomeClass",
-      "--conf", s"${lineFeedFromCommandLine._1}=${lineFeedFromCommandLine._2}",
-      "--conf", "spark.master=yarn",
-      "--properties-file", propsFile.getPath,
-      "thejar.jar")
+      val appArgs = new SparkSubmitArguments(clArgs)
+      val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
 
-    val appArgs = new SparkSubmitArguments(clArgs)
-    val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs)
+      Seq(
+        lineFeedFromCommandLine,
+        leadingDelimKeyFromFile,
+        trailingDelimKeyFromFile,
+        infixDelimFromFile
+      ).foreach { case (k, v) =>
+        conf.get(k) should be (v)
+      }
 
-    Seq(
-      lineFeedFromCommandLine,
-      leadingDelimKeyFromFile,
-      trailingDelimKeyFromFile,
-      infixDelimFromFile
-    ).foreach { case (k, v) =>
-      conf.get(k) should be (v)
+      conf.get(nonDelimSpaceFromFile._1) should be ("blah")
     }
-
-    conf.get(nonDelimSpaceFromFile._1) should be ("blah")
   }
 
   test("get a Spark configuration from arguments") {

docs/configuration.md

Lines changed: 7 additions & 4 deletions
@@ -111,12 +111,15 @@ each line consists of a key and a value separated by whitespace. For example:
     spark.eventLog.enabled true
     spark.serializer org.apache.spark.serializer.KryoSerializer
 
+In addition, a property file with Spark configurations can be passed to `bin/spark-submit` via
+the `--properties-file` parameter.
+
 Any values specified as flags or in the properties file will be passed on to the application
 and merged with those specified through SparkConf. Properties set directly on the SparkConf
-take highest precedence, then flags passed to `spark-submit` or `spark-shell`, then options
-in the `spark-defaults.conf` file. A few configuration keys have been renamed since earlier
-versions of Spark; in such cases, the older key names are still accepted, but take lower
-precedence than any instance of the newer key.
+take the highest precedence, then those through `--conf` flags or `--properties-file` passed to
+`spark-submit` or `spark-shell`, then options in the `spark-defaults.conf` file. A few
+configuration keys have been renamed since earlier versions of Spark; in such cases, the older
+key names are still accepted, but take lower precedence than any instance of the newer key.
 
 Spark properties mainly can be divided into two kinds: one is related to deploy, like
 "spark.driver.memory", "spark.executor.instances", this kind of properties may not be affected when
