Commit d3346eb

sryza authored and dongjoon-hyun committed
[SPARK-54440][SDP] Give default pipeline spec file more idiomatic name, spark-pipeline.yml
### What changes were proposed in this pull request?

Changes the default pipelines configuration file from pipeline.yml to spark-pipeline.yml.

### Why are the changes needed?

In the current implementation of Declarative Pipelines, the default name for the pipeline configuration YML file is "pipeline.yml". This is inconsistent with other user-provided Spark configuration file names:
- spark-env.sh
- spark-defaults.conf

Changing it to spark-pipeline.yml would be more consistent.

### Does this PR introduce _any_ user-facing change?

Not to anything released.

### How was this patch tested?

Covered by unit tests. Updated tests.

### Was this patch authored or co-authored using generative AI tooling?

Closes #53144 from sryza/spark-pipeline.yml.

Authored-by: Sandy Ryza <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 6578b9b commit d3346eb

6 files changed: +20 -20 lines changed


core/src/test/scala/org/apache/spark/deploy/SparkPipelinesSuite.scala

Lines changed: 4 additions & 4 deletions
@@ -58,7 +58,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
     val args = Array(
       "run",
       "--spec",
-      "pipeline.yml"
+      "spark-pipeline.yml"
     )
     assert(
       SparkPipelines.constructSparkSubmitArgs(
@@ -71,7 +71,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
         "abc/python/pyspark/pipelines/cli.py",
         "run",
         "--spec",
-        "pipeline.yml"
+        "spark-pipeline.yml"
       )
     )
   }
@@ -83,7 +83,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
       "run",
       "--supervise",
       "--spec",
-      "pipeline.yml",
+      "spark-pipeline.yml",
       "--conf",
       "spark.conf2=3"
     )
@@ -101,7 +101,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
         "abc/python/pyspark/pipelines/cli.py",
         "run",
         "--spec",
-        "pipeline.yml"
+        "spark-pipeline.yml"
       )
     )
   }

docs/declarative-pipelines-programming-guide.md

Lines changed: 2 additions & 2 deletions
@@ -96,7 +96,7 @@ configuration:
   spark.sql.shuffle.partitions: "1000"
 ```
 
-It's conventional to name pipeline spec files `pipeline.yml`.
+It's conventional to name pipeline spec files `spark-pipeline.yml`.
 
 The `spark-pipelines init` command, described below, makes it easy to generate a pipeline project with default configuration and directory structure.
 
@@ -113,7 +113,7 @@ The `spark-pipelines` command line interface (CLI) is the primary way to execute
 
 ### `spark-pipelines run`
 
-`spark-pipelines run` launches an execution of a pipeline and monitors its progress until it completes. The `--spec` parameter allows selecting the pipeline spec file. If not provided, the CLI will look in the current directory and parent directories for a file named `pipeline.yml` or `pipeline.yaml`.
+`spark-pipelines run` launches an execution of a pipeline and monitors its progress until it completes. The `--spec` parameter allows selecting the pipeline spec file. If not provided, the CLI will look in the current directory and parent directories for a file named `spark-pipeline.yml` or `spark-pipeline.yaml`.
 
 ### `spark-pipelines dry-run`
 

python/pyspark/errors/error-conditions.json

Lines changed: 1 addition & 1 deletion
@@ -908,7 +908,7 @@
   },
   "PIPELINE_SPEC_FILE_NOT_FOUND": {
     "message": [
-      "No pipeline.yaml or pipeline.yml file provided in arguments or found in directory `<dir_path>` or readable ancestor directories."
+      "No spark-pipeline.yaml or spark-pipeline.yml file provided in arguments or found in directory `<dir_path>` or readable ancestor directories."
     ]
   },
   "PIPELINE_SPEC_INVALID_GLOB_PATTERN": {

python/pyspark/pipelines/cli.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@
 
 from pyspark.pipelines.add_pipeline_analysis_context import add_pipeline_analysis_context
 
-PIPELINE_SPEC_FILE_NAMES = ["pipeline.yaml", "pipeline.yml"]
+PIPELINE_SPEC_FILE_NAMES = ["spark-pipeline.yaml", "spark-pipeline.yml"]
 
 
 @dataclass(frozen=True)

python/pyspark/pipelines/init_cli.py

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ def init(name: str) -> None:
     storage_path = f"file://{storage_dir.resolve()}"
 
     # Write the spec file to the project directory
-    spec_file = project_dir / "pipeline.yml"
+    spec_file = project_dir / "spark-pipeline.yml"
     with open(spec_file, "w") as f:
         spec_content = SPEC.replace("{{ name }}", name).replace("{{ storage_root }}", storage_path)
         f.write(spec_content)
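The hunk above only renames the file that `spark-pipelines init` writes; the surrounding template substitution is unchanged. A minimal sketch of that flow, assuming a placeholder `SPEC` template (the real template constant in `init_cli.py` is not shown in this diff and likely contains more fields):

```python
from pathlib import Path

# Assumed minimal template; the real SPEC constant in init_cli.py may differ.
SPEC = """name: {{ name }}
storage: {{ storage_root }}
"""


def write_spec(project_dir: Path, name: str, storage_path: str) -> Path:
    """Write the default spec under the new conventional name, as init_cli.py now does."""
    spec_file = project_dir / "spark-pipeline.yml"
    spec_content = SPEC.replace("{{ name }}", name).replace("{{ storage_root }}", storage_path)
    spec_file.write_text(spec_content)
    return spec_file
```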

python/pyspark/pipelines/tests/test_cli.py

Lines changed: 11 additions & 11 deletions
@@ -191,7 +191,7 @@ def test_unpack_pipeline_spec_bad_configuration(self):
 
     def test_find_pipeline_spec_in_current_directory(self):
         with tempfile.TemporaryDirectory() as temp_dir:
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write(
                     """
@@ -208,7 +208,7 @@ def test_find_pipeline_spec_in_current_directory(self):
 
     def test_find_pipeline_spec_in_current_directory_yml(self):
         with tempfile.TemporaryDirectory() as temp_dir:
-            spec_path = Path(temp_dir) / "pipeline.yml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yml"
             with spec_path.open("w") as f:
                 f.write(
                     """
@@ -225,10 +225,10 @@ def test_find_pipeline_spec_in_current_directory_yml(self):
 
     def test_find_pipeline_spec_in_current_directory_yml_and_yaml(self):
         with tempfile.TemporaryDirectory() as temp_dir:
-            with (Path(temp_dir) / "pipeline.yml").open("w") as f:
+            with (Path(temp_dir) / "spark-pipeline.yml").open("w") as f:
                 f.write("")
 
-            with (Path(temp_dir) / "pipeline.yaml").open("w") as f:
+            with (Path(temp_dir) / "spark-pipeline.yaml").open("w") as f:
                 f.write("")
 
             with self.assertRaises(PySparkException) as context:
@@ -241,7 +241,7 @@ def test_find_pipeline_spec_in_parent_directory(self):
             parent_dir = Path(temp_dir)
             child_dir = Path(temp_dir) / "child"
             child_dir.mkdir()
-            spec_path = parent_dir / "pipeline.yaml"
+            spec_path = parent_dir / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write(
                     """
@@ -296,7 +296,7 @@ def mv2():
 
         registry = LocalGraphElementRegistry()
         register_definitions(
-            outer_dir / "pipeline.yaml", registry, spec, self.spark, "test_graph_id"
+            outer_dir / "spark-pipeline.yaml", registry, spec, self.spark, "test_graph_id"
         )
         self.assertEqual(len(registry.outputs), 1)
         self.assertEqual(registry.outputs[0].name, "mv1")
@@ -319,7 +319,7 @@ def test_register_definitions_file_raises_error(self):
         registry = LocalGraphElementRegistry()
         with self.assertRaises(RuntimeError) as context:
             register_definitions(
-                outer_dir / "pipeline.yml", registry, spec, self.spark, "test_graph_id"
+                outer_dir / "spark-pipeline.yml", registry, spec, self.spark, "test_graph_id"
             )
         self.assertIn("This is a test exception", str(context.exception))
 
@@ -377,7 +377,7 @@ def test_python_import_current_directory(self):
         registry = LocalGraphElementRegistry()
         with change_dir(inner_dir2):
             register_definitions(
-                inner_dir1 / "pipeline.yaml",
+                inner_dir1 / "spark-pipeline.yaml",
                 registry,
                 PipelineSpec(
                     name="test_pipeline",
@@ -394,7 +394,7 @@ def test_python_import_current_directory(self):
     def test_full_refresh_all_conflicts_with_full_refresh(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             # Create a minimal pipeline spec
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write('{"name": "test_pipeline"}')
 
@@ -418,7 +418,7 @@ def test_full_refresh_all_conflicts_with_full_refresh(self):
     def test_full_refresh_all_conflicts_with_refresh(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             # Create a minimal pipeline spec
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write('{"name": "test_pipeline"}')
 
@@ -443,7 +443,7 @@ def test_full_refresh_all_conflicts_with_refresh(self):
     def test_full_refresh_all_conflicts_with_both(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             # Create a minimal pipeline spec
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write('{"name": "test_pipeline"}')
 