From d5cc3438c47ddfbfdbf5f5d9174869dbad5a7086 Mon Sep 17 00:00:00 2001 From: Adam Nichols Date: Mon, 22 Jul 2024 15:48:14 -0400 Subject: [PATCH] WX-1710 Move option for final output files (#7472) --- centaur/README.md | 22 +++++++++- .../centaur/AbstractCentaurTestCaseSpec.scala | 5 ++- centaur/src/main/resources/reference.conf | 4 +- .../gcpWdlResultsCopying.test | 22 ---------- .../gcpWdlResultsCopyingRelative.test | 22 ---------- .../wdlResultsCopying/gcp/options.json | 6 --- .../gcp/optionsRelative.json | 6 --- ...ge_final_workflow_outputs_dir.options.json | 2 +- .../large_final_workflow_outputs_dir.wdl | 2 +- .../awsWdlResultsCopying.test | 0 .../awsWdlResultsCopyingRelative.test | 0 .../gcpWdlResultsCopying.test | 31 ++++++++++++++ .../gcpWdlResultsCopyingRelative.test | 31 ++++++++++++++ .../gcpWdlResultsMoving.test | 23 +++++++++++ .../gcpWdlResultsMovingFail.test | 23 +++++++++++ .../localWdlResultsCopying.test | 2 + .../localWdlResultsCopyingRelative.test | 2 + .../wdlResultsCopying/aws/options.json | 0 .../aws/optionsRelative.json | 0 .../wdlResultsCopying/gcp/options.json | 9 +++++ .../gcp/optionsRelative.json | 8 ++++ .../wdlResultsCopying/local/options.json | 0 .../local/optionsRelative.json | 0 .../wdlResultsCopying/simpleWorkflow.wdl | 0 .../wdlResultsMoving/gcp/options.json | 7 ++++ .../wdlResultsMoving/gcp/options_fail.json | 7 ++++ .../scala/cromwell/core/WorkflowOptions.scala | 13 ++++++ docs/developers/Centaur.md | 2 +- docs/wf_options/Overview.md | 16 ++++---- .../CopyWorkflowOutputsActor.scala | 40 +++++++++++++++---- 30 files changed, 227 insertions(+), 78 deletions(-) delete mode 100644 centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopying.test delete mode 100644 centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test delete mode 100644 centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json delete mode 100644 
centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/awsWdlResultsCopying.test (100%) rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/awsWdlResultsCopyingRelative.test (100%) create mode 100644 centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopying.test create mode 100644 centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test create mode 100644 centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMoving.test create mode 100644 centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMovingFail.test rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/localWdlResultsCopying.test (97%) rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/localWdlResultsCopyingRelative.test (97%) rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/wdlResultsCopying/aws/options.json (100%) rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/wdlResultsCopying/aws/optionsRelative.json (100%) create mode 100644 centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json create mode 100644 centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/wdlResultsCopying/local/options.json (100%) rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/wdlResultsCopying/local/optionsRelative.json (100%) rename centaur/src/main/resources/{ => standardTestCases}/resultsCopyingTestCases/wdlResultsCopying/simpleWorkflow.wdl (100%) create mode 100644 
centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options.json create mode 100644 centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options_fail.json diff --git a/centaur/README.md b/centaur/README.md index 7e3566de1ab..738a5b2697b 100644 --- a/centaur/README.md +++ b/centaur/README.md @@ -1 +1,21 @@ -For information on Cromwell's Integration Testing Suite, see the [Cromwell documentation on Centaur](https://cromwell.readthedocs.io/en/develop/developers/Centaur/). \ No newline at end of file +For information on Cromwell's Integration Testing Suite, see the [Cromwell documentation on Centaur](https://cromwell.readthedocs.io/en/develop/developers/Centaur/). + +### `centaur/src/it` + +Classes extending `org.scalatest` that ingest `.test` files and turn them into runnable test suites. + +### `centaur/src/main` + +#### `/resources` + +Collection of `.test` cases. In `test.inc.sh` we map Github Action jobs to case directories with `create_centaur_variables()`. Not all cases are run! + +As of July 2024, Centaur searches **recursively** for `.test` files, so they can be placed in subdirectories along with their resources. + +#### `/scala` + +Functionality to start, stop, and restart the Cromwell server under test. Also contains abstractions for asserting on metadata and workflow outputs. + +### `centaur/src/test` + +Tests for Centaur itself. 
diff --git a/centaur/src/it/scala/centaur/AbstractCentaurTestCaseSpec.scala b/centaur/src/it/scala/centaur/AbstractCentaurTestCaseSpec.scala index f90fb21e537..b3416abdab8 100644 --- a/centaur/src/it/scala/centaur/AbstractCentaurTestCaseSpec.scala +++ b/centaur/src/it/scala/centaur/AbstractCentaurTestCaseSpec.scala @@ -34,7 +34,7 @@ abstract class AbstractCentaurTestCaseSpec(cromwellBackends: List[String], SuccessReporters.getClass private def testCases(baseFile: File): List[CentaurTestCase] = { - val files = baseFile.list.filter(_.isRegularFile).toList + val files = baseFile.listRecursively.filter(isTestFile).toList val testCases = files.traverse(CentaurTestCase.fromFile(cromwellTracker)) testCases match { @@ -43,6 +43,9 @@ abstract class AbstractCentaurTestCaseSpec(cromwellBackends: List[String], } } + private def isTestFile(file: File) = + file.isRegularFile && file.extension.contains(".test") + def allTestCases: List[CentaurTestCase] = { val optionalTestCases = CentaurConfig.optionalTestPath map (File(_)) map testCases getOrElse List.empty val standardTestCases = testCases(CentaurConfig.standardTestCasePath) diff --git a/centaur/src/main/resources/reference.conf b/centaur/src/main/resources/reference.conf index ac85c5c0a87..df51bc0300e 100644 --- a/centaur/src/main/resources/reference.conf +++ b/centaur/src/main/resources/reference.conf @@ -56,9 +56,9 @@ centaur { genomics.endpoint-url = ${?CROMWELL_BUILD_PAPI_ENDPOINT_URL} genomics.location = "us-central1" batch.location = "us-central1" - auth = "Error: BA-6546 The environment variable CROMWELL_BUILD_PAPI_AUTH_MODE must be set/export pointing to a valid auth such as 'application-default'" + auth = "service-account" auth = ${?CROMWELL_BUILD_PAPI_AUTH_MODE} - json-dir = "Error: BA-6546 The environment variable CROMWELL_BUILD_RESOURCES_DIRECTORY must be set/export pointing to a valid path such as 'target/ci/resources'" + json-dir = "target/ci/resources" json-dir = ${?CROMWELL_BUILD_RESOURCES_DIRECTORY} auths = [ 
{ diff --git a/centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopying.test b/centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopying.test deleted file mode 100644 index 9389551c8b0..00000000000 --- a/centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopying.test +++ /dev/null @@ -1,22 +0,0 @@ -name: gcpWdlResultsCopying -testFormat: workflowsuccess -backends: [Papiv2] -tags: ["copyGcp"] - -files { - workflow: wdlResultsCopying/simpleWorkflow.wdl - options: wdlResultsCopying/gcp/options.json -} - -metadata { - status: Succeeded -} - -fileSystemCheck: "gcs" -outputExpectations: { - "gs://<>/wf_results/simpleWorkflow/<>/call-simpleStdoutTask/output.txt": 1 - "gs://<>/wf_logs/workflow.<>.log": 1 - "gs://<>/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stderr": 1 - "gs://<>/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stdout": 1 - "gs://<>/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/simpleStdoutTask.log": 1 -} diff --git a/centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test b/centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test deleted file mode 100644 index 5f7373929f4..00000000000 --- a/centaur/src/main/resources/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test +++ /dev/null @@ -1,22 +0,0 @@ -name: gcpWdlResultsCopyingRelative -testFormat: workflowsuccess -backends: [Papiv2] -tags: ["copyGcp"] - -files { - workflow: wdlResultsCopying/simpleWorkflow.wdl - options: wdlResultsCopying/gcp/optionsRelative.json -} - -metadata { - status: Succeeded -} - -fileSystemCheck: "gcs" -outputExpectations: { - "gs://<>/wf_results/output.txt": 1 - "gs://<>/wf_logs/workflow.<>.log": 1 - "gs://<>/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stderr": 1 - "gs://<>/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stdout": 1 - "gs://<>/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/simpleStdoutTask.log": 1 -} diff --git 
a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json b/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json deleted file mode 100644 index 2ac0db86fdb..00000000000 --- a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "use_relative_output_paths":false, - "final_workflow_outputs_dir":"gs://<>/wf_results", - "final_workflow_log_dir":"gs://<>/wf_logs", - "final_call_logs_dir":"gs://<>/cl_logs" -} diff --git a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json b/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json deleted file mode 100644 index dd2deb3ff16..00000000000 --- a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "use_relative_output_paths":true, - "final_workflow_outputs_dir":"gs://<>/wf_results", - "final_workflow_log_dir":"gs://<>/wf_logs", - "final_call_logs_dir":"gs://<>/cl_logs" -} diff --git a/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.options.json b/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.options.json index d0a29996622..41484567d76 100644 --- a/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.options.json +++ b/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.options.json @@ -1,3 +1,3 @@ { - "final_workflow_outputs_dir": "gs://cloud-cromwell-dev-self-cleaning-fast" + "final_workflow_outputs_dir": "gs://centaur-ci-us-east1" } diff --git a/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.wdl 
b/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.wdl index a25cf907a08..417a17719ae 100644 --- a/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.wdl +++ b/centaur/src/main/resources/standardTestCases/large_final_workflow_outputs_dir/large_final_workflow_outputs_dir.wdl @@ -6,7 +6,7 @@ workflow large_final_workflow_outputs_dir { # In this case we're copying by using final_workflow_outputs_dir functionality. # # Because the file used in the test is large, via the workflow options we copy to - # gs://cloud-cromwell-dev-self-cleaning-fast which is setup with a short lifecycle for deletion of objects. + # gs://centaur-ci-us-east1 which is setup with a short lifecycle for deletion of objects. # # See also https://github.com/broadinstitute/rawls/blob/c39049945867d9d6d1bb5e1cbda30a09a19147f7/automation/src/test/scala/org/broadinstitute/dsde/test/api/RawlsApiSpec.scala#L768-L783 # diff --git a/centaur/src/main/resources/resultsCopyingTestCases/awsWdlResultsCopying.test b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/awsWdlResultsCopying.test similarity index 100% rename from centaur/src/main/resources/resultsCopyingTestCases/awsWdlResultsCopying.test rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/awsWdlResultsCopying.test diff --git a/centaur/src/main/resources/resultsCopyingTestCases/awsWdlResultsCopyingRelative.test b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/awsWdlResultsCopyingRelative.test similarity index 100% rename from centaur/src/main/resources/resultsCopyingTestCases/awsWdlResultsCopyingRelative.test rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/awsWdlResultsCopyingRelative.test diff --git a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopying.test 
b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopying.test new file mode 100644 index 00000000000..dc81a00383e --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopying.test @@ -0,0 +1,31 @@ +name: gcpWdlResultsCopying +testFormat: workflowsuccess +tags: ["copyGcp"] + +# Will run on a Cromwell that supports any one of these backends +backendsMode: any + +# Asserting on the source file `gs://cloud-cromwell-dev-self-cleaning/.../simpleStdoutTask.log` currently fails on Batch. +# This is because Batch does not produce a `simpleStdoutTask.log` and instead sends logs to Cloud Logging. Burwood is going to add a config to allow the old behavior. +# backends: [Papi, Papiv2, GCPBatch] +backends: [Papi, Papiv2] + +files { + workflow: wdlResultsCopying/simpleWorkflow.wdl + options: wdlResultsCopying/gcp/options.json +} + +metadata { + status: Succeeded +} + +fileSystemCheck: "gcs" +outputExpectations: { + "gs://centaur-ci-us-east1/wf_results/simpleWorkflow/<>/call-simpleStdoutTask/output.txt": 1 + "gs://centaur-ci-us-east1/wf_logs/workflow.<>.log": 1 + "gs://centaur-ci-us-east1/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stderr": 1 + "gs://centaur-ci-us-east1/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stdout": 1 + "gs://centaur-ci-us-east1/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/simpleStdoutTask.log": 1 + "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci/simpleWorkflow/<>/call-simpleStdoutTask/simpleStdoutTask.log": 1 + "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci/simpleWorkflow/<>/call-simpleStdoutTask/output.txt": 1 +} diff --git a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test new file mode 100644 index 00000000000..887c7906f98 --- /dev/null +++
b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsCopyingRelative.test @@ -0,0 +1,31 @@ +name: gcpWdlResultsCopyingRelative +testFormat: workflowsuccess +tags: ["copyGcp"] + +# Will run on a Cromwell that supports any one of these backends +backendsMode: any + +# Asserting on the source file `gs://cloud-cromwell-dev-self-cleaning/.../simpleStdoutTask.log` currently fails on Batch. +# This is because Batch does not produce a `simpleStdoutTask.log` and instead sends logs to Cloud Logging. Burwood is going to add a config to allow the old behavior. +# backends: [Papi, Papiv2, GCPBatch] +backends: [Papi, Papiv2] + +files { + workflow: wdlResultsCopying/simpleWorkflow.wdl + options: wdlResultsCopying/gcp/optionsRelative.json +} + +metadata { + status: Succeeded +} + +fileSystemCheck: "gcs" +outputExpectations: { + "gs://centaur-ci-us-east1/wf_results/output.txt": 1 + "gs://centaur-ci-us-east1/wf_logs/workflow.<>.log": 1 + "gs://centaur-ci-us-east1/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stderr": 1 + "gs://centaur-ci-us-east1/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/stdout": 1 + "gs://centaur-ci-us-east1/cl_logs/simpleWorkflow/<>/call-simpleStdoutTask/simpleStdoutTask.log": 1 + "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci/simpleWorkflow/<>/call-simpleStdoutTask/simpleStdoutTask.log": 1 + "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci/simpleWorkflow/<>/call-simpleStdoutTask/output.txt": 1 +} diff --git a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMoving.test b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMoving.test new file mode 100644 index 00000000000..2a863ee2b44 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMoving.test @@ -0,0 +1,23 @@ +name: gcpWdlResultsMoving +testFormat: workflowsuccess +tags: ["copyGcp"] + +# Will run on a Cromwell that supports any
one of these backends +backendsMode: any +backends: [Papi, Papiv2, GCPBatch] + +files { + workflow: wdlResultsCopying/simpleWorkflow.wdl + options: wdlResultsMoving/gcp/options.json +} + +metadata { + status: Succeeded +} + +# The `centaur-ci-us-east1` bucket is in a different region than the workflow runs +fileSystemCheck: "gcs" +outputExpectations: { + "gs://centaur-ci-us-east1/move_destination/simpleWorkflow/<>/call-simpleStdoutTask/output.txt": 1 + "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci/simpleWorkflow/<>/call-simpleStdoutTask/output.txt": 0 +} diff --git a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMovingFail.test b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMovingFail.test new file mode 100644 index 00000000000..2961783cf12 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/gcpWdlResultsMovingFail.test @@ -0,0 +1,23 @@ +name: gcpWdlResultsMovingFail +testFormat: workflowfailure +tags: ["copyGcp"] + +# Will run on a Cromwell that supports any one of these backends +backendsMode: any +backends: [Papi, Papiv2, GCPBatch] + +files { + workflow: wdlResultsCopying/simpleWorkflow.wdl + options: wdlResultsMoving/gcp/options_fail.json +} + +metadata { + status: Failed +} + +# The copy to non-existent bucket failed so the delete should not have happened +# (compare to `gcpWdlResultsMoving.test`) +fileSystemCheck: "gcs" +outputExpectations: { + "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci/simpleWorkflow/<>/call-simpleStdoutTask/output.txt": 1 +} diff --git a/centaur/src/main/resources/resultsCopyingTestCases/localWdlResultsCopying.test b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/localWdlResultsCopying.test similarity index 97% rename from centaur/src/main/resources/resultsCopyingTestCases/localWdlResultsCopying.test rename to 
centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/localWdlResultsCopying.test index 2adc9ce4776..367b967122e 100644 --- a/centaur/src/main/resources/resultsCopyingTestCases/localWdlResultsCopying.test +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/localWdlResultsCopying.test @@ -2,6 +2,8 @@ name: localWdlResultsCopying testFormat: workflowsuccess tags: ["copyLocal"] +ignore: true + files { workflow: wdlResultsCopying/simpleWorkflow.wdl options: wdlResultsCopying/local/options.json diff --git a/centaur/src/main/resources/resultsCopyingTestCases/localWdlResultsCopyingRelative.test b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/localWdlResultsCopyingRelative.test similarity index 97% rename from centaur/src/main/resources/resultsCopyingTestCases/localWdlResultsCopyingRelative.test rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/localWdlResultsCopyingRelative.test index a2feda61877..a8e5abbc01d 100644 --- a/centaur/src/main/resources/resultsCopyingTestCases/localWdlResultsCopyingRelative.test +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/localWdlResultsCopyingRelative.test @@ -2,6 +2,8 @@ name: localWdlResultsCopyingRelative testFormat: workflowsuccess tags: ["copyLocal"] +ignore: true + files { workflow: wdlResultsCopying/simpleWorkflow.wdl options: wdlResultsCopying/local/optionsRelative.json diff --git a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/aws/options.json b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/aws/options.json similarity index 100% rename from centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/aws/options.json rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/aws/options.json diff --git a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/aws/optionsRelative.json 
b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/aws/optionsRelative.json similarity index 100% rename from centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/aws/optionsRelative.json rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/aws/optionsRelative.json diff --git a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json new file mode 100644 index 00000000000..508b91e4d63 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/options.json @@ -0,0 +1,9 @@ +{ + "use_relative_output_paths": false, + "final_workflow_outputs_dir": "gs://centaur-ci-us-east1/wf_results", + "final_workflow_outputs_mode": "copy", + "final_workflow_log_dir": "gs://centaur-ci-us-east1/wf_logs", + "final_call_logs_dir": "gs://centaur-ci-us-east1/cl_logs", + "read_from_cache": false, + "write_to_cache": false +} diff --git a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json new file mode 100644 index 00000000000..c492ad6c5bd --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/gcp/optionsRelative.json @@ -0,0 +1,8 @@ +{ + "use_relative_output_paths":true, + "final_workflow_outputs_dir":"gs://centaur-ci-us-east1/wf_results", + "final_workflow_log_dir":"gs://centaur-ci-us-east1/wf_logs", + "final_call_logs_dir":"gs://centaur-ci-us-east1/cl_logs", + "read_from_cache": false, + "write_to_cache": false +} diff --git a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/local/options.json 
b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/local/options.json similarity index 100% rename from centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/local/options.json rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/local/options.json diff --git a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/local/optionsRelative.json b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/local/optionsRelative.json similarity index 100% rename from centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/local/optionsRelative.json rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/local/optionsRelative.json diff --git a/centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/simpleWorkflow.wdl b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/simpleWorkflow.wdl similarity index 100% rename from centaur/src/main/resources/resultsCopyingTestCases/wdlResultsCopying/simpleWorkflow.wdl rename to centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsCopying/simpleWorkflow.wdl diff --git a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options.json b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options.json new file mode 100644 index 00000000000..8ec4bddb755 --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options.json @@ -0,0 +1,7 @@ +{ + "jes_gcs_root": "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci", + "final_workflow_outputs_dir": "gs://centaur-ci-us-east1/move_destination", + "final_workflow_outputs_mode": "move", + "read_from_cache": false, + "write_to_cache": false +} diff --git 
a/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options_fail.json b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options_fail.json new file mode 100644 index 00000000000..90dc3c5c4ff --- /dev/null +++ b/centaur/src/main/resources/standardTestCases/resultsCopyingTestCases/wdlResultsMoving/gcp/options_fail.json @@ -0,0 +1,7 @@ +{ + "jes_gcs_root": "gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/ci", + "final_workflow_outputs_dir": "gs://non-existent-bucket/move_destination", + "final_workflow_outputs_mode": "move", + "read_from_cache": false, + "write_to_cache": false +} diff --git a/core/src/main/scala/cromwell/core/WorkflowOptions.scala b/core/src/main/scala/cromwell/core/WorkflowOptions.scala index cbdb1201986..653a766354f 100644 --- a/core/src/main/scala/cromwell/core/WorkflowOptions.scala +++ b/core/src/main/scala/cromwell/core/WorkflowOptions.scala @@ -56,6 +56,19 @@ object WorkflowOptions { case object FinalCallLogsDir extends WorkflowOption("final_call_logs_dir") case object FinalWorkflowOutputsDir extends WorkflowOption("final_workflow_outputs_dir") case object UseRelativeOutputPaths extends WorkflowOption(name = "use_relative_output_paths") + case object FinalWorkflowOutputsMode extends WorkflowOption("final_workflow_outputs_mode") { + // Default to Copy because that was originally the only behavior + def fromString(s: Option[String]): FinalWorkflowOutputsMode = + s match { + case Some("copy") => Copy + case Some("move") => Move + case _ => Copy + } + } + + sealed trait FinalWorkflowOutputsMode + case object Copy extends FinalWorkflowOutputsMode + case object Move extends FinalWorkflowOutputsMode // Misc. 
case object DefaultRuntimeOptions extends WorkflowOption("default_runtime_attributes") diff --git a/docs/developers/Centaur.md b/docs/developers/Centaur.md index bbbb011ba32..e5ba6a4e83b 100644 --- a/docs/developers/Centaur.md +++ b/docs/developers/Centaur.md @@ -5,7 +5,7 @@ Centaur is an integration testing suite for the [Cromwell](http://github.com/bro Centaur expects to find a Cromwell server properly configured and running in server mode, listening on port 8000. This can be configured by modifying the `cromwellUrl` parameter in `application.conf`. -You can get a build of your current cromwell code with [these instructions](Building.md). +You can get a build of your current Cromwell code with [these instructions](Building.md). The server can be run with `java -jar server`, checkout [this page](../CommandLine.md) for more detailed instructions. You can now run the tests from another terminal. diff --git a/docs/wf_options/Overview.md b/docs/wf_options/Overview.md index 9fcd7317eea..e981c9e25e5 100644 --- a/docs/wf_options/Overview.md +++ b/docs/wf_options/Overview.md @@ -75,12 +75,13 @@ Example `options.json`: ``` ## Output Copying -|Option|Value|Description| -|---|---|---| -|`final_workflow_outputs_dir`|A directory available to Cromwell|Specifies a path where final workflow outputs will be written. If this is not specified, workflow outputs will not be copied out of the Cromwell workflow execution directory/path.| -|`use_relative_output_paths`| A boolean | When set to `true` this will copy all the outputs relative to their execution directory. my_final_workflow_outputs_dir/~~MyWorkflow/af76876d8-6e8768fa/call-MyTask/execution/~~output_of_interest . Cromwell will throw an exception when this leads to collisions. When the option is not set it will default to `false`.| -|`final_workflow_log_dir`|A directory available to Cromwell|Specifies a path where per-workflow logs will be written. 
If this is not specified, per-workflow logs will not be copied out of the Cromwell workflow log temporary directory/path before they are deleted.| -|`final_call_logs_dir`|A directory available to Cromwell|Specifies a path where final call logs will be written. If this is not specified, call logs will not be copied out of the Cromwell workflow execution directory/path.| +|Option| Value | Description | +|---|-----------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +|`final_workflow_outputs_dir`| A directory available to Cromwell | Specifies a path where final workflow outputs will be written. If this is not specified, workflow outputs will not be copied out of the Cromwell workflow execution directory/path. | +|`final_workflow_outputs_mode`| `"copy"` or `"move"` | `"copy"` is the default and preserves the source files. `"move"` performs a copy-delete sequence to clean up the source.

Note: as of this writing, the `/outputs` endpoint points to the source location. It is planned that for the `"move"` option only, `/outputs` will point to the destination. +|`use_relative_output_paths`| A boolean | When set to `true` this will copy all the outputs relative to their execution directory. my_final_workflow_outputs_dir/~~MyWorkflow/af76876d8-6e8768fa/call-MyTask/execution/~~output_of_interest . Cromwell will throw an exception when this leads to collisions. When the option is not set it will default to `false`. | +|`final_workflow_log_dir`| A directory available to Cromwell | Specifies a path where per-workflow logs will be written. If this is not specified, per-workflow logs will not be copied out of the Cromwell workflow log temporary directory/path before they are deleted. | +|`final_call_logs_dir`| A directory available to Cromwell | Specifies a path where final call logs will be written. If this is not specified, call logs will not be copied out of the Cromwell workflow execution directory/path. | Note that these directories should be using the same filesystem as the workflow. Eg if you run on Google's PAPI, you should provide `gs://...` paths. @@ -88,6 +89,7 @@ Example `options.json`: ```json { "final_workflow_outputs_dir": "/Users/michael_scott/cromwell/outputs", + "final_workflow_outputs_mode": "copy", "use_relative_output_paths": true, "final_workflow_log_dir": "/Users/michael_scott/cromwell/wf_logs", "final_call_logs_dir": "/Users/michael_scott/cromwell/call_logs" @@ -107,7 +109,7 @@ final_workflow_outputs_dir/my_output_picture.jpg final_workflow_outputs_dir/created_subdir/submarine.txt ``` -This will create file collisions in `final_workflow_outputs_dir` when a workflow is run twice. When cromwell +This will create file collisions in `final_workflow_outputs_dir` when a workflow is run twice. When Cromwell detects file collisions it will throw an error and report the workflow as failed. 
## Call Caching Options diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/CopyWorkflowOutputsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/CopyWorkflowOutputsActor.scala index 142a6623fcd..107c0048b8a 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/CopyWorkflowOutputsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/CopyWorkflowOutputsActor.scala @@ -66,10 +66,7 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, } } - private def copyWorkflowOutputs(workflowOutputsFilePath: String): Future[Seq[Unit]] = { - val workflowOutputsPath = buildPath(workflowOutputsFilePath) - val outputFilePaths = getOutputFilePaths(workflowOutputsPath) - + private def markDuplicates(outputFilePaths: List[(Path, Path)]) = { // Check if there are duplicated destination paths and throw an exception if that is the case. // This creates a map of destinations and source paths which point to them in cases where there are multiple // source paths that point to the same destination. 
@@ -89,6 +86,13 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, s" as multiple files will be copied to the same path: \n${formattedCollidingCopyOptions.mkString("\n")}" ) } + } + + private def copyWorkflowOutputs(workflowOutputsFilePath: String): Future[Seq[Unit]] = { + val workflowOutputsPath = buildPath(workflowOutputsFilePath) + val outputFilePaths = getOutputFilePaths(workflowOutputsPath) + + markDuplicates(outputFilePaths) val copies = outputFilePaths map { case (srcPath, dstPath) => asyncIo.copyAsync(srcPath, dstPath) @@ -97,6 +101,21 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, Future.sequence(copies) } + private def moveWorkflowOutputs(workflowOutputsFilePath: String): Future[Seq[Unit]] = { + val workflowOutputsPath = buildPath(workflowOutputsFilePath) + val outputFilePaths = getOutputFilePaths(workflowOutputsPath) + + markDuplicates(outputFilePaths) + + val moves = outputFilePaths map { case (srcPath, dstPath) => + asyncIo.copyAsync(srcPath, dstPath) flatMap { _ => + asyncIo.deleteAsync(srcPath) + } + } + + Future.sequence(moves) + } + private def findFiles(values: Seq[WomValue]): Seq[WomSingleFile] = values flatMap { _.collectAsSeq { case file: WomSingleFile => @@ -150,9 +169,14 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, /** * Happens after everything else runs */ - final def afterAll()(implicit ec: ExecutionContext): Future[FinalizationResponse] = - workflowDescriptor.getWorkflowOption(FinalWorkflowOutputsDir) match { - case Some(outputs) => copyWorkflowOutputs(outputs) map { _ => FinalizationSuccess } - case None => Future.successful(FinalizationSuccess) + final def afterAll()(implicit ec: ExecutionContext): Future[FinalizationResponse] = { + val maybeOutputsDir = workflowDescriptor.getWorkflowOption(FinalWorkflowOutputsDir) + val mode = FinalWorkflowOutputsMode.fromString(workflowDescriptor.getWorkflowOption(FinalWorkflowOutputsMode)) + + (maybeOutputsDir, mode) match { + case (Some(outputs), Copy) => 
copyWorkflowOutputs(outputs) map { _ => FinalizationSuccess } + case (Some(outputs), Move) => moveWorkflowOutputs(outputs) map { _ => FinalizationSuccess } + case _ => Future.successful(FinalizationSuccess) } + } }