Skip to content

Commit fb6e269

Browse files
Handle empty group 2 for WDL CWL and Nextflow workflows (#436)
* Use CWL version 1.2
* Add a when condition so Cavatica doesn't get stuck waiting
* Preserve the order of input files for the post step in Nextflow
1 parent 023b528 commit fb6e269

File tree

9 files changed

+173
-93
lines changed

9 files changed

+173
-93
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,4 +407,3 @@ In rMATS-turbo, each alternative splicing pattern has a corresponding set of out
407407
- `[datetime]_[id].rmats`: Summary generated from processing a BAM file
408408
- `[datetime]_bam[sample_num]_[replicate_num]/Aligned.sortedByCoord.out.bam`: Result of mapping input FASTQ files
409409
- `[datetime]_read_outcomes_by_bam.txt`: Counts of the reads used from each BAM file along with counts of the reasons that reads could not be used
410-

cwl/pack_cwl_for_sbg.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,15 @@ def parse_args():
1818
return parser.parse_args()
1919

2020

21+
# Rcwl doesn't seem to support an empty list ([]) as a default value, so the R script emits [null] instead. That [null] default is converted to [] here.
22+
def replace_singleton_null_default(workflow):
23+
inputs = workflow['inputs']
24+
for value in inputs.values():
25+
default = value.get('default')
26+
if default == [None]:
27+
value['default'] = list()
28+
29+
2130
def convert_steps_to_list(steps):
2231
steps_list = list()
2332
for id_key, step_dict in steps.items():
@@ -107,6 +116,7 @@ def adapt_for_sbg(abs_cwl_path):
107116

108117
replace_steps(loaded)
109118
replace_value_from(loaded)
119+
replace_singleton_null_default(loaded)
110120
inline_other_cwl_files(loaded, base_path)
111121
return loaded
112122

cwl/pl_rMATS_bam.R

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
rmats_docker <- "xinglab/rmats:v4.3.0"
2+
cwl_version <- "v1.2"
23

34
get_array_type <- function(item_type) {
45
return(list(type = "array", items = item_type))
@@ -9,7 +10,9 @@ get_2d_array_type <- function(item_type) {
910

1011
## Workflow inputs
1112
wf_bam_g1_input <- InputParam(id = "wf_bam_g1", type = "File[]", position = -1)
12-
wf_bam_g2_input <- InputParam(id = "wf_bam_g2", type = "File[]", position = -1)
13+
## Rcwl doesn't seem to support [] as a default value. Use [null] here; pack_cwl_for_sbg.py converts [null] to [] later
14+
wf_bam_g2_input <- InputParam(id = "wf_bam_g2", type = "File[]?", default = list(NULL),
15+
position = -1)
1316
wf_gtf_input <- InputParam(id = "wf_gtf", type = "File", position = -1)
1417
wf_is_single_end_input <- InputParam(id = "wf_is_single_end", type = "boolean?", default = FALSE,
1518
position = -1)
@@ -90,19 +93,20 @@ prep_read_outcome_output <- OutputParam(id = "prep_read_outcome", type = "File",
9093
glob = "$('prep_' + inputs.prep_bam_id + '_read_outcomes_by_bam.txt')")
9194
prep_bam_name_output <- OutputParam(id = "prep_bam_name", type = "string",
9295
outputEval = "$(inputs.prep_bam.path.split('/').pop())")
93-
rmats_prep <- cwlProcess(baseCommand = "bash script.sh",
94-
requirements = list(prep_docker_req, prep_js_req, prep_init_work_dir_req,
95-
prep_resource_req),
96-
inputs = InputParamList(prep_bam_input, prep_bam_id_input, prep_gtf_input,
97-
prep_is_single_end_input, prep_readLength_input,
98-
prep_out_dir_input, prep_lib_type_input,
99-
prep_variable_read_length_input,
100-
prep_anchorLength_input, prep_novelSS_input,
101-
prep_mil_input, prep_mel_input,
102-
prep_allow_clipping_input, prep_machine_mem_gb_input,
103-
prep_disk_space_gb_input),
104-
outputs = OutputParamList(prep_out_rmats_output, prep_read_outcome_output,
105-
prep_bam_name_output))
96+
rmats_prep <- cwlProcess(cwlVersion = cwl_version,
97+
baseCommand = "bash script.sh",
98+
requirements = list(prep_docker_req, prep_js_req, prep_init_work_dir_req,
99+
prep_resource_req),
100+
inputs = InputParamList(prep_bam_input, prep_bam_id_input, prep_gtf_input,
101+
prep_is_single_end_input, prep_readLength_input,
102+
prep_out_dir_input, prep_lib_type_input,
103+
prep_variable_read_length_input,
104+
prep_anchorLength_input, prep_novelSS_input,
105+
prep_mil_input, prep_mel_input,
106+
prep_allow_clipping_input, prep_machine_mem_gb_input,
107+
prep_disk_space_gb_input),
108+
outputs = OutputParamList(prep_out_rmats_output, prep_read_outcome_output,
109+
prep_bam_name_output))
106110

107111
## Expression tool steps to convert Files to locations.
108112
## This avoids loading all the Files to the disk of a single worker machine.
@@ -112,7 +116,8 @@ exp_file_to_loc_file_input <- InputParam(id = "exp_file_to_loc_file", type = "Fi
112116
exp_file_to_loc_js_req <- requireJS()
113117
exp_file_to_loc_js <- "${return({'exp_file_to_loc_loc': inputs.exp_file_to_loc_file.location})}"
114118
exp_file_to_loc_loc_output <- OutputParam(id = "exp_file_to_loc_loc", type = "string")
115-
exp_file_to_loc <- cwlProcess(cwlClass = "ExpressionTool",
119+
exp_file_to_loc <- cwlProcess(cwlVersion = cwl_version,
120+
cwlClass = "ExpressionTool",
116121
requirements = list(exp_file_to_loc_js_req),
117122
inputs = InputParamList(exp_file_to_loc_file_input),
118123
outputs = OutputParamList(exp_file_to_loc_loc_output),
@@ -142,7 +147,8 @@ exp_bam_id_ids_js <- paste(sep = "\n",
142147
"}",
143148
"return({'exp_bam_id_ids': id_strings})}")
144149
exp_bam_id_ids_output <- OutputParam(id = "exp_bam_id_ids", type = "string[]")
145-
exp_bam_id <- cwlProcess(cwlClass = "ExpressionTool",
150+
exp_bam_id <- cwlProcess(cwlVersion = cwl_version,
151+
cwlClass = "ExpressionTool",
146152
requirements = list(exp_bam_id_js_req),
147153
inputs = InputParamList(exp_bam_id_bams_input, exp_bam_id_prefix_input),
148154
outputs = OutputParamList(exp_bam_id_ids_output),
@@ -151,9 +157,20 @@ exp_bam_id <- cwlProcess(cwlClass = "ExpressionTool",
151157
step_exp_prep_g1 <- cwlStep(id = "step_exp_prep_g1", run = exp_bam_id,
152158
In = list(exp_bam_id_bams = "step_exp_file_to_loc_g1/exp_file_to_loc_loc",
153159
exp_bam_id_prefix = list(valueFrom = "g1_")))
160+
161+
## The 'when' expression will cause this step to be skipped if
162+
## there are no group 2 bams.
163+
## Cavatica would otherwise hang waiting for this step to complete.
164+
## According to the CWL specification it seems like it should be fine to
165+
## run this anyway and get an empty list.
166+
## If it is skipped then the output will be null.
167+
## That should not be an issue for step_prep_g2 since it scatters on
168+
## "prep_bam" and "prep_bam_id" and if any scatter param evaluates to []
169+
## then the step is skipped and produces [] as output.
154170
step_exp_prep_g2 <- cwlStep(id = "step_exp_prep_g2", run = exp_bam_id,
155171
In = list(exp_bam_id_bams = "step_exp_file_to_loc_g2/exp_file_to_loc_loc",
156-
exp_bam_id_prefix = list(valueFrom = "g2_")))
172+
exp_bam_id_prefix = list(valueFrom = "g2_")),
173+
when = "$(inputs.exp_bam_id_bams.length > 0)")
157174

158175
## Prep step scatter over bam_g1
159176
step_prep_g1 <- cwlStep(id = "step_prep_g1",
@@ -225,6 +242,9 @@ post_disk_space_gb_input <- InputParam(id = "post_disk_space_gb", type = "int",
225242

226243
post_script_string <- paste(sep = "\n",
227244
"${",
245+
"var has_g2 = inputs.post_bam_name_g2.length > 0",
246+
"var b2_opt = has_g2 ? '--b2' : ''",
247+
"var b2_val = has_g2 ? 'bam_g2.txt' : ''",
228248
"var anchorLength_opt = inputs.post_anchorLength != null ? '--anchorLength' : ''",
229249
"var anchorLength_string = inputs.post_anchorLength != null ? inputs.post_anchorLength : ''",
230250
"var is_default_stats = (!inputs.post_paired_stats) && (!inputs.post_darts_model)",
@@ -287,7 +307,7 @@ post_script_string <- paste(sep = "\n",
287307
" }",
288308
"}",
289309
"script += ' > bam_g2.txt\\n'",
290-
"script += 'python /rmats/rmats.py --b1 bam_g1.txt --b2 bam_g2.txt --gtf ' + inputs.post_gtf.path + ' --readLength ' + inputs.post_readLength + ' --nthread ' + inputs.post_nthread + ' --od ' + inputs.post_out_dir + ' --tmp fd_rmats --task post ' + anchorLength_opt + ' ' + anchorLength_string + ' --tstat ' + inputs.post_tstat + ' ' + cstat_opt + ' ' + cstat_val + ' ' + statoff_opt + ' ' + paired_stats_opt + ' ' + darts_model_opt + ' ' + darts_cutoff_opt + ' ' + darts_cutoff_val + ' ' + novelSS_opt + ' ' + mil_opt + ' ' + mil_val + ' ' + mel_opt + ' ' + mel_val + ' ' + individual_counts_opt + '\\n'",
310+
"script += 'python /rmats/rmats.py --b1 bam_g1.txt ' + b2_opt + ' ' + b2_val + ' --gtf ' + inputs.post_gtf.path + ' --readLength ' + inputs.post_readLength + ' --nthread ' + inputs.post_nthread + ' --od ' + inputs.post_out_dir + ' --tmp fd_rmats --task post ' + anchorLength_opt + ' ' + anchorLength_string + ' --tstat ' + inputs.post_tstat + ' ' + cstat_opt + ' ' + cstat_val + ' ' + statoff_opt + ' ' + paired_stats_opt + ' ' + darts_model_opt + ' ' + darts_cutoff_opt + ' ' + darts_cutoff_val + ' ' + novelSS_opt + ' ' + mil_opt + ' ' + mil_val + ' ' + mel_opt + ' ' + mel_val + ' ' + individual_counts_opt + '\\n'",
291311
"script += 'tar czf ' + inputs.post_out_dir + '.tar.gz ' + inputs.post_out_dir + '\\n'",
292312
"return(script)}")
293313

@@ -300,7 +320,8 @@ post_resource_req <- requireResource(coresMin = "$(inputs.post_nthread)",
300320
outdirMin = "$(inputs.post_disk_space_gb * 1024)")
301321
post_out_tar_output <- OutputParam(id = "post_out_tar", type = "File",
302322
glob = "$(inputs.post_out_dir + '.tar.gz')")
303-
rmats_post <- cwlProcess(baseCommand = "bash script.sh",
323+
rmats_post <- cwlProcess(cwlVersion = cwl_version,
324+
baseCommand = "bash script.sh",
304325
requirements = list(post_docker_req, post_js_req, post_init_work_dir_req,
305326
post_resource_req),
306327
inputs = InputParamList(post_bam_name_g1_input, post_bam_name_g2_input,
@@ -360,7 +381,8 @@ exp_read_outcome_outcomes_js <- paste(sep = "\n",
360381
"}",
361382
"return({'exp_read_outcome_outcomes': outcomes})}")
362383
exp_read_outcome_outcomes_output <- OutputParam(id = "exp_read_outcome_outcomes", type = "File[]")
363-
exp_read_outcome <- cwlProcess(cwlClass = "ExpressionTool",
384+
exp_read_outcome <- cwlProcess(cwlVersion = cwl_version,
385+
cwlClass = "ExpressionTool",
364386
requirements = list(exp_read_outcome_js_req),
365387
inputs = InputParamList(exp_read_outcome_prep_g1_input,
366388
exp_read_outcome_prep_g2_input),
@@ -378,7 +400,7 @@ wf_out_tar_output <- OutputParam(id = "wf_out_tar", type = "File",
378400
outputSource = "step_post/post_out_tar")
379401
wf_scatter_req <- requireScatter()
380402
wf_step_input_exp_req <- requireStepInputExpression()
381-
workflow <- cwlWorkflow(cwlVersion = "v1.0",
403+
workflow <- cwlWorkflow(cwlVersion = cwl_version,
382404
requirements = list(wf_scatter_req, wf_step_input_exp_req),
383405
inputs = InputParamList(wf_bam_g1_input, wf_bam_g2_input, wf_gtf_input,
384406
wf_is_single_end_input, wf_readLength_input,

cwl/rMATS_bam_packed.cwl

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
class: Workflow
2-
cwlVersion: v1.0
2+
cwlVersion: v1.2
33
inputs:
44
wf_allow_clipping:
55
default: false
@@ -9,7 +9,8 @@ inputs:
99
wf_bam_g1:
1010
type: File[]
1111
wf_bam_g2:
12-
type: File[]
12+
default: []
13+
type: File[]?
1314
wf_cstat:
1415
default: '0.0001'
1516
type: string?
@@ -82,7 +83,7 @@ steps:
8283
- exp_file_to_loc_loc
8384
run:
8485
class: ExpressionTool
85-
cwlVersion: v1.0
86+
cwlVersion: v1.2
8687
expression: '${return({''exp_file_to_loc_loc'': inputs.exp_file_to_loc_file.location})}'
8788
inputs:
8889
exp_file_to_loc_file:
@@ -102,7 +103,7 @@ steps:
102103
- exp_file_to_loc_loc
103104
run:
104105
class: ExpressionTool
105-
cwlVersion: v1.0
106+
cwlVersion: v1.2
106107
expression: '${return({''exp_file_to_loc_loc'': inputs.exp_file_to_loc_file.location})}'
107108
inputs:
108109
exp_file_to_loc_file:
@@ -125,7 +126,7 @@ steps:
125126
- exp_bam_id_ids
126127
run:
127128
class: ExpressionTool
128-
cwlVersion: v1.0
129+
cwlVersion: v1.2
129130
expression: "${\nvar id_strings = new Array(inputs.exp_bam_id_bams.length)\nfor\
130131
\ (var i = 0; i < id_strings.length; i++) {\n id_strings[i] = inputs.exp_bam_id_prefix\
131132
\ + i\n}\nreturn({'exp_bam_id_ids': id_strings})}"
@@ -149,7 +150,7 @@ steps:
149150
- exp_bam_id_ids
150151
run:
151152
class: ExpressionTool
152-
cwlVersion: v1.0
153+
cwlVersion: v1.2
153154
expression: "${\nvar id_strings = new Array(inputs.exp_bam_id_bams.length)\nfor\
154155
\ (var i = 0; i < id_strings.length; i++) {\n id_strings[i] = inputs.exp_bam_id_prefix\
155156
\ + i\n}\nreturn({'exp_bam_id_ids': id_strings})}"
@@ -163,6 +164,7 @@ steps:
163164
type: string[]
164165
requirements:
165166
- class: InlineJavascriptRequirement
167+
when: $(inputs.exp_bam_id_bams.length > 0)
166168
- id: step_prep_g1
167169
in:
168170
prep_allow_clipping: wf_allow_clipping
@@ -187,7 +189,7 @@ steps:
187189
run:
188190
baseCommand: bash script.sh
189191
class: CommandLineTool
190-
cwlVersion: v1.0
192+
cwlVersion: v1.2
191193
inputs:
192194
prep_allow_clipping:
193195
type: boolean
@@ -319,7 +321,7 @@ steps:
319321
run:
320322
baseCommand: bash script.sh
321323
class: CommandLineTool
322-
cwlVersion: v1.0
324+
cwlVersion: v1.2
323325
inputs:
324326
prep_allow_clipping:
325327
type: boolean
@@ -455,7 +457,7 @@ steps:
455457
run:
456458
baseCommand: bash script.sh
457459
class: CommandLineTool
458-
cwlVersion: v1.0
460+
cwlVersion: v1.2
459461
inputs:
460462
post_anchorLength:
461463
type: int?
@@ -518,9 +520,11 @@ steps:
518520
- class: InlineJavascriptRequirement
519521
- class: InitialWorkDirRequirement
520522
listing:
521-
- entry: "${\nvar anchorLength_opt = inputs.post_anchorLength != null ? '--anchorLength'\
522-
\ : ''\nvar anchorLength_string = inputs.post_anchorLength != null ? inputs.post_anchorLength\
523-
\ : ''\nvar is_default_stats = (!inputs.post_paired_stats) && (!inputs.post_darts_model)\n\
523+
- entry: "${\nvar has_g2 = inputs.post_bam_name_g2.length > 0\nvar b2_opt =\
524+
\ has_g2 ? '--b2' : ''\nvar b2_val = has_g2 ? 'bam_g2.txt' : ''\nvar anchorLength_opt\
525+
\ = inputs.post_anchorLength != null ? '--anchorLength' : ''\nvar anchorLength_string\
526+
\ = inputs.post_anchorLength != null ? inputs.post_anchorLength : ''\nvar\
527+
\ is_default_stats = (!inputs.post_paired_stats) && (!inputs.post_darts_model)\n\
524528
var cstat_opt = is_default_stats ? '--cstat' : ''\nvar cstat_val = is_default_stats\
525529
\ ? inputs.post_cstat : ''\nvar statoff_opt = inputs.post_statoff ? '--statoff'\
526530
\ : ''\nvar paired_stats_opt = inputs.post_paired_stats ? '--paired-stats'\
@@ -549,15 +553,16 @@ steps:
549553
\ += 'echo '\nfor (var i = 0; i < inputs.post_bam_name_g2.length; i++) {\n\
550554
\ script += inputs.post_bam_name_g2[i]\n if (i != (inputs.post_bam_name_g2.length\
551555
\ - 1)) {\n script += ','\n }\n}\nscript += ' > bam_g2.txt\\n'\nscript\
552-
\ += 'python /rmats/rmats.py --b1 bam_g1.txt --b2 bam_g2.txt --gtf ' + inputs.post_gtf.path\
553-
\ + ' --readLength ' + inputs.post_readLength + ' --nthread ' + inputs.post_nthread\
554-
\ + ' --od ' + inputs.post_out_dir + ' --tmp fd_rmats --task post ' + anchorLength_opt\
555-
\ + ' ' + anchorLength_string + ' --tstat ' + inputs.post_tstat + ' ' +\
556-
\ cstat_opt + ' ' + cstat_val + ' ' + statoff_opt + ' ' + paired_stats_opt\
557-
\ + ' ' + darts_model_opt + ' ' + darts_cutoff_opt + ' ' + darts_cutoff_val\
558-
\ + ' ' + novelSS_opt + ' ' + mil_opt + ' ' + mil_val + ' ' + mel_opt +\
559-
\ ' ' + mel_val + ' ' + individual_counts_opt + '\\n'\nscript += 'tar czf\
560-
\ ' + inputs.post_out_dir + '.tar.gz ' + inputs.post_out_dir + '\\n'\nreturn(script)}"
556+
\ += 'python /rmats/rmats.py --b1 bam_g1.txt ' + b2_opt + ' ' + b2_val +\
557+
\ ' --gtf ' + inputs.post_gtf.path + ' --readLength ' + inputs.post_readLength\
558+
\ + ' --nthread ' + inputs.post_nthread + ' --od ' + inputs.post_out_dir\
559+
\ + ' --tmp fd_rmats --task post ' + anchorLength_opt + ' ' + anchorLength_string\
560+
\ + ' --tstat ' + inputs.post_tstat + ' ' + cstat_opt + ' ' + cstat_val\
561+
\ + ' ' + statoff_opt + ' ' + paired_stats_opt + ' ' + darts_model_opt +\
562+
\ ' ' + darts_cutoff_opt + ' ' + darts_cutoff_val + ' ' + novelSS_opt +\
563+
\ ' ' + mil_opt + ' ' + mil_val + ' ' + mel_opt + ' ' + mel_val + ' ' +\
564+
\ individual_counts_opt + '\\n'\nscript += 'tar czf ' + inputs.post_out_dir\
565+
\ + '.tar.gz ' + inputs.post_out_dir + '\\n'\nreturn(script)}"
561566
entryname: script.sh
562567
writable: false
563568
- class: ResourceRequirement
@@ -572,7 +577,7 @@ steps:
572577
- exp_read_outcome_outcomes
573578
run:
574579
class: ExpressionTool
575-
cwlVersion: v1.0
580+
cwlVersion: v1.2
576581
expression: "${\nvar outcomes = new Array()\nfor (var i = 0; i < inputs.exp_read_outcome_prep_g1.length;\
577582
\ i++) {\n outcomes.push(inputs.exp_read_outcome_prep_g1[i])\n}\nfor (var i\
578583
\ = 0; i < inputs.exp_read_outcome_prep_g2.length; i++) {\n outcomes.push(inputs.exp_read_outcome_prep_g2[i])\n\

nextflow/nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ params {
33

44
// input bam files
55
bam_g1 = null
6-
bam_g2 = null
6+
bam_g2 = []
77

88
// reference
99
gtf = null

0 commit comments

Comments
 (0)