1
1
2
-
2
+ configfile : "config.json"
3
3
# want to get this from the command line, or a directory
4
4
5
5
@@ -17,23 +17,21 @@ BASE = "small_sample"
17
17
18
18
# wildcards
19
19
VARTYPES = ["missense" , "LOF" , "indels" ]
20
-
20
+ ALL_THREADS = [num for num in range (config ["num_threads" ])]
21
+ NUM_THREADS = max (ALL_THREADS ) + 1
21
22
22
23
# the final outputs of the pipeline are listed as the inputs of rule all
23
24
# if glob_wildcards is used, an expand() will likely be needed here
25
+
24
26
rule all :
25
27
input :
26
- MAIN_DIR + "intermediates/annovar/" + BASE + ".full.avinput" ,
27
- MAIN_DIR + "intermediates/annovar/" + BASE + ".exonic_variant_function" ,
28
- expand (MAIN_DIR + "intermediates/splits/" + BASE + ".{vartype}_0.exonic_variant_function" , vartype = VARTYPES ),
29
- expand (MAIN_DIR + "intermediates/faa/" + BASE + ".{vartype}_0.faa" , vartype = VARTYPES ),
30
- MAIN_DIR + "intermediates/scores/" + BASE + ".missense_0.csv" ,
31
- MAIN_DIR + "intermediates/scores/" + BASE + ".LOF_0_output.txt" ,
32
- MAIN_DIR + "intermediates/scores/" + BASE + ".indels_0_output.txt" ,
33
- MAIN_DIR + "data/" + BASE + ".vcf.tmp"
28
+ expand (MAIN_DIR + "intermediates/faa/" + BASE + ".{vartype}_{num_threads}.faa" , vartype = VARTYPES , num_threads = ALL_THREADS ),
29
+ expand (MAIN_DIR + "intermediates/scores/" + BASE + ".{vartype}_{num_threads}_output.txt" , vartype = VARTYPES , num_threads = ALL_THREADS ),
30
+ MAIN_DIR + "data/" + BASE + ".annotated.vcf" ,
31
+ MAIN_DIR + "data/" + BASE + ".scored.vcf"
34
32
35
33
36
- ruleorder : annovar_convert > annovar_annotate > splitter > coding_change > MutPred2 > MutPred_LOF > MutPred_indel > merge
34
+ ruleorder : annovar_convert > annovar_annotate > splitter > coding_change > MutPred2 > MutPred_LOF > MutPred_indel > Merge
37
35
38
36
# first run ANNOVAR - there are two steps (convert, then annotate)
39
37
rule annovar_convert :
@@ -67,61 +65,71 @@ rule splitter:
67
65
input :
68
66
rules .annovar_annotate .output .var_fxn
69
67
output :
70
- splits = MAIN_DIR + "intermediates/splits/" + BASE + ".{vartype}_0.exonic_variant_function"
68
+ expand (MAIN_DIR + "intermediates/splits/" + BASE + ".{vartype}_{num_threads}.exonic_variant_function" , vartype = VARTYPES , num_threads = ALL_THREADS )
69
+ threads :
70
+ NUM_THREADS
71
71
shell :
72
- "{params.cmd} --target {input} --output {params.output_folder}"
72
+ "{params.cmd} -threads " + str ( NUM_THREADS ) + " - -target {input} --output {params.output_folder}"
73
73
74
74
rule coding_change :
75
- params :
76
- cmd = "tools/annovar/coding_change.pl" ,
77
- ops = "-includesnp" ,
78
- refGeneMrna = "tools/annovar/humandb/hg19_refGeneMrna.fa" ,
79
- refGene = "tools/annovar/humandb/hg19_refGene.txt"
80
- input :
81
- rules .splitter .output .splits
82
- output :
83
- faa_file = MAIN_DIR + "intermediates/faa/" + BASE + ".{vartype}_0.faa"
84
- shell :
85
- "{params.cmd} {params.ops} {input} {params.refGene} {params.refGeneMrna} > {output}"
75
+ params :
76
+ cmd = "perl tools/annovar/coding_change.pl" ,
77
+ ops = "-includesnp" ,
78
+ refGeneMrna = "tools/annovar/humandb/hg19_refGeneMrna.fa" ,
79
+ refGene = "tools/annovar/humandb/hg19_refGene.txt"
80
+ input :
81
+ MAIN_DIR + "intermediates/splits/" + BASE + ".{vartype}_{num_threads}.exonic_variant_function"
82
+ output :
83
+ MAIN_DIR + "intermediates/faa/" + BASE + ".{vartype}_{num_threads}.faa"
84
+ threads :
85
+ 1
86
+ shell :
87
+ "{params.cmd} {input} {params.refGene} {params.refGeneMrna} {params.ops} > {output}"
86
88
87
89
88
90
rule MutPred2 :
89
91
input :
90
- MAIN_DIR + "intermediates/faa/" + BASE + ".missense_0 .faa"
92
+ MAIN_DIR + "intermediates/faa/" + BASE + ".missense_{num_threads} .faa"
91
93
output :
92
- MAIN_DIR + "intermediates/scores/" + BASE + ".missense_0.csv"
94
+ MP2 = MAIN_DIR + "intermediates/scores/" + BASE + ".missense_{num_threads}_output.txt"
95
+ threads :
96
+ 2
93
97
shell :
94
- "tools/mutpred2.0/run_mutpred2.sh -i {input} -p 1 -c 1 -b 0 -t 0.05 -f 2 -o {output}"
98
+ "cd tools/mutpred2.0 && . /run_mutpred2.sh -i {input} -p 1 -c 1 -b 0 -t 0.05 -f 2 -o {output}"
95
99
96
100
97
101
rule MutPred_LOF :
98
102
params :
99
- outfile_prefix = MAIN_DIR + "intermediates/scores/" + BASE + ".LOF_0 "
103
+ outfile_prefix = MAIN_DIR + "intermediates/scores/" + BASE + ".LOF_{num_threads} "
100
104
input :
101
- MAIN_DIR + "intermediates/faa/" + BASE + ".LOF_0 .faa"
105
+ MAIN_DIR + "intermediates/faa/" + BASE + ".LOF_{num_threads} .faa"
102
106
output :
103
- MAIN_DIR + "intermediates/scores/" + BASE + ".LOF_0_output.txt"
107
+ MPL = MAIN_DIR + "intermediates/scores/" + BASE + ".LOF_{num_threads}_output.txt"
108
+ threads :
109
+ 6
104
110
shell :
105
111
"cd tools/MutPredLOF && ./run_MutPredLOF.sh v91/ {input} {params.outfile_prefix}"
106
112
107
113
rule MutPred_indel :
108
114
params :
109
- outfile_prefix = MAIN_DIR + "intermediates/scores/" + BASE + ".indels_0 "
115
+ outfile_prefix = MAIN_DIR + "intermediates/scores/" + BASE + ".indels_{num_threads} "
110
116
input :
111
- MAIN_DIR + "intermediates/faa/" + BASE + ".indels_0 .faa"
117
+ MAIN_DIR + "intermediates/faa/" + BASE + ".indels_{num_threads} .faa"
112
118
output :
113
- MAIN_DIR + "intermediates/scores/" + BASE + ".indels_0_output.txt"
119
+ MPI = MAIN_DIR + "intermediates/scores/" + BASE + ".indels_{num_threads}_output.txt"
120
+ threads :
121
+ 6
114
122
shell :
115
123
"cd tools/MutPredIndel_compiled && ./run_MutPredIndel.sh v91/ {input} {params.outfile_prefix}"
116
124
117
-
118
- rule merge :
125
+ rule Merge :
119
126
input :
120
- MAIN_DIR + "intermediates/scores/" + BASE + ".LOF_0_output.txt" ,
121
- MAIN_DIR + "intermediates/scores/" + BASE + ".indels_0_output.txt" ,
122
- MAIN_DIR + "intermediates/scores/" + BASE + ".missense_0.csv"
127
+ expand (MAIN_DIR + "intermediates/scores/" + BASE + ".{vartype}_{num_threads}_output.txt" , vartype = VARTYPES , num_threads = ALL_THREADS )
123
128
output :
124
- MAIN_DIR + "data/" + BASE + ".vcf.tmp"
129
+ MAIN_DIR + "data/" + BASE + ".annotated.vcf" ,
130
+ MAIN_DIR + "data/" + BASE + ".scored.vcf"
131
+ threads :
132
+ NUM_THREADS
125
133
shell :
126
- "python mutpred_merge.py --base " + BASE
134
+ "python mutpred_merge.py --vcf " + VCFFILE
127
135
0 commit comments