Skip to content

Commit f1e1f69

Browse files
author
davidstew
committed
Add mix and match functionality for mutation calling (resolves #265)
1 parent 101565a commit f1e1f69

File tree

11 files changed

+79
-21
lines changed

11 files changed

+79
-21
lines changed

src/protect/mutation_annotation/snpeff.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options):
3939
:return: fsID for the snpeffed vcf
4040
:rtype: toil.fileStore.FileID
4141
"""
42+
if merged_mutation_file is None:
43+
return None
4244
work_dir = os.getcwd()
4345
input_files = {
4446
'merged_mutations.vcf': merged_mutation_file,
4547
'snpeff_index.tar.gz': snpeff_options['index']}
4648
input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
4749
input_files['snpeff_index'] = untargz(input_files['snpeff_index.tar.gz'], work_dir)
4850
input_files = {key: docker_path(path) for key, path in input_files.items()}
49-
5051
parameters = ['eff',
5152
'-dataDir', input_files['snpeff_index'],
5253
'-c', '/'.join([input_files['snpeff_index'],

src/protect/mutation_calling/common.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,28 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
6363
"""
6464
# Setup an input data structure for the merge function
6565
out = {}
66-
for chrom in mutation_results['mutect'].keys():
67-
out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
68-
univ_options).rv()
69-
merged_snvs = job.addFollowOnJobFn(merge_perchrom_vcfs, out, 'merged', univ_options)
70-
job.fileStore.logToMaster('Aggregated mutations for %s successfully' % univ_options['patient'])
71-
return merged_snvs.rv()
66+
chroms = {}
67+
for caller in mutation_results:
68+
if mutation_results[caller] is None:
69+
continue
70+
else:
71+
if caller == 'strelka':
72+
if mutation_results['strelka']['snvs'] is None:
73+
continue
74+
chroms = mutation_results['strelka']['snvs'].keys()
75+
else:
76+
chroms = mutation_results[caller].keys()
77+
break
78+
if chroms:
79+
for chrom in chroms:
80+
out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
81+
univ_options).rv()
82+
merged_snvs = job.addFollowOnJobFn(merge_perchrom_vcfs, out, 'merged', univ_options)
83+
job.fileStore.logToMaster('Aggregated mutations for %s successfully' % univ_options['patient'])
84+
return merged_snvs.rv()
85+
else:
86+
return None
87+
7288

7389

7490
def merge_perchrom_mutations(job, chrom, mutations, univ_options):
@@ -105,25 +121,27 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
105121
# Require 1 supporting caller when at most 2 callers ran; otherwise require half of the callers that ran.
106122
# num_preds = len(mutations)
107123
# majority = int((num_preds + 0.5) / 2)
108-
majority = {'snvs': 2,
109-
'indels': 1}
110-
111124
accepted_hits = defaultdict(dict)
112125

113126
for mut_type in vcf_processor.keys():
114127
# Get input files
115128
perchrom_mutations = {caller: vcf_processor[mut_type][caller](job, mutations[caller][chrom],
116129
work_dir, univ_options)
117-
for caller in vcf_processor[mut_type]}
130+
for caller in vcf_processor[mut_type]
131+
if mutations[caller] is not None}
118132
# Process the strelka key
119-
perchrom_mutations['strelka'] = perchrom_mutations['strelka_' + mut_type]
120-
perchrom_mutations.pop('strelka_' + mut_type)
133+
if 'strelka' + mut_type in perchrom_mutations:
134+
perchrom_mutations['strelka'] = perchrom_mutations['strelka_' + mut_type]
135+
perchrom_mutations.pop('strelka_' + mut_type)
136+
if not perchrom_mutations:
137+
continue
138+
majority = 1 if len(perchrom_mutations) <= 2 else len(perchrom_mutations) / 2
121139
# Read in each file to a dict
122140
vcf_lists = {caller: read_vcf(vcf_file) for caller, vcf_file in perchrom_mutations.items()}
123141
all_positions = list(set(itertools.chain(*vcf_lists.values())))
124142
for position in sorted(all_positions):
125143
hits = {caller: position in vcf_lists[caller] for caller in perchrom_mutations.keys()}
126-
if sum(hits.values()) >= majority[mut_type]:
144+
if sum(hits.values()) >= majority:
127145
callers = ','.join([caller for caller, hit in hits.items() if hit])
128146
assert position[1] not in accepted_hits[position[0]]
129147
accepted_hits[position[0]][position[1]] = (position[2], position[3], callers)

src/protect/mutation_calling/indel.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ def run_indel_caller(job, tumor_bam, normal_bam, univ_options, indel_options):
2727
:return: fsID to the merged indel calls
2828
:rtype: toil.fileStore.FileID
2929
"""
30-
job.fileStore.logToMaster('INDELs are currently unsupported.... Skipping.')
30+
job.fileStore.logToMaster('INDELs currently occur only through strelka.')
31+
if indel_options['run'] is False:
32+
return None
3133
indel_file = job.fileStore.getLocalTempFile()
3234
output_file = job.fileStore.writeGlobalFile(indel_file)
3335
job.fileStore.logToMaster('Ran INDEL on %s successfully' % univ_options['patient'])

src/protect/mutation_calling/muse.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ def run_muse(job, tumor_bam, normal_bam, univ_options, muse_options):
7878
+- 'chrM': fsID
7979
:rtype: dict
8080
"""
81+
82+
if muse_options['run'] is False:
83+
return None
8184
# Get a list of chromosomes to handle
8285
if muse_options['chromosomes']:
8386
chromosomes = muse_options['chromosomes']

src/protect/mutation_calling/mutect.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
7575
+- 'chrM': fsID
7676
:rtype: dict
7777
"""
78+
79+
if mutect_options['run'] is False:
80+
return None
7881
# Get a list of chromosomes to handle
7982
if mutect_options['chromosomes']:
8083
chromosomes = mutect_options['chromosomes']

src/protect/mutation_calling/radia.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
8787
+- 'chrM': fsID
8888
:rtype: dict
8989
"""
90+
if radia_options['run'] is False:
91+
return None
9092
if 'rna_genome' in rna_bam.keys():
9193
rna_bam = rna_bam['rna_genome']
9294
elif set(rna_bam.keys()) == {'rna_genome_sorted.bam', 'rna_genome_sorted.bam.bai'}:

src/protect/mutation_calling/somaticsniper.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_op
8585
+- 'chrM': fsID
8686
:rtype: toil.fileStore.FileID|dict
8787
"""
88+
89+
if somaticsniper_options['run'] is False:
90+
return None
8891
# Get a list of chromosomes to handle
8992
if somaticsniper_options['chromosomes']:
9093
chromosomes = somaticsniper_options['chromosomes']

src/protect/mutation_calling/strelka.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,10 @@ def run_strelka_with_merge(job, tumor_bam, normal_bam, univ_options, strelka_opt
4545
:param dict univ_options: Dict of universal options used by almost all tools
4646
:param dict strelka_options: Options specific to strelka
4747
:return: Dict of fsIDs to the merged strelka snv and indel calls
48-
:rtype: toil.fileStore.FileID
48+
:rtype: dict(str, toil.fileStore.FileID)
49+
|-'snvs': fsID
50+
|
51+
+-'indels': fsID
4952
"""
5053
spawn = job.wrapJobFn(run_strelka, tumor_bam, normal_bam, univ_options,
5154
strelka_options, split=False).encapsulate()
@@ -63,8 +66,9 @@ def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split
6366
:param dict univ_options: Dict of universal options used by almost all tools
6467
:param dict strelka_options: Options specific to strelka
6568
:param bool split: Should the results be split into perchrom vcfs?
66-
:return: Either the fsID to the genome-level vcf or a dict of results from running strelka
67-
on every chromosome
69+
:return: Either a dict of results from running strelka on every chromosome, or a dict of results from running strelka on the
70+
whole genome
71+
6872
perchrom_strelka:
6973
|- 'chr1':
7074
| |-'snvs': fsID
@@ -77,8 +81,16 @@ def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split
7781
+- 'chrM':
7882
|-'snvs': fsID
7983
+-'indels': fsID
80-
:rtype: toil.fileStore.FileID|dict
84+
85+
genome_strelka:
86+
|-'snvs': fsID
87+
|
88+
+-'indels': fsID
89+
90+
:rtype: dict
8191
"""
92+
if strelka_options['run'] is False:
93+
return {'snvs': None, 'indels': None}
8294
if strelka_options['chromosomes']:
8395
chromosomes = strelka_options['chromosomes']
8496
else:

src/protect/pipeline/ProTECT.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def _parse_config_file(job, config_file, max_cores=None):
353353
input_config = _add_default_entries(input_config, protect_defaults)
354354

355355
# Flags to check for presence of encryption keys if required
356-
gdc_inputs = ssec_encrypted = False
356+
gdc_inputs = ssec_encrypted = sample_without_variants = False
357357
for key in input_config.keys():
358358
if key == 'patients':
359359
# Ensure each patient contains the required entries
@@ -373,6 +373,8 @@ def _parse_config_file(job, config_file, max_cores=None):
373373
raise ParameterError('Cannot run ProTECT using GDC RNA bams. Please fix '
374374
'sample %s' % sample_name)
375375
gdc_inputs = True
376+
if 'mutation_vcf' not in sample_set[sample_name] and 'fusion_bedpe' not in sample_set[sample_name]:
377+
sample_without_variants = True
376378
else:
377379
# Ensure the required entries exist for this key
378380
_ensure_set_contains(input_config[key], required_keys[key], key)
@@ -427,6 +429,8 @@ def _parse_config_file(job, config_file, max_cores=None):
427429
'token.')
428430
# Get all the tool inputs
429431
job.fileStore.logToMaster('Obtaining tool inputs')
432+
if sample_without_variants and all(tool_options[k]['run'] is False for k in mutation_caller_list if k not in ('indexes', 'fusion_inspector')):
433+
raise RuntimeError("Cannot run mutation callers if all callers are set to run = False.")
430434
process_tool_inputs = job.addChildJobFn(get_all_tool_inputs, tool_options,
431435
mutation_caller_list=mutation_caller_list)
432436
job.fileStore.logToMaster('Obtained tool inputs')
@@ -670,7 +674,7 @@ def launch_protect(job, patient_data, univ_options, tool_options):
670674
bam_files['normal_dna'].rv(), univ_options,
671675
tool_options['strelka']).encapsulate(),
672676
'indels': job.wrapJobFn(run_indel_caller, bam_files['tumor_dna'].rv(),
673-
bam_files['normal_dna'].rv(), univ_options, 'indel_options',
677+
bam_files['normal_dna'].rv(), univ_options, {'run': False},
674678
disk='100M', memory='100M', cores=1)}
675679
for sample_type in 'tumor_dna', 'normal_dna':
676680
for caller in mutations:

src/protect/pipeline/defaults.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,22 @@ mutation_calling:
6060
mutect:
6161
java_Xmx: 2G
6262
version: 1.1.7
63+
run: True
6364
muse:
6465
version: 1.0rc_submission_b391201
66+
run: True
6567
radia:
6668
version: bcda721fc1f9c28d8b9224c2f95c440759cd3a03
69+
run: True
6770
somaticsniper:
71+
run: True
6872
version: 1.0.4
6973
samtools:
7074
version: 0.1.8
7175
bam_readcount:
7276
version: 0.7.4
7377
strelka:
78+
run: True
7479
version: 1.0.15
7580

7681
star_fusion:

0 commit comments

Comments
 (0)