Skip to content

Commit 466c800

Browse files
author
davidstew
committed
Add mix and match functionality for mutation calling (resolves #265)
1 parent 101565a commit 466c800

File tree

12 files changed

+87
-26
lines changed

12 files changed

+87
-26
lines changed

MANUAL.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,10 @@ be substituted with S3 links. Descriptions for creating all files can be found i
362362
java_Xmx: 5G -> The heap size to use for MuTect
363363
per job (i.e. per chromosome)
364364
version: 1.1.7
365+
run: True -> Switch to skip calling mutect
365366
muse:
366367
version: 1.0rc_submission_b391201
368+
run: True -> Switch to skip calling muse
367369
radia: -> Radia uses perchrom bed files in
368370
folders as references.
369371
cosmic_beds: /path/to/radia_cosmic.tar.gz
@@ -372,17 +374,20 @@ be substituted with S3 links. Descriptions for creating all files can be found i
372374
pseudogene_beds: /path/to/radia_pseudogenes.tar.gz
373375
gencode_beds: /path/to/radia_gencode.tar.gz
374376
version: bcda721fc1f9c28d8b9224c2f95c440759cd3a03
377+
run: True -> Switch to skip calling radia
375378
somaticsniper:
376379
version: 1.0.4
377380
samtools: -> pileup reads
378381
version: 0.1.8
379382
bam_readcount: -> obtain readcounts
380383
version: 0.7.4
384+
run: True -> Switch to skip calling somaticsniper
381385
strelka:
382386
version: 1.0.15
383387
config_file: /path/to/strelka_config.ini.tar.gz -> The Strelka config file for a
384388
bwa run (modified for a WXS run
385389
if necessary)
390+
run: True -> Switch to skip calling strelka
386391
star_fusion:
387392
run: True -> Switch to skip fusion calling
388393
version: 1.0.0

src/protect/mutation_annotation/snpeff.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options):
3939
:return: fsID for the snpeffed vcf
4040
:rtype: toil.fileStore.FileID
4141
"""
42+
if merged_mutation_file is None:
43+
return None
4244
work_dir = os.getcwd()
4345
input_files = {
4446
'merged_mutations.vcf': merged_mutation_file,
4547
'snpeff_index.tar.gz': snpeff_options['index']}
4648
input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
4749
input_files['snpeff_index'] = untargz(input_files['snpeff_index.tar.gz'], work_dir)
4850
input_files = {key: docker_path(path) for key, path in input_files.items()}
49-
5051
parameters = ['eff',
5152
'-dataDir', input_files['snpeff_index'],
5253
'-c', '/'.join([input_files['snpeff_index'],

src/protect/mutation_calling/common.py

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,30 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
6363
"""
6464
# Setup an input data structure for the merge function
6565
out = {}
66-
for chrom in mutation_results['mutect'].keys():
67-
out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
68-
univ_options).rv()
69-
merged_snvs = job.addFollowOnJobFn(merge_perchrom_vcfs, out, 'merged', univ_options)
70-
job.fileStore.logToMaster('Aggregated mutations for %s successfully' % univ_options['patient'])
71-
return merged_snvs.rv()
66+
chroms = {}
67+
# Extract the chromosomes from a mutation caller if at least one mutation caller is selected. All callers should
68+
# have the same chromosomes.
69+
for caller in mutation_results:
70+
if mutation_results[caller] is None:
71+
continue
72+
else:
73+
if caller == 'strelka':
74+
if mutation_results['strelka']['snvs'] is None:
75+
continue
76+
chroms = mutation_results['strelka']['snvs'].keys()
77+
else:
78+
chroms = mutation_results[caller].keys()
79+
break
80+
if chroms:
81+
for chrom in chroms:
82+
out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
83+
univ_options).rv()
84+
merged_snvs = job.addFollowOnJobFn(merge_perchrom_vcfs, out, 'merged', univ_options)
85+
job.fileStore.logToMaster('Aggregated mutations for %s successfully' % univ_options['patient'])
86+
return merged_snvs.rv()
87+
else:
88+
return None
89+
7290

7391

7492
def merge_perchrom_mutations(job, chrom, mutations, univ_options):
@@ -100,30 +118,26 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
100118
'indels': {'strelka_indels': process_strelka_vcf
101119
}
102120
}
103-
# 'fusions': lambda x: None,
104-
# 'indels': lambda x: None}
105-
# For now, let's just say 2 out of n need to call it.
106-
# num_preds = len(mutations)
107-
# majority = int((num_preds + 0.5) / 2)
108-
majority = {'snvs': 2,
109-
'indels': 1}
110-
111121
accepted_hits = defaultdict(dict)
112-
113122
for mut_type in vcf_processor.keys():
114123
# Get input files
115124
perchrom_mutations = {caller: vcf_processor[mut_type][caller](job, mutations[caller][chrom],
116125
work_dir, univ_options)
117-
for caller in vcf_processor[mut_type]}
126+
for caller in vcf_processor[mut_type]
127+
if mutations[caller] is not None}
128+
if not perchrom_mutations:
129+
continue
118130
# Process the strelka key
119-
perchrom_mutations['strelka'] = perchrom_mutations['strelka_' + mut_type]
120-
perchrom_mutations.pop('strelka_' + mut_type)
131+
if 'strelka' + mut_type in perchrom_mutations:
132+
perchrom_mutations['strelka'] = perchrom_mutations['strelka_' + mut_type]
133+
perchrom_mutations.pop('strelka_' + mut_type)
134+
majority = 1 if len(perchrom_mutations) <= 2 else (len(perchrom_mutations) + 1) / 2
121135
# Read in each file to a dict
122136
vcf_lists = {caller: read_vcf(vcf_file) for caller, vcf_file in perchrom_mutations.items()}
123137
all_positions = list(set(itertools.chain(*vcf_lists.values())))
124138
for position in sorted(all_positions):
125139
hits = {caller: position in vcf_lists[caller] for caller in perchrom_mutations.keys()}
126-
if sum(hits.values()) >= majority[mut_type]:
140+
if sum(hits.values()) >= majority:
127141
callers = ','.join([caller for caller, hit in hits.items() if hit])
128142
assert position[1] not in accepted_hits[position[0]]
129143
accepted_hits[position[0]][position[1]] = (position[2], position[3], callers)

src/protect/mutation_calling/indel.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ def run_indel_caller(job, tumor_bam, normal_bam, univ_options, indel_options):
2727
:return: fsID to the merged fusion calls
2828
:rtype: toil.fileStore.FileID
2929
"""
30-
job.fileStore.logToMaster('INDELs are currently unsupported.... Skipping.')
30+
job.fileStore.logToMaster('INDELs currently occur only through strelka.')
31+
if indel_options['run'] is False:
32+
return None
3133
indel_file = job.fileStore.getLocalTempFile()
3234
output_file = job.fileStore.writeGlobalFile(indel_file)
3335
job.fileStore.logToMaster('Ran INDEL on %s successfully' % univ_options['patient'])

src/protect/mutation_calling/muse.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ def run_muse(job, tumor_bam, normal_bam, univ_options, muse_options):
7878
+- 'chrM': fsID
7979
:rtype: dict
8080
"""
81+
if muse_options['run'] is False:
82+
return None
8183
# Get a list of chromosomes to handle
8284
if muse_options['chromosomes']:
8385
chromosomes = muse_options['chromosomes']

src/protect/mutation_calling/mutect.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
7575
+- 'chrM': fsID
7676
:rtype: dict
7777
"""
78+
if mutect_options['run'] is False:
79+
return None
7880
# Get a list of chromosomes to handle
7981
if mutect_options['chromosomes']:
8082
chromosomes = mutect_options['chromosomes']

src/protect/mutation_calling/radia.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ def run_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options):
8787
+- 'chrM': fsID
8888
:rtype: dict
8989
"""
90+
if radia_options['run'] is False:
91+
return None
9092
if 'rna_genome' in rna_bam.keys():
9193
rna_bam = rna_bam['rna_genome']
9294
elif set(rna_bam.keys()) == {'rna_genome_sorted.bam', 'rna_genome_sorted.bam.bai'}:

src/protect/mutation_calling/somaticsniper.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ def run_somaticsniper(job, tumor_bam, normal_bam, univ_options, somaticsniper_op
8585
+- 'chrM': fsID
8686
:rtype: toil.fileStore.FileID|dict
8787
"""
88+
if somaticsniper_options['run'] is False:
89+
return None
8890
# Get a list of chromosomes to handle
8991
if somaticsniper_options['chromosomes']:
9092
chromosomes = somaticsniper_options['chromosomes']

src/protect/mutation_calling/strelka.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,10 @@ def run_strelka_with_merge(job, tumor_bam, normal_bam, univ_options, strelka_opt
4545
:param dict univ_options: Dict of universal options used by almost all tools
4646
:param dict strelka_options: Options specific to strelka
4747
:return: fsID to the merged strelka calls
48-
:rtype: toil.fileStore.FileID
48+
:rtype: dict(str, toil.fileStore.FileID)
49+
|-'snvs': fsID
50+
|
51+
+-'indels': fsID
4952
"""
5053
spawn = job.wrapJobFn(run_strelka, tumor_bam, normal_bam, univ_options,
5154
strelka_options, split=False).encapsulate()
@@ -63,8 +66,9 @@ def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split
6366
:param dict univ_options: Dict of universal options used by almost all tools
6467
:param dict strelka_options: Options specific to strelka
6568
:param bool split: Should the results be split into perchrom vcfs?
66-
:return: Either the fsID to the genome-level vcf or a dict of results from running strelka
67-
on every chromosome
69+
:return: Either the a dict of results from running strelka on every chromosome, or a dict of running strelka on the
70+
whole genome
71+
6872
perchrom_strelka:
6973
|- 'chr1':
7074
| |-'snvs': fsID
@@ -77,8 +81,16 @@ def run_strelka(job, tumor_bam, normal_bam, univ_options, strelka_options, split
7781
+- 'chrM':
7882
|-'snvs': fsID
7983
+-'indels': fsID
80-
:rtype: toil.fileStore.FileID|dict
84+
85+
genome_strelka:
86+
|-'snvs': fsID
87+
|
88+
+-'indels': fsID
89+
90+
:rtype: dict
8191
"""
92+
if strelka_options['run'] is False:
93+
return {'snvs': None, 'indels': None}
8294
if strelka_options['chromosomes']:
8395
chromosomes = strelka_options['chromosomes']
8496
else:

src/protect/pipeline/ProTECT.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,8 @@ def _parse_config_file(job, config_file, max_cores=None):
354354

355355
# Flags to check for presence of encryption keys if required
356356
gdc_inputs = ssec_encrypted = False
357+
# Flag to check if a sample without an input vcf/bedpe was provided
358+
sample_without_variants = False
357359
for key in input_config.keys():
358360
if key == 'patients':
359361
# Ensure each patient contains the required entries
@@ -373,6 +375,9 @@ def _parse_config_file(job, config_file, max_cores=None):
373375
raise ParameterError('Cannot run ProTECT using GDC RNA bams. Please fix '
374376
'sample %s' % sample_name)
375377
gdc_inputs = True
378+
if ('mutation_vcf' not in sample_set[sample_name]
379+
and 'fusion_bedpe' not in sample_set[sample_name]):
380+
sample_without_variants = True
376381
else:
377382
# Ensure the required entries exist for this key
378383
_ensure_set_contains(input_config[key], required_keys[key], key)
@@ -427,6 +432,10 @@ def _parse_config_file(job, config_file, max_cores=None):
427432
'token.')
428433
# Get all the tool inputs
429434
job.fileStore.logToMaster('Obtaining tool inputs')
435+
if (sample_without_variants and all(tool_options[k]['run'] is False
436+
for k in mutation_caller_list
437+
if k not in ('indexes', 'fusion_inspector'))):
438+
raise RuntimeError("Cannot run mutation callers if all callers are set to run = False.")
430439
process_tool_inputs = job.addChildJobFn(get_all_tool_inputs, tool_options,
431440
mutation_caller_list=mutation_caller_list)
432441
job.fileStore.logToMaster('Obtained tool inputs')
@@ -670,7 +679,7 @@ def launch_protect(job, patient_data, univ_options, tool_options):
670679
bam_files['normal_dna'].rv(), univ_options,
671680
tool_options['strelka']).encapsulate(),
672681
'indels': job.wrapJobFn(run_indel_caller, bam_files['tumor_dna'].rv(),
673-
bam_files['normal_dna'].rv(), univ_options, 'indel_options',
682+
bam_files['normal_dna'].rv(), univ_options, {'run': False},
674683
disk='100M', memory='100M', cores=1)}
675684
for sample_type in 'tumor_dna', 'normal_dna':
676685
for caller in mutations:

0 commit comments

Comments
 (0)