Skip to content

Commit b018a59

Browse files
author
davidstew
committed
Make majority a user provided option (resolves #286)
resolves #286 Added snv_majority and indel_majority in a consensus tab under mutation_calling in input_parameters.yaml. If values are not provided, the majority will be dynamically figured out.
1 parent 1945252 commit b018a59

6 files changed

Lines changed: 46 additions & 11 deletions

File tree

MANUAL.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,11 @@ be substituted with S3 links. Descriptions for creating all files can be found i
335335
version: 1.2.0
336336

337337
mutation_calling:
338+
consensus:
339+
indel_majority: -> Number of callers required to accept an indel.
340+
Will dynamically compute the indel majority by default.
341+
snv_majority: -> Number of callers required to accept an snv.
342+
Will dynamically compute the snv majority by default.
338343
indexes:
339344
chromsosomes: canonical, canonical_chr, chr1, Y -> A list of chromosomes to process.
340345
This options overrides the

src/protect/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ def email_report(job, univ_options):
638638
server = smtplib.SMTP('localhost')
639639
except socket.error as e:
640640
if e.errno == 111:
641-
print('No mail utils on this maachine')
641+
print('No mail utils on this machine')
642642
else:
643643
print('Unexpected error while attempting to send an email.')
644644
print('Could not send email report')

src/protect/mutation_calling/common.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,14 @@ def chromosomes_from_fai(genome_fai):
5151
return chromosomes
5252

5353

54-
def run_mutation_aggregator(job, mutation_results, univ_options):
54+
def run_mutation_aggregator(job, mutation_results, univ_options, consensus_options):
5555
"""
5656
Aggregate all the called mutations.
5757
5858
:param dict mutation_results: Dict of dicts of the various mutation callers in a per chromosome
5959
format
6060
:param dict univ_options: Dict of universal options used by almost all tools
61+
:param dict consensus_options: options specific for consensus mutation calling
6162
:returns: fsID for the merged mutations file
6263
:rtype: toil.fileStore.FileID
6364
"""
@@ -80,7 +81,7 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
8081
if chroms:
8182
for chrom in chroms:
8283
out[chrom] = job.addChildJobFn(merge_perchrom_mutations, chrom, mutation_results,
83-
univ_options).rv()
84+
univ_options, consensus_options).rv()
8485
merged_snvs = job.addFollowOnJobFn(merge_perchrom_vcfs, out, 'merged', univ_options)
8586
job.fileStore.logToMaster('Aggregated mutations for %s successfully' % univ_options['patient'])
8687
return merged_snvs.rv()
@@ -89,14 +90,15 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
8990

9091

9192

92-
def merge_perchrom_mutations(job, chrom, mutations, univ_options):
93+
def merge_perchrom_mutations(job, chrom, mutations, univ_options, consensus_options):
9394
"""
9495
Merge the mutation calls for a single chromosome.
9596
9697
:param str chrom: Chromosome to process
9798
:param dict mutations: dict of dicts of the various mutation caller names as keys, and a dict of
9899
per chromosome job store ids for vcfs as value
99100
:param dict univ_options: Dict of universal options used by almost all tools
101+
:param dict consensus_options: options specific for consensus mutation calling
100102
:returns fsID for vcf contaning merged calls for the given chromosome
101103
:rtype: toil.fileStore.FileID
102104
"""
@@ -109,13 +111,13 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
109111
mutations.pop('indels')
110112
mutations['strelka_indels'] = mutations['strelka']['indels']
111113
mutations['strelka_snvs'] = mutations['strelka']['snvs']
112-
vcf_processor = {'snvs': {'mutect': process_mutect_vcf,
114+
vcf_processor = {'snv': {'mutect': process_mutect_vcf,
113115
'muse': process_muse_vcf,
114116
'radia': process_radia_vcf,
115117
'somaticsniper': process_somaticsniper_vcf,
116118
'strelka_snvs': process_strelka_vcf
117119
},
118-
'indels': {'strelka_indels': process_strelka_vcf
120+
'indel': {'strelka_indels': process_strelka_vcf
119121
}
120122
}
121123
accepted_hits = defaultdict(dict)
@@ -131,7 +133,12 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
131133
if 'strelka_' + mut_type in perchrom_mutations:
132134
perchrom_mutations['strelka'] = perchrom_mutations['strelka_' + mut_type]
133135
perchrom_mutations.pop('strelka_' + mut_type)
134-
majority = 1 if len(perchrom_mutations) <= 2 else (len(perchrom_mutations) + 1) / 2
136+
if consensus_options[mut_type + '_majority'] is not None:
137+
majority = consensus_options[mut_type + '_majority']
138+
elif len(perchrom_mutations) <= 2:
139+
majority = 1
140+
else:
141+
majority = (len(perchrom_mutations) + 1) / 2
135142
# Read in each file to a dict
136143
vcf_lists = {caller: read_vcf(vcf_file) for caller, vcf_file in perchrom_mutations.items()}
137144
all_positions = list(set(itertools.chain(*vcf_lists.values())))
@@ -171,7 +178,7 @@ def read_vcf(vcf_file):
171178
if line.startswith('#'):
172179
continue
173180
line = line.strip().split()
174-
vcf_dict.append((line[0], line[1], line[3], line[4]))
181+
vcf_dict.append((line[0], line[1], line[3].upper(), line[4].upper()))
175182
return vcf_dict
176183

177184

src/protect/pipeline/ProTECT.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -434,8 +434,25 @@ def _parse_config_file(job, config_file, max_cores=None):
434434
job.fileStore.logToMaster('Obtaining tool inputs')
435435
if (sample_without_variants and all(tool_options[k]['run'] is False
436436
for k in mutation_caller_list
437-
if k not in ('indexes', 'fusion_inspector'))):
437+
if k not in ('indexes', 'fusion_inspector', 'consensus'))):
438438
raise RuntimeError("Cannot run mutation callers if all callers are set to run = False.")
439+
440+
for mut_type in ('snv', 'indel'):
441+
if tool_options['consensus'][mut_type + '_majority'] is not None:
442+
if not isinstance(tool_options['consensus'][mut_type + '_majority'], int):
443+
raise RuntimeError('Majorities have to be integers. Got %s for %s_majority.' %
444+
(tool_options['consensus'][mut_type + '_majority'], mut_type))
445+
if mut_type == 'snv':
446+
count = sum(tool_options[k]['run'] is True
447+
for k in ('muse', 'mutect', 'somaticsniper', 'radia', 'strelka'))
448+
else:
449+
count = 1 if tool_options['strelka']['run'] is True else 0
450+
if tool_options['consensus'][mut_type + '_majority'] > count:
451+
raise RuntimeError('Majority cannot be greater than the number of callers. '
452+
'Got number of %s callers = %s majority = %s.' %
453+
(mut_type, count,
454+
tool_options['consensus'][mut_type + '_majority']))
455+
439456
process_tool_inputs = job.addChildJobFn(get_all_tool_inputs, tool_options,
440457
mutation_caller_list=mutation_caller_list)
441458
job.fileStore.logToMaster('Obtained tool inputs')
@@ -687,7 +704,7 @@ def launch_protect(job, patient_data, univ_options, tool_options):
687704
bam_files['tumor_rna'].addChild(mutations['radia'])
688705
get_mutations = job.wrapJobFn(run_mutation_aggregator,
689706
{caller: cjob.rv() for caller, cjob in mutations.items()},
690-
univ_options, disk='100M', memory='100M',
707+
univ_options, tool_options['consensus'], disk='100M', memory='100M',
691708
cores=1).encapsulate()
692709
for caller in mutations:
693710
mutations[caller].addChild(get_mutations)
@@ -777,7 +794,7 @@ def get_all_tool_inputs(job, tools, outer_key='', mutation_caller_list=None):
777794
indexes = tools.pop('indexes')
778795
indexes['chromosomes'] = parse_chromosome_string(job, indexes['chromosomes'])
779796
for mutation_caller in mutation_caller_list:
780-
if mutation_caller == 'indexes':
797+
if mutation_caller in ('indexes', 'consensus'):
781798
continue
782799
tools[mutation_caller].update(indexes)
783800
return tools

src/protect/pipeline/defaults.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ expression_estimation:
5555
version: 1.2.20
5656

5757
mutation_calling:
58+
consensus:
59+
indel_majority:
60+
snv_majority:
5861
indexes:
5962
chromosomes:
6063
mutect:

src/protect/pipeline/input_parameters.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ expression_estimation:
8383
# version: 1.2.0
8484

8585
mutation_calling:
86+
consensus:
87+
indel_majority: 1
88+
snv_majority: 2
8689
indexes:
8790
chromosomes: canonical_chr, chrM
8891
genome_fasta: S3://protect-data/hg38_references/hg38.fa.tar.gz

0 commit comments

Comments
 (0)