@@ -63,12 +63,30 @@ def run_mutation_aggregator(job, mutation_results, univ_options):
6363 """
6464 # Setup an input data structure for the merge function
6565 out = {}
66- for chrom in mutation_results ['mutect' ].keys ():
67- out [chrom ] = job .addChildJobFn (merge_perchrom_mutations , chrom , mutation_results ,
68- univ_options ).rv ()
69- merged_snvs = job .addFollowOnJobFn (merge_perchrom_vcfs , out , 'merged' , univ_options )
70- job .fileStore .logToMaster ('Aggregated mutations for %s successfully' % univ_options ['patient' ])
71- return merged_snvs .rv ()
66+ chroms = {}
67+ # Take the chromosome list from the first caller whose entry is not None (None entries are skipped). All callers
68+ # are expected to report the same set of chromosomes.
69+ for caller in mutation_results :
70+ if mutation_results [caller ] is None :
71+ continue
72+ else :
73+ if caller == 'strelka' :
74+ if mutation_results ['strelka' ]['snvs' ] is None :
75+ continue
76+ chroms = mutation_results ['strelka' ]['snvs' ].keys ()
77+ else :
78+ chroms = mutation_results [caller ].keys ()
79+ break
80+ if chroms :
81+ for chrom in chroms :
82+ out [chrom ] = job .addChildJobFn (merge_perchrom_mutations , chrom , mutation_results ,
83+ univ_options ).rv ()
84+ merged_snvs = job .addFollowOnJobFn (merge_perchrom_vcfs , out , 'merged' , univ_options )
85+ job .fileStore .logToMaster ('Aggregated mutations for %s successfully' % univ_options ['patient' ])
86+ return merged_snvs .rv ()
87+ else :
88+ return None
89+
7290
7391
7492def merge_perchrom_mutations (job , chrom , mutations , univ_options ):
@@ -100,30 +118,26 @@ def merge_perchrom_mutations(job, chrom, mutations, univ_options):
100118 'indels' : {'strelka_indels' : process_strelka_vcf
101119 }
102120 }
103- # 'fusions': lambda x: None,
104- # 'indels': lambda x: None}
105- # For now, let's just say 2 out of n need to call it.
106- # num_preds = len(mutations)
107- # majority = int((num_preds + 0.5) / 2)
108- majority = {'snvs' : 2 ,
109- 'indels' : 1 }
110-
111121 accepted_hits = defaultdict (dict )
112-
113122 for mut_type in vcf_processor .keys ():
114123 # Get input files
115124 perchrom_mutations = {caller : vcf_processor [mut_type ][caller ](job , mutations [caller ][chrom ],
116125 work_dir , univ_options )
117- for caller in vcf_processor [mut_type ]}
126+ for caller in vcf_processor [mut_type ]
127+ if mutations [caller ] is not None }
128+ if not perchrom_mutations :
129+ continue
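118130 # Rename the 'strelka_<mut_type>' entry to plain 'strelka'. NOTE(review): the guard below tests 'strelka' + mut_type (no underscore), which differs from the 'strelka_' + mut_type key used in the rename -- likely a missing underscore; confirm.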
119- perchrom_mutations ['strelka' ] = perchrom_mutations ['strelka_' + mut_type ]
120- perchrom_mutations .pop ('strelka_' + mut_type )
131+ if 'strelka' + mut_type in perchrom_mutations :
132+ perchrom_mutations ['strelka' ] = perchrom_mutations ['strelka_' + mut_type ]
133+ perchrom_mutations .pop ('strelka_' + mut_type )
134+ majority = 1 if len (perchrom_mutations ) <= 2 else (len (perchrom_mutations ) + 1 ) / 2
121135 # Read in each file to a dict
122136 vcf_lists = {caller : read_vcf (vcf_file ) for caller , vcf_file in perchrom_mutations .items ()}
123137 all_positions = list (set (itertools .chain (* vcf_lists .values ())))
124138 for position in sorted (all_positions ):
125139 hits = {caller : position in vcf_lists [caller ] for caller in perchrom_mutations .keys ()}
126- if sum (hits .values ()) >= majority [ mut_type ] :
140+ if sum (hits .values ()) >= majority :
127141 callers = ',' .join ([caller for caller , hit in hits .items () if hit ])
128142 assert position [1 ] not in accepted_hits [position [0 ]]
129143 accepted_hits [position [0 ]][position [1 ]] = (position [2 ], position [3 ], callers )
0 commit comments