Skip to content

Commit 7d52205

Browse files
committed
Add fusion filters for transgene (resolves #237)
Added all fusion parameters to the input config so users can control what fusion filters are applied.
1 parent 101565a commit 7d52205

File tree

4 files changed

+26
-1
lines changed

4 files changed

+26
-1
lines changed

MANUAL.md

+5
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,11 @@ be substituted with S3 links. Descriptions for creating all files can be found i
414414
gencode_transcript_fasta : /path/to/gencode_transcripts.faa -> The transcript file for the gencode gtf.
415415
gencode_annotation_gtf : /path/to/gencode_annotation.gtf -> The gencode genome annotation file.
416416
genome_fasta : /path/to/hg19.faa -> The gencode genome fasta file
417+
filter_mt_fusions: True -> Switch to filter mitochondrial gene pairs
418+
filter_ig_pairs: True -> Switch to filter immunoglobulin gene pairs
419+
filter_rna_gene_fusions: True -> Switch to filter RNA-gene fusion pairs
420+
filter_readthroughs: True -> Switch to filter readthroughs
421+
readthrough_threshold: 500000 -> Threshold below which pairs will be called readthroughs
417422
version: 2.2.2
418423

419424
haplotyping:

src/protect/mutation_translation.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ def run_transgene(job, snpeffed_file, rna_bam, univ_options, transgene_options,
9090
'--pep_lens', '9,10,15',
9191
'--cores', str(transgene_options['n']),
9292
'--genome', input_files['genome.fa'],
93-
'--annotation', input_files['annotation.gtf']]
93+
'--annotation', input_files['annotation.gtf'],
94+
'--log_file', '/data/transgene.log']
9495

9596
if snpeffed_file is not None:
9697
parameters.extend(['--snpeff', input_files['snpeffed_muts.vcf']])
@@ -110,6 +111,15 @@ def run_transgene(job, snpeffed_file, rna_bam, univ_options, transgene_options,
110111
fusion_files = {key: docker_path(path) for key, path in fusion_files.items()}
111112
parameters += ['--transcripts', fusion_files['transcripts.fa'],
112113
'--fusions', fusion_files['fusion_calls']]
114+
if transgene_options['filter_mt_fusions'] is True:
115+
parameters.append('--filter_mt')
116+
if transgene_options['filter_ig_pairs'] is True:
117+
parameters.append('--filter_ig')
118+
if transgene_options['filter_rna_gene_fusions'] is True:
119+
parameters.append('--filter_rg')
120+
if transgene_options['filter_readthroughs'] is True:
121+
parameters.append('--filter_rt')
122+
parameters.extend(['--rt_threshold', str(transgene_options['readthrough_threshold'])])
113123

114124
docker_call(tool='transgene',
115125
tool_parameters=parameters,

src/protect/pipeline/defaults.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ mutation_annotation:
8888

8989
mutation_translation:
9090
transgene:
91+
filter_mt_fusions: True
92+
filter_ig_pairs: True
93+
filter_rna_gene_fusions: True
94+
filter_readthroughs: True
95+
readthrough_threshold: 500000
9196
version: 2.5.0
9297

9398
haplotyping:

src/protect/pipeline/input_parameters.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,11 @@ mutation_translation:
133133
gencode_peptide_fasta : S3://protect-data/hg38_references/gencode.v25.pc_translations_NOPARY.fa.tar.gz
134134
gencode_transcript_fasta : S3://protect-data/hg38_references/gencode.v25.pc_transcripts_NOPARY.fa.tar.gz
135135
gencode_annotation_gtf : S3://protect-data/hg38_references/gencode.v25.annotation_NOPARY.gtf.tar.gz
136+
filter_mt_fusions: True
137+
filter_ig_pairs: True
138+
filter_rna_gene_fusions: True
139+
filter_readthroughs: True
140+
readthrough_threshold: 500000
136141
genome_fasta : S3://protect-data/hg38_references/hg38.fa.tar.gz
137142
# version: 2.2.2
138143

0 commit comments

Comments
 (0)