Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .test/config-chm-eval/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

calling:
delly:
activate: true
Expand Down
8 changes: 8 additions & 0 deletions .test/config-giab/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

# printing of variants in a matrix, sorted by recurrence
report:
# if stratificatio is deactivated, one oncoprint for all
Expand Down
8 changes: 8 additions & 0 deletions .test/config-no-candidate-filtering/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

calling:
delly:
activate: true
Expand Down
8 changes: 8 additions & 0 deletions .test/config-simple/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

calling:
delly:
activate: true
Expand Down
8 changes: 8 additions & 0 deletions .test/config-sra/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

calling:
delly:
activate: true
Expand Down
8 changes: 8 additions & 0 deletions .test/config-target-regions/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

calling:
delly:
activate: true
Expand Down
8 changes: 8 additions & 0 deletions .test/config-target-regions/config_multiple_beds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

calling:
delly:
activate: true
Expand Down
8 changes: 8 additions & 0 deletions .test/config_primers/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ mutational_signatures:
events:
- some_id

hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

calling:
delly:
activate: true
Expand Down
9 changes: 9 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ mutational_signatures:
# select events (callsets, defined under calling/fdr-control/events) to evaluate
- some_id


hla_typing:
activate: false
loci:
- "A"
- "B"
- "C"
- "DQB1"

# Sets the minimum average coverage for each gene.
# Genes with lower average coverage will not be concidered in gene coverage datavzrd report
# If not present min_avg_coverage will be set to 0 rendering all genes.
Expand Down
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ include: "rules/fusion_calling.smk"
include: "rules/testcase.smk"
include: "rules/population.smk"
include: "rules/mutational_signatures.smk"
include: "rules/hla_typing.smk"


batches = "all"
Expand Down
5 changes: 5 additions & 0 deletions workflow/envs/orthanq.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- orthanq =1.15.0
60 changes: 53 additions & 7 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ delly_excluded_regions = {
("homo_sapiens", "GRCh37"): "human.hg19",
}

# hla typing
hla_loci = config.get("hla_typing", {}).get("loci", [])
orthanq_callers = [f"orthanq-{locus}" for locus in hla_loci]


def _group_or_sample(row):
group = row.get("group", None)
Expand Down Expand Up @@ -163,6 +167,18 @@ def get_final_output(wildcards):
)
)

if is_activated("hla_typing"):
loci = [f"orthanq-{locus}" for locus in config["hla_typing"].get("loci", [])]
for group in samples["group"].unique():
final_output.extend(
expand(
"results/hla-typing/{group}-{locus}/{alias}/",
alias=get_group_aliases(group),
group=group,
locus=loci,
)
)

for calling_type in calling_types:
if config["report"]["activate"]:
for event in get_calling_events(calling_type):
Expand Down Expand Up @@ -702,7 +718,20 @@ def get_mutational_signature_targets():

def get_scattered_calls(ext="bcf"):
def inner(wildcards):
caller = "arriba" if wildcards.calling_type == "fusions" else variant_caller
match wildcards.calling_type:
case "fusions":
caller = "arriba"
case "variants":
caller = variant_caller
case "hla-variants":
caller = [
f"orthanq-{locus}" for locus in config["hla_typing"].get("loci", [])
]
case _:
raise ValueError(
f"Unexpected wildcard value for 'calling_type': {wildcards.calling_type}"
)

return expand(
"results/calls/{{group}}.{caller}.{{scatteritem}}.{ext}",
caller=caller,
Expand Down Expand Up @@ -751,12 +780,25 @@ def get_gather_annotated_calls_input(ext="bcf"):
return inner


# def get_scatter_candidates_input(wildcards):
# if wildcards.caller == "orthanq":
# return "results/candidate-calls/{caller}.{scatteritem}.bcf"
# elif config.get("target_regions"):
# return "results/candidate-calls/{group}.{caller}.filtered.bcf"
# else:
# return get_fixed_candidate_calls("bcf")


def get_candidate_calls(wc):
filter = config["calling"]["filter"].get("candidates")
# only use orthanq path if caller is one of the orthanq loci
if is_activated("hla_typing") and wc.caller.startswith("orthanq-"):
return "results/orthanq-candidate-calls/{caller}.hla-variants.{scatteritem}.bcf"

if filter and wc.caller != "arriba":
return "results/candidate-calls/{group}.{caller}.{scatteritem}.filtered.bcf"
else:
return "results/candidate-calls/{group}.{caller}.{scatteritem}.bcf"

return "results/candidate-calls/{group}.{caller}.{scatteritem}.bcf"


def _get_report_batch(calling_type, batch):
Expand Down Expand Up @@ -950,7 +992,9 @@ def get_candidate_filter_aux_files():
return []
else:
# aux files are considered as part of the config, hence local
return [local(path) for name, path in get_filter_aux_entries("candidates").items()]
return [
local(path) for name, path in get_filter_aux_entries("candidates").items()
]


def get_candidate_filter_aux(wildcards, input):
Expand All @@ -959,7 +1003,9 @@ def get_candidate_filter_aux(wildcards, input):
else:
return [
f"--aux {name}={path}"
for name, path in zip(get_filter_aux_entries("candidates").keys(), input.aux)
for name, path in zip(
get_filter_aux_entries("candidates").keys(), input.aux
)
]


Expand All @@ -979,7 +1025,7 @@ def get_varlociraptor_params(wildcards, params):
wildcard_constraints:
group="|".join(groups),
sample="|".join(samples["sample_name"]),
caller="|".join(["freebayes", "delly", "arriba"]),
caller="|".join(["freebayes", "delly", "arriba"] + orthanq_callers),
filter="|".join(config["calling"]["filter"]),
event="|".join(config["calling"]["fdr-control"]["events"].keys()),
regions_type="|".join(["expanded", "covered"]),
Expand Down Expand Up @@ -1481,7 +1527,7 @@ def get_primer_extra(wc, input):
min_primer_len = get_shortest_primer_length(input.reads)
# Check if shortest primer is below default values
if min_primer_len < 32:
extra += f" -T {min_primer_len - 2}"
extra += f" -T {min_primer_len-2}"
if min_primer_len < 19:
extra += f" -k {min_primer_len}"
return extra
Expand Down
109 changes: 109 additions & 0 deletions workflow/rules/hla_typing.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# dowloads latest IMGT/HLA database version
rule get_hla_genes_and_xml:
output:
genes="results/preparation/hla_gen.fasta",
xml="results/preparation/hla.xml.zip",
log:
"logs/get_hla_genes_and_xml.log",
params:
genes_link="ftp://ftp.ebi.ac.uk/pub/databases/ipd/imgt/hla/hla_gen.fasta",
xml_link="ftp://ftp.ebi.ac.uk/pub/databases/ipd/imgt/hla/xml/hla.xml.zip",
shell:
"wget -c {params.genes_link} -O {output.genes} && "
"wget -c {params.xml_link} -O {output.xml} 2> {log}"


rule unzip_xml:
input:
"results/preparation/hla.xml.zip",
output:
xml="results/preparation/hla.xml",
log:
"logs/unzip_xml.log",
params:
path_to_unzip=lambda wc, output: os.path.dirname(output.xml),
shell:
"unzip -o {input} -d {params.path_to_unzip}"


rule orthanq_candidate_variants:
input:
genome=genome,
xml="results/preparation/hla.xml",
alleles="results/preparation/hla_gen.fasta",
output:
temp(directory("results/orthanq-candidate-calls-temp/"))
log:
"logs/orthanq_candidates/candidates.log",
threads: 8
params:
command="candidates",
subcommand="hla",
output_bcf=True
wrapper:
"v7.6.2/bio/orthanq"


rule rename_candidate_bcf:
input:
"results/orthanq-candidate-calls-temp/"
output:
"results/orthanq-candidate-calls/orthanq-{locus}.hla-variants.bcf"
log:
"logs/scatter-candidates/rename_candidates_{locus}.log",
shell:
"cp {input}/{wildcards.locus}.bcf {output}"


# The scatter_candidates rule does not work without group wildcards.
rule scatter_candidates_orthanq:
input:
"results/orthanq-candidate-calls/{orthanq_locus}.hla-variants.bcf",
output:
scatter.calling(
"results/orthanq-candidate-calls/{{orthanq_locus}}.hla-variants.{scatteritem}.bcf"
),
log:
"logs/scatter-candidates/{orthanq_locus}.log",
conda:
"../envs/rbt.yaml"
shell:
"rbt vcf-split {input} {output}"


rule gather_annotated_calls_orthanq:
input:
calls=gather.calling("results/calls/{{group}}.{{caller}}.{scatteritem}.bcf"),
idx=gather.calling("results/calls/{{group}}.{{caller}}.{scatteritem}.bcf.csi"),
output:
"results/calls/{group}.{caller}.bcf",
log:
"logs/gather-hla-variants/{group}.{caller}.log",
params:
extra="-a",
group:
"annotation"
wrapper:
"v2.3.2/bio/bcftools/concat"


rule orthanq_call_hla:
input:
haplotype_calls="results/calls/{group}.{caller}.bcf",
haplotype_variants="results/orthanq-candidate-calls/{caller}.hla-variants.bcf",
xml="results/preparation/hla.xml",
output:
directory("results/hla-typing/{group}-{caller}/{alias}")
log:
"logs/orthanq/{group}-{alias}-{caller}.log",
params:
command="call",
subcommand="hla",
prior="diploid",
extra=""
wrapper:
"v7.6.2/bio/orthanq"

# TODO add other outputs (plots), fill missing inputs and commands
# TODO decide how to handle deactivation of biases in varlociraptor:
# my current favorite is a special INFO tag in the orthanq candidates.
Loading