Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing GPU support with parabricks #106

Merged
merged 1 commit into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 78 additions & 27 deletions workflow/rules/star-haplotypecaller.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@ rule star_index:
fasta=config["reference"]["genome"].rstrip(".gz"),
gff= config['reference']['gff'],
output:
directory("resources/reference/star_index"),
index = directory("resources/reference/star_index"),
threads: 8
log:
"logs/star_index/index.log",
params:
extra="--sjdbGTFtagExonParentTranscript Parent"
shell:
"""
mkdir -p {output.index}
STAR \
--runThreadN {threads} \
--runMode genomeGenerate \
--genomeFastaFiles {input.fasta} \
--sjdbGTFfile {input.gff} \
--sjdbGTFtagExonParentTranscript Parent \
--genomeDir {output} 2> {log} \
{params.extra} \
--genomeDir {output.index} \
--outFileNamePrefix {output.index}/ \
> {log} 2>&1
"""

rule fq2bam:
Expand All @@ -31,62 +36,110 @@ rule fq2bam:
star_index= "resources/reference/star_index",
output:
bam="results/alignments/{sample}.star.bam",
recal="results/alignments/recal_gpu_{sample}.txt"
bai="results/alignments/{sample}.star.bam.bai"
# recal="results/alignments/recal_gpu_{sample}.txt"
log:
"logs/parabricks/fq2bam-{sample}.log",
threads: 1
resources:
all_gpus = 1
params:
wkdir=wkdir
shell:
"""
docker run --rm --gpus all --volume {wkdir} --volume {wkdir}/results/alignments/star \
--workdir {wkdir} \
nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
docker run --user 1006:1006 --rm --gpus all --volume {wkdir}:/workdir --workdir /workdir nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
pbrun rna_fq2bam \
--in-fq {input.reads} \
--genome-lib-dir resources/reference/star_index\
--output-dir results/alignments/star \
--ref {input.ref_fasta} \
--out-bam {output.bam} \
--out-recal-file results/alignments/recal_gpu_{wildcards.sample}.txt \
--read-files-command zcat
--in-fq /workdir/{input.reads[0]} /workdir/{input.reads[1]} \
--genome-lib-dir /workdir/resources/reference/star_index\
--output-dir /workdir \
--ref /workdir/{input.ref_fasta} \
--out-bam /workdir/{output.bam} \
--read-files-command zcat 2> {log}
"""


rule haplotype_caller:
"""
Run a GPU-accelerated haplotypeCaller algorithm.
"""
input:
bam = "results/alignments/{sample}.star.bam",
ref_fasta=config["reference"]["genome"].rstrip(".gz"),
recal="results/alignments/recal_gpu_{sample}.txt",
# recal="results/alignments/recal_gpu_{sample}.txt",
output:
vcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf"
log:
"logs/parabricks/haplotypecaller-{contig}-{sample}.log",
threads: 1
resources:
all_gpus = 1
params:
wkdir=wkdir,
ploidy=config['VariantAnalysis']['ploidy']
shell:
"""
docker run --rm --gpus all --volume {wkdir} --volume {wkdir}
-w {wkdir} \
nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
docker run --user 1006:1006 --rm --gpus all --volume {wkdir}:/workdir -w /workdir nvcr.io/nvidia/clara/clara-parabricks:4.3.0-1 \
pbrun haplotypecaller \
--ref {input.ref_fasta} \
--intervals {wildcards.contig} \
--in-bam {input.bam} \
--in-recal-file {input.recal} \
--out-variants {output.vcf} \
--ploidy {params.ploidy}
--rna \
--ref /workdir/{input.ref_fasta} \
--interval {wildcards.contig} \
--in-bam /workdir/{input.bam} \
--out-variants /workdir/{output.vcf} \
--ploidy {params.ploidy} 2> {log}
"""


rule create_dict:
input:
ref_fasta=config["reference"]["genome"].rstrip(".gz"),
output:
ref_dict=config["reference"]["genome"].rstrip(".gz").rstrip(".fa").rstrip(".fasta") + ".dict",
log:
"logs/gatk/create_dict.log",
params:
extra="", # optional: extra arguments for picard.
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
resources:
mem_mb=1024,
wrapper:
"v4.3.0/bio/picard/createsequencedictionary"


rule bgzip:
input:
vcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf",
output:
tbi="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf.gz",
params:
extra="", # optional
threads: 1
log:
"logs/gatk/bgzip/{contig}.{sample}.log",
wrapper:
"v4.3.0-25-g7d3aa9d/bio/bgzip"

rule tabix:
input:
vcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf.gz",
output:
tbi="results/variantAnalysis/vcfs/haplotypecaller/{contig}/{sample}.g.vcf.gz.tbi",
log:
"logs/gatk/tabix/{contig}.{sample}.log",
params:
"-p vcf",
wrapper:
"v4.3.0/bio/tabix/index"

rule combine_calls:
input:
ref=config['reference']['genome'].rstrip(".gz"),
gvcfs=expand(
"results/variantAnalysis/vcfs/haplotypecaller/{{contig}}/{sample}.g.vcf", sample=samples
"results/variantAnalysis/vcfs/haplotypecaller/{{contig}}/{sample}.g.vcf.gz", sample=samples
),
tbi=expand("results/variantAnalysis/vcfs/haplotypecaller/{{contig}}/{sample}.g.vcf.gz.tbi", sample=samples)
output:
gvcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/variants.g.vcf.gz",
log:
Expand All @@ -101,8 +154,6 @@ rule genotype_variants:
gvcf="results/variantAnalysis/vcfs/haplotypecaller/{contig}/variants.g.vcf.gz",
output:
vcf=temp("results/variantAnalysis/vcfs/haplotypecaller/variants.{contig}.vcf"),
params:
extra=config['reference']['genome'].rstrip(".gz"),
log:
"logs/gatk/genotypegvcfs.{contig}.log",
wrapper:
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/utilities.smk
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ rule IndexBams:
Index bams with samtools
"""
input:
bam="results/alignments/{sample}.star.bam" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam",
bam="results/alignments/{sample}.hisat2.bam",
output:
idx="results/alignments/{sample}.star.bam.bai" if config['pipeline'] == 'parabricks' else "results/alignments/{sample}.hisat2.bam.bai",
idx="results/alignments/{sample}.hisat2.bam.bai",
log:
"logs/IndexBams/{sample}.log",
wrapper:
Expand Down
Loading