Skip to content

Commit 4ae2168

Browse files
authored
Merge pull request #6 from harvardinformatics/bam_resources
Bam resources
2 parents 55f0082 + 3055102 commit 4ae2168

File tree

7 files changed: +11 -7 lines changed

Snakefile_bam2vcf_gatk

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ bamDir = config["bamsForGatk"]
 gvcfDir = gatkDir + config["gvcfDir"]
 dbDir = gatkDir + config["dbDir"]
 vcfDir = gatkDir + config["vcfDir_gatk"]
-intDir = config["intDir"]
+intDir = config["intDir"]
 maxIntervalLen = int(config["maxIntervalLen"])
 maxBpPerList = int(config["maxBpPerList"])
 maxIntervalsPerList = int(config["maxIntervalsPerList"])
@@ -27,7 +27,6 @@ minNmer = config["minNmer"]
 refBaseName = helperFun.getRefBaseName(config["ref"])
 
 # grab all samples for R1 to get list of names, no need to look at R2 which should have identical names
-#SAMPLES = ["ERR1013163"]
 SAMPLES = helperFun.getBamSampleNames(bamDir, bam_suffix)
 
 if not os.path.isdir(config["gatkDir"]):

Snakefile_fastq2bam

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ res_config = yaml.load(open("resources.yaml"))
 # rename variables from config file for clarity downstream
 fastq_suffix1 = config["fastq_suffix1"]
 fastq_suffix2 = config["fastq_suffix2"]
-fastqDir = config["fastqDir"]
 
 # this is where Snakemake output will go, specify with baseDir in config.yml
 fastqFilterDir = config["fastq2bamDir"] + config["fastqFilterDir"]

Snakefile_intervals

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ SAMPLES = helperFun.getBamSampleNames(bamDir, bam_suffix)
 if not os.path.isdir(intDir + "gatkLists"):
     os.system("mkdir -p " + intDir + "gatkLists")
 
-_, sample_dict, _ = helperFun.create_sample_dict("samples.csv")
+_, sample_dict, _ = helperFun.create_sample_dict(config["samples"])
 
 GENOMES = {sample_dict[k]["refGenome"]: sample_dict[k]["Organism"] for k in sample_dict.keys()}
 print(GENOMES)

config.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
 ##############################
 
 samples: "samples.csv" # name of the sample metadata CSV
-ref: "data/BHduck/genome/MU014702.1.fa" # location of the reference genome
+ref: "data/BHduck/genome/MU014702.1.fa" # location of the reference genome
 spp: "hetAtr" # species name/code for the final VCF output file name
+genome: "MU014702.1" # name of the reference genome without the path or file extension
 
 # If using the fastq -> BAM workflow change these, otherwise ignore them
-fastqDir: "data/BHduck/fastq/" # location of fastq files; must be followed by a "/"
+#fastqDir: "data/BHduck/fastq/" # location of fastq files; must be followed by a "/"
 fastq_suffix1: "_1.fastq.gz" # the suffix for the forward reads that follows all the sample names, e.g. "_1.fastq.gz" for sample "sampleName_1.fastq.gz"
 fastq_suffix2: "_2.fastq.gz" # the suffix for the reverse reads that follows all the sample names, e.g. "_2.fastq.gz" for sample "sampleName_2.fastq.gz"
 

resources.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ dedup:
 # calculate BAM summaries with samtools and picard
 bam_sumstats:
     mem: 9000
+merge_bams:
+    mem: 9000
 
 ###
 # BAM -> VCF workflow

rules/bam2vcf_gatk.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ rule gatherVcfs:
 rule vcftools:
     input:
         vcf = config["gatkDir"] + config['spp'] + "_final.vcf.gz",
-        int = intDir + "intervals_fb.bed"
+        int = intDir + config["genome"] + "_intervals_fb.bed"
     output:
         missing = gatkDir + "missing_data_per_ind.txt",
         SNPsPerInt = gatkDir + "SNP_per_interval.txt"

rules/fastq2bam.smk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,11 @@ rule merge_bams:
         bai = bamDir + "{sample}_sorted.bam.bai"
     conda:
         "../envs/fastq2bam.yml"
+    resources:
+        mem_mb = lambda wildcards, attempt: attempt * res_config['merge_bams']['mem']
     shell:
         "samtools merge {output.bam} {input} && samtools index {output.bam}"
+
 rule dedup:
     input:
         bamDir + "{sample}_sorted.bam",

0 commit comments

Comments
 (0)