# Calculate coverage statistics after mapping short-read sequenced isolates to a reference sequence
# Fetch the pipeline and work from the repository root: the later steps use
# repo-relative paths (config/sample.tsv, CoverageStats.smk, config/cluster.json,
# config/config.yaml), so they only resolve from inside the clone.
git clone https://github.com/kylegontjes/CoverageStats.git
cd CoverageStats
# Prepare the reference genome for mapping: BWA index, samtools FASTA index
# and GATK sequence dictionary. Replace "[reference genome]" with the path to
# the reference FASTA before running these commands.

# Load the Bioinformatics module first, then each tool module in turn.
module load Bioinformatics
for tool in bwa samtools gatk; do
  module load "$tool"
done

bwa index [reference genome]
samtools faidx [reference genome]
gatk CreateSequenceDictionary -R [reference genome]
# Build config/sample.tsv: a single-column table whose first line is the
# header "sample_id", followed by one sample name per line.
# Sample names come from the forward-read (_R1) files found in $path.
path="/nfs/esnitkin/Project_Penn_KPC/Sequence_data/fastq/Penn/SRA_submission/"
sample_id="sample_id"

# For every file name containing "_R1": keep the text before the first "."
# and remove the first "_R1" occurrence.
sample_names=$(ls -1 -- "$path" | grep _R1 | cut -d. -f1 | sed 's/_R1//')

# Make sure the destination directory exists before redirecting into it.
mkdir -p config

# printf writes each argument on its own line and terminates the last line.
# The previous `echo -e\n` was parsed as `echo -en`, which dropped the final
# newline and interpreted backslash escapes in the names.
# $sample_names is intentionally unquoted so every name becomes its own argument.
printf '%s\n' "$sample_id" $sample_names > config/sample.tsv
# Run the CoverageStats Snakemake workflow, submitting jobs through Slurm.
module load singularity
module load snakemake

# Dry run: list the jobs and print their shell commands without executing them.
snakemake -s CoverageStats.smk --dryrun -p

# sbatch is told to write each job log to slurm_out/; Slurm does not create
# missing output directories, so make sure it exists before submitting.
mkdir -p slurm_out

# Real run. The {cluster.*} fields in the sbatch template are filled from the
# --cluster-config file; --keep-going continues with independent jobs after a
# failure.
snakemake -s CoverageStats.smk \
  --use-singularity \
  -j 999 \
  --cluster "sbatch -A {cluster.account} -p {cluster.partition} -N {cluster.nodes} -t {cluster.walltime} -c {cluster.procs} --mem-per-cpu {cluster.pmem} --output=slurm_out/slurm-%j.out" \
  --cluster-config config/cluster.json \
  --configfile config/config.yaml \
  --latency-wait 30 \
  --keep-going
# Request an interactive bash session through Slurm: 1 node, 1 task per node,
# 1 CPU per task, 5 GB of memory per CPU, 12-hour time limit, charged to the
# esnitkin1 account.
srun \
  --account=esnitkin1 \
  --time=12:00:00 \
  --nodes=1 \
  --ntasks-per-node=1 \
  --cpus-per-task=1 \
  --mem-per-cpu=5GB \
  --pty /bin/bash