
Commit 45f7476

committed Oct 25, 2022
added kraken2
1 parent e963d10 commit 45f7476

12 files changed: +205 -6 lines changed
 

analysis.nf

+4
@@ -200,6 +200,7 @@ workflow centrifuge_database_wf {
 include { bakta_wf } from './workflows/bakta_wf'
 include { checkm_wf } from './workflows/checkm_wf'
 include { transposon_compare_wf } from './workflows/transposon_compare_wf'
+include { read_classification_illumina_pe_wf } from './workflows/taxonomic_read_class_wf'
 
 /**************************
 * SUB WORKFLOWS
@@ -472,6 +473,7 @@ workflow {
 if (params.bakta && params.fasta) { bakta_wf(fasta_input_ch) }
 if (params.checkm && params.dir) { checkm_wf(dir_input_ch) }
 if (params.searchterm && params.fasta) { transposon_compare_wf(fasta_input_ch)}
+if (params.kraken2 && params.fastqPair) { read_classification_illumina_pe_wf(fastqPair_input_ch) }
 
 // live workflows
 if (params.watchFast5 && params.samplename && params.fasta) { live_analysis_wf(sample_name_ch, fast5_live_input_ch, fasta_input_ch) }
@@ -531,6 +533,8 @@ def helpMSG() {
 ${c_dim} ..option flags: [--centrifuge_db] path to your own DB instead, either .tar or .tar.gz ${c_reset}
 ${c_blue} --metamaps ${c_reset} metagenomic class. of long reads ${c_green}[--fastq]${c_reset}
 ${c_dim} ..mandatory flags: [--memory] [--tax_db] e.g. --memory 100 --tax_db /databases/miniSeq+H
+${c_blue} --kraken2 ${c_reset} metagenomic classification of reads ${c_green}[--fastqPair]${c_reset}
+${c_dim} ..option flags: [--krakendb] path to your own DB instead. Format: .tar.gz ${c_reset}
 
 ${c_yellow}Nanopore specific Workflows:${c_reset}
 ${c_blue} --guppygpu ${c_reset} basecalling via guppy-gpu-nvidia ${c_green} [--dir]${c_reset}
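
Together, the new include, the params.kraken2 && params.fastqPair guard and the help entry expose the classification as a normal pipeline switch. A minimal invocation sketch (the --fastqPair glob, --cores and --output values are illustrative, not part of this commit):

# sketch: switch on only the new Illumina PE read classification
nextflow run analysis.nf --kraken2 \
    --fastqPair 'reads/*_R{1,2}.fastq.gz' \
    --krakendb /databases/k2_standard_20220926.tar.gz \
    --cores 24 --output results
# omit --krakendb to let the pipeline download the standard database itself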

configs/docker.config

+4 -1
@@ -3,6 +3,7 @@ process {
 withLabel: artic { container = 'nanozoo/artic-ncov2019:0.0--44566ac' }
 withLabel: bakta { container = 'nanozoo/bakta:1.2.1--bf38720' }
 withLabel: baloonplot { container = 'nanozoo/r_ggpubr:0.2.5--4b52011' }
+withLabel: bracken { container = 'nanozoo/bracken:2.8--dcb3e47' }
 withLabel: bedtools { container = 'quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0' }
 withLabel: blast { container = 'nanozoo/blast:2.9.0--ded80ad' }
 withLabel: bokeh { container = 'quay.io/biocontainers/cami-opal:1.0.5--py_2' }
@@ -19,7 +20,9 @@ process {
 withLabel: flye { container = 'nanozoo/flye:2.8.3--2769e9b' }
 withLabel: ggplot2 { container = 'nanozoo/ggplot2:3.3.1--303f617' }
 withLabel: gtdbtk { container = 'nanozoo/gtdb:1.6.0--5383545' }
-withLabel: krona { container = 'nanozoo/krona:2.7.1--658845d' }
+withLabel: kraken2 { container = 'nanozoo/kraken2:2.1.1--d5ded30'}
+withLabel: krakentools { container = 'nanozoo/krakentools:1.2--13d5ba5'}
+withLabel: krona { container = 'nanozoo/krona:2.7.1--e7615f7'}
 withLabel: mafft { container = 'nanozoo/mafft:7.455--a988e44'}
 withLabel: medaka { container = 'nanozoo/medaka:1.5.0--853c9aa' }
 withLabel: metamaps { container = 'nanozoo/metamaps:latest' }
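
The new labels pin exact image tags; pulling them once up front confirms the tags resolve before a pipeline run (plain docker commands, not part of the commit):

# sketch: pre-pull the images referenced by the new labels
docker pull nanozoo/kraken2:2.1.1--d5ded30
docker pull nanozoo/bracken:2.8--dcb3e47
docker pull nanozoo/krakentools:1.2--13d5ba5
docker pull nanozoo/krona:2.7.1--e7615f7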

configs/gcloud.config

+2
@@ -2,6 +2,7 @@
 process {
 withLabel: abricate { cpus = 6 ; memory = '14 GB' }
 withLabel: bakta { cpus = 34 ; memory = '150 GB' }
+withLabel: bracken { cpus = 24; memory = '48 GB' }
 withLabel: baloonplot { cpus = 2 ; memory = '6 GB' }
 withLabel: bedtools { cpus = 8 ; memory = '16 GB' }
 withLabel: blast { cpus = 8 ; memory = '16 GB' }
@@ -19,6 +20,7 @@ process {
 withLabel: flye { cpus = 20 ; memory = '40 GB' }
 withLabel: ggplot2 { cpus = 2 ; memory = '6 GB' }
 withLabel: gtdbtk { cpus = 36 ; memory = '226 GB' }
+withLabel: kraken2 { cpus = 24; memory = '64 GB' }
 withLabel: krona { cpus = 2 ; memory = '6 GB' }
 withLabel: mafft { cpus = 16 ; memory = '20 GB' }
 withLabel: medaka { cpus = 16 ; memory = '20 GB' }

configs/local.config

+2
@@ -4,6 +4,7 @@ process {
 withLabel: abricate { cpus = 1 }
 withLabel: blast { cpus = params.cores }
 withLabel: bokeh { cpus = 1 }
+withLabel: bracken { cpus = params.cores }
 withLabel: bwa { cpus = params.cores }
 withLabel: cd_hit { cpus = params.cores }
 withLabel: centrifuge { cpus = params.cores }
@@ -14,6 +15,7 @@ process {
 withLabel: flye { cpus = params.cores }
 withLabel: ggplot2 { cpus = 1 }
 withLabel: gtdbtk { cpus = params.cores }
+withLabel: kraken2 { cpus = params.cores }
 withLabel: krona { cpus = params.cores }
 withLabel: medaka { cpus = params.cores }
 withLabel: metamaps { cpus = params.cores }

modules/centrifuge_illumina.nf

-5
@@ -3,11 +3,6 @@ process centrifuge_illumina {
 publishDir "${params.output}/${name}/centrifuge", mode: 'copy', pattern: "${name}.out"
 label 'centrifuge'
 
-//errorStrategy { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
-//errorStrategy { task.exitStatus == 14 ? 1 : task.attempt }
-//cpus { 12 * task.attempt }
-//memory { 70.GB * task.attempt }
-//maxRetries 2
 
 input:
 tuple val(name), file(fastq)

nextflow.config

+3
@@ -20,13 +20,15 @@ params {
 list = false
 searchterm = ''
 range = '5000'
+readlength = '150'
 
 // databases
 centrifuge_db = false
 gtdbtk_db = false
 sour_db = false
 tax_db = false
 bakta_db = false
+krakendb = false
 
 // tools
 abricate = false
@@ -50,6 +52,7 @@ params {
 tree_aa = false
 bakta = false
 checkm = false
+kraken2 = false
 
 // settings
 update = false
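
The new readlength default feeds Bracken's -r flag further down, so it should match the actual sequencing read length rather than silently staying at 150; like the other new params it can be overridden per run (values and glob illustrative):

# sketch: override the new defaults for a 100 bp paired-end dataset
nextflow run analysis.nf --kraken2 --fastqPair 'reads/*_R{1,2}.fastq.gz' --readlength 100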

workflows/process/bracken.nf

+24
@@ -0,0 +1,24 @@
+process bracken {
+label 'bracken'
+publishDir "${params.output}/${name}/Read_classification", mode: 'copy'
+input:
+tuple val(name), path(krakenout), path(kreport)
+path(database)
+output:
+tuple val(name), path("${name}.bracken"), path("${name}.breport")
+script:
+"""
+mkdir -p kraken_db && tar xzf ${database} -C kraken_db
+
+bracken -d kraken_db -i ${name}.kreport -r ${params.readlength} -l S -t ${task.cpus} \
+-o ${name}.bracken -w ${name}.breport
+
+
+# cleanup to reduce footprint
+rm -rf kraken_db/
+"""
+stub:
+"""
+touch ${name}.bracken ${name}.breport
+"""
+}
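
Bracken resolves -r ${params.readlength} against a k-mer distribution file inside the extracted database, so --readlength has to be one the database was built for; the prebuilt standard bundles ship several (the file naming below follows the usual Bracken convention and is stated as an assumption). Also worth double-checking: as far as I can tell from Bracken's usage, -t is the minimum-read threshold rather than a thread count, so ${task.cpus} ends up in that slot.

# sketch: list the Bracken k-mer distributions bundled in the database archive
tar tzf kraken.tar.gz | grep kmer_distrib
# expect entries like database150mers.kmer_distrib, i.e. run with --readlength 150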

workflows/process/download_database_kraken2.nf

+32
@@ -0,0 +1,32 @@
+process download_database_kraken2 {
+label "ubuntu"
+storeDir "${params.databases}/kraken2_k2standard_20220926"
+errorStrategy 'retry'
+maxRetries 1
+output:
+path("kraken.tar.gz")
+script:
+if (task.attempt.toInteger() == 1)
+"""
+echo ${task.attempt}
+wget --no-check-certificate https://genome-idx.s3.amazonaws.com/kraken/k2_standard_20220926.tar.gz -O kraken.tar.gz
+"""
+else if (task.attempt.toInteger() > 1)
+"""
+echo ${task.attempt}
+wget --no-check-certificate https://genome-idx.s3.amazonaws.com/kraken/k2_standard_20220926.tar.gz -O kraken.tar.gz
+"""
+stub:
+"""
+touch kraken.tar.gz
+"""
+}
+
+
+/*
+
+DATABASES
+
+https://benlangmead.github.io/aws-indexes/k2
+
+*/
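
Because the archive is kept via storeDir, it is only downloaded once per database directory; it can equally be fetched by hand and passed in with --krakendb, which bypasses this process entirely (the run command itself is a sketch, the glob is illustrative):

# sketch: pre-fetch the standard Kraken2/Bracken bundle and reuse it across runs
wget https://genome-idx.s3.amazonaws.com/kraken/k2_standard_20220926.tar.gz -O k2_standard.tar.gz
nextflow run analysis.nf --kraken2 --fastqPair 'reads/*_R{1,2}.fastq.gz' --krakendb "$PWD/k2_standard.tar.gz"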

workflows/process/kraken2.nf

+26
@@ -0,0 +1,26 @@
+process kraken2_illumina_pe {
+label 'kraken2'
+publishDir "${params.output}/${name}/Read_classification", mode: 'copy'
+input:
+tuple val(name), path(reads)
+path(database)
+output:
+tuple val(name), path("${name}.kraken.out"), path("${name}.kreport")
+script:
+"""
+mkdir -p kraken_db && tar xzf ${database} -C kraken_db
+
+
+kraken2 --db kraken_db --threads ${task.cpus} --paired --output ${name}.kraken.out --report ${name}.kreport ${reads}
+
+# kraken2 can optionally also emit the classified/unclassified reads, e.g.:
+# kraken2 --paired --classified-out cseqs#.fq seqs_1.fq seqs_2.fq
+
+# cleanup to reduce footprint
+rm -rf kraken_db/
+"""
+stub:
+"""
+touch ${name}.kraken.out ${name}.kreport
+"""
+}
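
The commented hint can be turned into real flags: kraken2 accepts --classified-out and --unclassified-out, and in --paired mode the # in the file name is replaced by the mate number. A sketch of the extended call (sample names illustrative):

# sketch: additionally write the classified and unclassified read pairs
kraken2 --db kraken_db --threads 8 --paired \
    --classified-out cseqs#.fq --unclassified-out useqs#.fq \
    --output sample.kraken.out --report sample.kreport \
    sample_R1.fastq sample_R2.fastq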

workflows/process/krakentools.nf

+25
@@ -0,0 +1,25 @@
+process krakentools {
+label 'krakentools'
+publishDir "${params.output}/${name}/Read_classification/alpha_diversity", mode: 'copy', pattern: "${name}_alpha-diversity.txt"
+publishDir "${params.output}/${name}/Read_classification", mode: 'copy', pattern: "${name}.b.krona.txt"
+input:
+tuple val(name), path(brackenout), path(breport)
+output:
+tuple val(name), path("${name}_alpha-diversity.txt"), path("${name}.b.krona.txt")
+script:
+"""
+alpha_diversity.py -f ${brackenout} -a BP > ${name}_alpha-diversity.txt
+alpha_diversity.py -f ${brackenout} -a Sh >> ${name}_alpha-diversity.txt
+alpha_diversity.py -f ${brackenout} -a F >> ${name}_alpha-diversity.txt
+alpha_diversity.py -f ${brackenout} -a Si >> ${name}_alpha-diversity.txt
+alpha_diversity.py -f ${brackenout} -a ISi >> ${name}_alpha-diversity.txt
+
+# krona report
+kreport2krona.py -r ${breport} -o ${name}.b.krona.txt --no-intermediate-ranks
+
+"""
+stub:
+"""
+touch ${name}_alpha-diversity.txt ${name}.b.krona.txt
+"""
+}
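
For reference, the metric codes are Berger-Parker (BP), Shannon (Sh), Fisher (F), Simpson (Si) and inverse Simpson (ISi); Shannon, for instance, is H = -sum_i p_i ln p_i over the Bracken species fractions. A single metric can be re-run by hand on one Bracken table (file name illustrative):

# sketch: Shannon diversity only, from an existing Bracken species table
alpha_diversity.py -f sample.bracken -a Sh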

workflows/process/krona.nf

+44
@@ -0,0 +1,44 @@
+process krona {
+label 'krona'
+publishDir "${params.output}/${params.readqcdir}/${name}/", mode: 'copy'
+input:
+tuple val(name), path(kraken2), path(kreport)
+output:
+tuple val(name), file("${name}_krona.html")
+script:
+"""
+cat ${kreport} | cut -f 3,5 > file.krona
+ktImportTaxonomy file.krona -m 1
+mv *.html ${name}_krona.html
+"""
+stub:
+"""
+touch ${name}_krona.html
+"""
+}
+
+process krona_from_bracken {
+label 'krona'
+publishDir "${params.output}/${name}/Read_classification/", mode: 'copy'
+input:
+tuple val(name), path(alphadiversity), path(kronatextinput)
+output:
+tuple val(name), file("${name}.krona.html")
+script:
+"""
+ktImportText ${kronatextinput} -o ${name}.krona.html
+"""
+stub:
+"""
+touch ${name}.krona.html
+"""
+}
+
+
+/*
+
+python KrakenTools/kreport2krona.py -r breports/SRR14143424.breport -o b_krona_txt/SRR14143424.b.krona.txt --no-intermediate-ranks
+KronaScripts/ktImportText b_krona_txt/SRR14143424.b.krona.txt -o krona_html/SRR14143424.krona.html
+
+*/
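
The cut -f 3,5 in the first process relies on the standard kraken2 report layout (1 percentage, 2 clade read count, 3 reads assigned directly to the taxon, 4 rank code, 5 NCBI taxID, 6 name), so Krona receives "direct read count <TAB> taxID", with -m 1 marking column 1 as the magnitude. A quick way to eyeball that intermediate (file name illustrative):

# sketch: preview the two columns handed to ktImportTaxonomy
cut -f 3,5 sample.kreport | sort -k1,1nr | head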

workflows/taxonomic_read_class_wf.nf

+39
@@ -0,0 +1,39 @@
+include { kraken2_illumina_pe } from './process/kraken2.nf'
+include { krona; krona_from_bracken } from './process/krona.nf'
+include { download_database_kraken2 } from './process/download_database_kraken2.nf'
+include { bracken } from './process/bracken.nf'
+include { krakentools } from './process/krakentools.nf'
+
+workflow read_classification_illumina_pe_wf {
+take:
+fastq
+main:
+
+// database download
+if (params.krakendb) { kraken_db = file("${params.krakendb}") }
+else { download_database_kraken2(); kraken_db = download_database_kraken2.out }
+
+// classification
+kraken2_illumina_pe(fastq, kraken_db)
+
+// alpha diversity, abundance and krona plots
+krona_from_bracken(krakentools(bracken(kraken2_illumina_pe.out, kraken_db)))
+
+emit:
+kraken = kraken2_illumina_pe.out
+}
+
+
+
+
+
+/*
+Protocol here:
+
+
+you might want to add the option to calculate beta diversity here?
+
+
+https://www.nature.com/articles/s41596-022-00738-y
+
+*/
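
Because every new process also defines a stub: block, the wiring of this sub-workflow can be smoke-tested without the full database or real classification (flags and glob are assumptions in line with the help text above):

# sketch: dry-run the new sub-workflow using the stub blocks
nextflow run analysis.nf --kraken2 --fastqPair 'reads/*_R{1,2}.fastq.gz' -stub-run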
