Skip to content

Commit cf8c58b

Browse files
committed
live sequencing and flye simple
1 parent 0a8c786 commit cf8c58b

16 files changed

+157
-67
lines changed

analysis.nf

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ workflow centrifuge_database_wf {
143143
/**************************
144144
* MODULES
145145
**************************/
146+
include artic from './modules/artic'
146147
include abricate from './modules/abricate'
147148
include abricateBatch from './modules/abricateBatch'
148149
include abricateParser from './modules/PARSER/abricateParser'
@@ -168,13 +169,20 @@ workflow centrifuge_database_wf {
168169
include fastqTofasta from './modules/fastqTofasta'
169170
include fasttree from './modules/fasttree'
170171
include filter_fasta_by_length from './modules/filter_fasta_by_length'
172+
include flye from './modules/flye'
173+
include racon from './modules/racon'
174+
include medaka from './modules/medaka'
175+
include minimap2_polish from './modules/minimap2'
171176
include gtdbtk from './modules/gtdbtk'
172177
include gtdbtk_download_db from './modules/gtdbtkgetdatabase'
173178
include guppy_gpu from './modules/guppy_gpu'
179+
include gviz from './modules/PLOTS/gviz'
174180
include krona from './modules/krona'
181+
include live_guppy_gpu from './modules/guppy_gpu'
175182
include mafft from './modules/mafft'
176183
include mafft_supp from './modules/mafft_supp'
177184
include metamaps from './modules/metamaps'
185+
include minimap2 from './modules/minimap2'
178186
include nanoplot from './modules/nanoplot'
179187
include overview_parser from './modules/PARSER/overview_parser'
180188
include parse_plasmidinfo from './modules/PARSER/parse_plasmidinfo'
@@ -185,20 +193,15 @@ workflow centrifuge_database_wf {
185193
include prokka from './modules/prokka'
186194
include removeViaMapping from './modules/removeViaMapping'
187195
include rmetaplot from './modules/rmetaplot'
196+
include samtools from './modules/samtools'
188197
include sourclusterPlot from './modules/PLOTS/sourclusterPlot'
189198
include sourmash_download_db from './modules/sourmashgetdatabase'
190199
include sourmashclassification from './modules/sourclass'
191200
include sourmashclusterdir from './modules/sourclusterdir'
192201
include sourmashclusterfasta from './modules/sourclusterfasta'
193202
include sourmashmeta from './modules/sourmeta'
194-
include toytree from './modules/toytree'
195-
196-
// new
197-
include live_guppy_gpu from './modules/guppy_gpu'
198-
include minimap2 from './modules/minimap2'
199-
include samtools from './modules/samtools'
200-
include gviz from './modules/PLOTS/gviz'
201-
203+
include toytree from './modules/toytree'
204+
include filter_fastq_by_length from './modules/filter_fastq_by_length'
202205

203206
/**************************
204207
* SUB WORKFLOWS
@@ -381,10 +384,18 @@ workflow plasmid_comparision_wf {
381384
chromomap(parse_samtools(parse_plasmidinfo(group_by_sample).join(fastas)))
382385
}
383386

387+
// Nanopore assembly sub-workflow: the fastq channel is handed to flye, and the
// result is passed on step by step through minimap2_polish, racon and medaka.
// The medaka output is what the workflow emits.
workflow assembly_ont_wf {
    take:
        fastq
    main:
        flye(fastq)
        minimap2_polish(flye.out)
        racon(minimap2_polish.out)
        medaka(racon.out)
    emit:
        medaka.out
}
392+
393+
384394
/**************************
385395
* Work in Progress section
386396
**************************/
387-
397+
// Not sure about this one: it's mainly implemented in the other workflow,
398+
// prokka is hard to parse here
388399
workflow plasmid_annotate_wf {
389400
take:
390401
fastas //val(name), path(file)
@@ -401,6 +412,9 @@ workflow plasmid_annotate_wf {
401412
chromomap(parse_samtools(parse_prokka(group_by_sample).join(fastas)))
402413
}
403414

415+
// TODO: fastq files are not stored correctly; only one file is kept - probably some "overwrite" bug?
416+
// It could be that my links have the wrong name.
417+
// Storing the txt files was working, so you could add the PWD hash.
404418
workflow live_analysis_wf {
405419
take:
406420
sample_name
@@ -449,6 +463,8 @@ workflow {
449463
if (params.sourmeta && params.fastq) { sourmash_WIMP_FASTQ_wf(fastq_input_ch, sourmash_database_wf()) }
450464
if (params.tree_aa && params.dir && !params.fasta) { amino_acid_tree_wf(dir_input_ch) }
451465
if (params.tree_aa && params.dir && params.fasta) { amino_acid_tree_supp_wf(dir_input_ch, fasta_input_ch) }
466+
if (params.assembly_ont && params.fastq) { assembly_ont_wf(fastq_input_ch) }
467+
if (params.artic_ncov19 && params.fastq) { artic_nCov19_wf(fastq_input_ch) }
452468

453469
// live workflows
454470
if (params.watchFast5 && params.samplename && params.fasta) { live_analysis_wf(sample_name_ch, fast5_live_input_ch, fasta_input_ch) }
@@ -483,7 +499,7 @@ def helpMSG() {
483499
${c_blue} --abricate ${c_reset} antibiotic and plasmid screening ${c_green}[--fasta]${c_reset} or ${c_green}[--fastq]${c_reset}
484500
${c_blue} --mobile ${c_reset} screens for IS elements ${c_green}[--fasta]${c_reset}
485501
${c_blue} --res_compare ${c_reset} detailed assembly resistance comparision of 2 or more assemblies ${c_green} [--fasta]${c_reset}
486-
${c_dim} ..option flags: [--coverage] include coverage info written in fasta headers on last position e.g. > name_cov_9.3354 ${c_reset}
502+
${c_dim} ..option flags: [--coverage] use coverage info in fasta headers on last position e.g. > name_cov_9.3354 ${c_reset}
487503
${c_blue} --plasmid_analysis ${c_reset} analysis of plasmids with plots ${c_green}[--fasta]${c_reset}
488504
489505
${c_yellow}Cluster and Classifications:${c_reset}
@@ -498,25 +514,32 @@ def helpMSG() {
498514
${c_dim} ..option flags: [--gtdbtk_db] path to your own DB instead ${c_reset}
499515
500516
${c_yellow}Metagenomic Workflows:${c_reset}
501-
${c_blue} --centrifuge ${c_reset} metagenomic classification of reads${c_green} [--fastq]${c_reset} or ${c_green}[--fastqPair]${c_reset}
517+
${c_blue} --centrifuge ${c_reset} metagenomic classification of reads ${c_green}[--fastq]${c_reset} or ${c_green}[--fastqPair]${c_reset}
502518
${c_dim} ..option flags: [--centrifuge_db] path to your own DB instead, either .tar or .tar.gz ${c_reset}
503-
${c_blue} --metamaps ${c_reset} metagenomic class. of long reads ${c_green} [--fastq]${c_reset}
519+
${c_blue} --metamaps ${c_reset} metagenomic class. of long reads ${c_green}[--fastq]${c_reset}
504520
${c_dim} ..mandatory flags: [--memory] [--tax_db] e.g. --memory 100 --tax_db /databases/miniSeq+H
505521
506522
${c_yellow}Nanopore specific Workflows:${c_reset}
507523
${c_blue} --guppygpu ${c_reset} basecalling via guppy-gpu-nvidia ${c_green} [--dir]${c_reset}
508524
${c_dim} ..option flags: [--flowcell] [--kit] [--barcode] [--modbase]
509-
..default settings: [--flowcell $params.flowcell] [--kit $params.kit] [--modbase FALSE] ${c_reset}
525+
..default settings: [--flowcell $params.flowcell] [--kit $params.kit] [--modbase FALSE] ${c_reset}
510526
${c_dim} ..config files: turn on via [--config], modify config type via [--configtype]
511-
..default config type: [--configtype $params.configtype] ${c_reset}
527+
..default config type: [--configtype $params.configtype] ${c_reset}
512528
${c_blue} --nanoplot ${c_reset} read quality via nanoplot ${c_green}[--fastq]${c_reset}
529+
${c_blue} --assembly_ont ${c_reset} simple nanopore assembly ${c_green}[--fastq]${c_reset}
530+
${c_dim} ..option flags: [--gsize ${params.gsize}] [--model ${params.model}] [--overlap ${params.overlap}]
531+
532+
${c_yellow}Nanopore live analysis Workflows (WIP):${c_reset}
533+
${c_blue} --watchFast5 ${c_reset} watch a dir for fast5 files, basecall them and map against reference
534+
Needs: ${c_green}[--samplename]${c_reset} and one multifasta file via ${c_green}[--fasta]${c_reset}
513535
514536
${c_yellow}Other Workflows:${c_reset}
515-
${c_blue} --deepHumanPathogen ${c_reset} pathogen identification in human ${c_green} [--fastqPair '*_R{1,2}.fastq.gz']${c_reset}
516-
${c_blue} --plasflow ${c_reset} predicts & seperates plasmid-seqs${c_green} [--fasta]${c_reset}
517-
${c_blue} --tree_aa ${c_reset} build a aminoacid tree of a dir with aa seqs ${c_green}[--dir]${c_reset}
518-
${c_dim} ..option flags: [--filenames] use filenames as labels instead of contig names${c_reset}
519-
[--fasta] add one multi protein file as "tree enhancer"e.g. [--fasta multipleProteins.aa]${c_reset}
537+
${c_blue} --deepHumanPathogen ${c_reset} pathogen identification in human ${c_green}[--fastqPair '*_R{1,2}.fastq.gz']${c_reset}
538+
${c_blue} --plasflow ${c_reset} predicts & seperates plasmid-seqs ${c_green}[--fasta]${c_reset}
539+
${c_blue} --tree_aa ${c_reset} aminoacid tree of a dir with aa seq ${c_green}[--dir]${c_reset}
540+
${c_dim} ..option flags: [--filenames] use filenames as labels instead of contig names
541+
${c_dim} [--fasta] add one multi protein file as "tree enhancer"
542+
${c_dim} e.g. [--fasta multipleProteins.aa]${c_reset}
520543
521544
${c_reset}Options:
522545
--cores max cores for local use [default: $params.cores]

configs/docker.config

100644100755
Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,33 @@
11
docker { enabled = true }
2-
process {
3-
withLabel: abricate { container = 'nanozoo/abricate:0.9.8--97b9f1e' }
4-
withLabel: baloonplot { container = 'nanozoo/r_ggpubr:0.2.5--4b52011' }
5-
withLabel: blast { container = 'nanozoo/blast:2.9.0--ded80ad' }
6-
withLabel: bokeh { container = 'quay.io/biocontainers/cami-opal:1.0.5--py_2' }
7-
withLabel: bwa { container = 'quay.io/biocontainers/shovill:1.0.4--0' }
8-
withLabel: centrifuge { container = 'nanozoo/centrifuge:v1.0.4-beta--fab181c' }
9-
withLabel: chromomap { container = 'nanozoo/r_fungi:0.1--097b1bb' }
10-
withLabel: emboss { container = 'quay.io/biocontainers/emboss:6.5.7--4' }
11-
withLabel: fargene { container = 'nanozoo/fargene:0.1--df641ae'}
12-
withLabel: fasttree { container = 'nanozoo/fasttree:2.1.10--1473542' }
13-
withLabel: filtlong { container = 'nanozoo/filtlong:v0.2.0--afa175e' }
14-
withLabel: ggplot2 { container = 'nanozoo/r_fungi:0.1--097b1bb' }
15-
withLabel: gtdbtk { container = 'nanozoo/gtdbtk:0.3.2--676fab7' }
16-
withLabel: krona { container = 'nanozoo/krona:2.7.1--658845d' }
17-
withLabel: mafft { container = 'nanozoo/mafft:7.455--a988e44'}
18-
withLabel: metamaps { container = 'nanozoo/metamaps:latest' }
19-
withLabel: nanoplot { container = 'nanozoo/nanoplot:1.25.0--4e2882f' }
20-
withLabel: plasflow { container = 'quay.io/biocontainers/plasflow:1.1.0--py35_0' }
21-
withLabel: rmetaplot { container = 'replikation/r-metasourmash:v0.2' }
22-
withLabel: samtools { container = 'nanozoo/pilon:1.23--b21026d' }
23-
withLabel: seqtk { container = 'quay.io/biocontainers/fusioncatcher-seqtk:1.2--h84994c4_0'}
24-
withLabel: sourmash { container = 'nanozoo/sourmash:2.3.0--4257650' }
25-
withLabel: toytree { container = 'nanozoo/toytree:1.1.2--1295ae6' }
26-
withLabel: prokka { container = 'nanozoo/prokka:1.14.5--33be639' }
272

28-
withLabel: minimap2 { container = 'nanozoo/minimap2:2.17--caba7af' }
29-
withLabel: bedtools { container = 'quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0' }
30-
31-
}
3+
process {
4+
withLabel: abricate { container = 'nanozoo/abricate:0.9.8--97b9f1e' }
5+
withLabel: baloonplot { container = 'nanozoo/r_ggpubr:0.2.5--4b52011' }
6+
withLabel: bedtools { container = 'quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0' }
7+
withLabel: blast { container = 'nanozoo/blast:2.9.0--ded80ad' }
8+
withLabel: bokeh { container = 'quay.io/biocontainers/cami-opal:1.0.5--py_2' }
9+
withLabel: bwa { container = 'quay.io/biocontainers/shovill:1.0.4--0' }
10+
withLabel: centrifuge { container = 'nanozoo/centrifuge:v1.0.4-beta--fab181c' }
11+
withLabel: chromomap { container = 'nanozoo/r_fungi:0.1--097b1bb' }
12+
withLabel: emboss { container = 'quay.io/biocontainers/emboss:6.5.7--4' }
13+
withLabel: fargene { container = 'nanozoo/fargene:0.1--df641ae'}
14+
withLabel: fasttree { container = 'nanozoo/fasttree:2.1.10--1473542' }
15+
withLabel: filtlong { container = 'nanozoo/filtlong:v0.2.0--afa175e' }
16+
withLabel: flye { container = 'nanozoo/flye:2.5--bae51d9' }
17+
withLabel: ggplot2 { container = 'nanozoo/r_fungi:0.1--097b1bb' }
18+
withLabel: gtdbtk { container = 'nanozoo/gtdbtk:0.3.2--676fab7' }
19+
withLabel: krona { container = 'nanozoo/krona:2.7.1--658845d' }
20+
withLabel: mafft { container = 'nanozoo/mafft:7.455--a988e44'}
21+
withLabel: medaka { container = 'nanozoo/medaka:0.10.0--1e71fdd' }
22+
withLabel: metamaps { container = 'nanozoo/metamaps:latest' }
23+
withLabel: minimap2 { container = 'nanozoo/minimap2:2.17--caba7af' }
24+
withLabel: nanoplot { container = 'nanozoo/nanoplot:1.25.0--4e2882f' }
25+
withLabel: plasflow { container = 'quay.io/biocontainers/plasflow:1.1.0--py35_0' }
26+
withLabel: prokka { container = 'nanozoo/prokka:1.14.5--33be639' }
27+
withLabel: rmetaplot { container = 'replikation/r-metasourmash:v0.2' }
28+
withLabel: samtools { container = 'nanozoo/pilon:1.23--b21026d' }
29+
withLabel: seqtk { container = 'quay.io/biocontainers/fusioncatcher-seqtk:1.2--h84994c4_0'}
30+
withLabel: sourmash { container = 'nanozoo/sourmash:2.3.0--4257650' }
31+
withLabel: toytree { container = 'nanozoo/toytree:1.1.2--1295ae6' }
32+
withLabel: artic { container = 'nanozoo/artic-ncov2019:0.0--44566ac' }
33+
}

configs/gcloud.config

100644100755
File mode changed.

configs/local.config

100644100755
Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,25 @@ process.executor = 'local'
22

33
process {
44
withLabel: abricate { cpus = 1 }
5+
withLabel: blast { cpus = params.cores }
6+
withLabel: bokeh { cpus = 1 }
57
withLabel: bwa { cpus = params.cores }
68
withLabel: centrifuge { cpus = params.cores }
79
withLabel: emboss { cpus = params.cores }
10+
withLabel: fasttree { cpus = params.cores }
811
withLabel: filtlong { cpus = 1 }
12+
withLabel: flye { cpus = params.cores }
913
withLabel: ggplot2 { cpus = 1 }
1014
withLabel: gtdbtk { cpus = params.cores }
1115
withLabel: krona { cpus = params.cores }
16+
withLabel: medaka { cpus = params.cores }
1217
withLabel: metamaps { cpus = params.cores }
18+
// Selector name matches the 'minimap2' label used in configs/docker.config.
withLabel: minimap2 { cpus = params.cores }
1319
withLabel: nanoplot { cpus = params.cores }
1420
withLabel: plasflow { cpus = 4 }
21+
withLabel: racon { cpus = params.cores }
1522
withLabel: rmetaplot { cpus = params.cores }
1623
withLabel: seqtk { cpus = params.cores }
1724
withLabel: sourmash { cpus = 4 }
1825
withLabel: ubuntu { cpus = 1 }
19-
withLabel: bokeh { cpus = 1 }
20-
withLabel: fasttree { cpus = params.cores }
21-
22-
withLabel: blast { cpus = params.cores }
2326
}

data/IS.fna

100644100755
File mode changed.

modules/PLOTS/chromomap.nf

100644100755
File mode changed.

modules/bedtools.nf

100644100755
Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,3 @@ process bedtools {
1212
}
1313

1414

15-
16-
/*
17-
process minimap2_to_bam introduces a Math.random() function to add a random long number to the output.
18-
This is introduced as a quick fix, because otherweise we create to much files of the same name which will be a issue in
19-
the process later on when we use all of them together (differential binning)
20-
*/

modules/fasttree.nf

100644100755
File mode changed.

modules/filter_fastq_by_length.nf

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
 * Keeps only the reads whose sequence is between 400 and 700 bases long
 * (both bounds inclusive) and writes them gzip-compressed.
 *
 * input:  tuple of sample name and one read file (.fastq or .fastq.gz)
 * output: tuple of sample name and "<name>_filtered.fastq.gz"
 */
process filter_fastq_by_length {
    label 'ubuntu'
    input:
        tuple val(name), path(reads)
    output:
        tuple val(name), path("${name}_filtered.fastq.gz")
    script:
        """
        # Choose the reader by file extension; anything else is rejected explicitly.
        case "${reads}" in
            *.fastq.gz) reader="zcat" ;;
            *.fastq)    reader="cat" ;;
            *)
                echo "filter_fastq_by_length: unsupported input ${reads} (expected .fastq or .fastq.gz)" >&2
                exit 1
                ;;
        esac

        # paste joins every 4-line fastq record into one tab-separated line, so
        # field 2 is the sequence. Both length bounds must be tested here, while
        # the record is still a single line; after sed splits it again there is
        # no second field left to measure.
        \$reader ${reads} | paste - - - - |
            awk -F"\\t" 'length(\$2) >= 400 && length(\$2) <= 700' |
            sed 's/\\t/\\n/g' | gzip > "${name}_filtered.fastq.gz"
        """
}
21+
22+
/* Comments:
23+
This is a very fast process that removes short reads (sequences shorter than 400 bases).
24+
25+
It accepts .fastq or .fastq.gz input.
26+
*/

modules/flye.nf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Runs flye on the reads of one sample and renames the resulting
// assembly/assembly.fasta to <name>.fasta.
process flye {
    label 'flye'

    input:
        tuple val(name), path(read)

    output:
        // the input read file is emitted again alongside the assembly
        tuple val(name), path(read), path("${name}.fasta")

    script:
        // genome size and minimum overlap come from params.gsize / params.overlap
        """
        flye -g ${params.gsize} -t ${task.cpus} --nano-raw ${read} -o assembly --min-overlap ${params.overlap}
        mv assembly/assembly.fasta ${name}.fasta
        """
}

0 commit comments

Comments
 (0)