diff --git a/modules/nf-core/centrifuger/build/main.nf b/modules/nf-core/centrifuger/build/main.nf index 39964a411f2d..c1c7bcbabce6 100644 --- a/modules/nf-core/centrifuger/build/main.nf +++ b/modules/nf-core/centrifuger/build/main.nf @@ -5,7 +5,7 @@ process CENTRIFUGER_BUILD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/centrifuger:1.1.0--hf426362_0': - 'biocontainers/centrifuger:1.1.0--hf426362_0' }" + 'quay.io/biocontainers/centrifuger:1.1.0--hf426362_0' }" input: tuple val(meta), path(references, stageAs: 'genomes/*') diff --git a/modules/nf-core/centrifuger/centrifuger/environment.yml b/modules/nf-core/centrifuger/centrifuger/environment.yml new file mode 100644 index 000000000000..56bb36e80863 --- /dev/null +++ b/modules/nf-core/centrifuger/centrifuger/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::centrifuger=1.1.0" diff --git a/modules/nf-core/centrifuger/centrifuger/main.nf b/modules/nf-core/centrifuger/centrifuger/main.nf new file mode 100644 index 000000000000..f5f7bb2ba9a1 --- /dev/null +++ b/modules/nf-core/centrifuger/centrifuger/main.nf @@ -0,0 +1,83 @@ +process CENTRIFUGER_CENTRIFUGER { + tag "$meta.id" + label 'process_single' + + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/centrifuger:1.1.0--hf426362_0': + 'quay.io/biocontainers/centrifuger:1.1.0--hf426362_0' }" + + input: + tuple val(meta), path(reads) + path db + val save_unclassified + val save_classified + path barcode + path umi + + + output: + tuple val(meta), path("${meta.id}.tsv"), emit: classification_file + tuple val(meta), path("${meta.id}.classified*"), optional: true, emit: fastq_classified + tuple val(meta), path("${meta.id}.unclassified*"), optional: true, emit: fastq_unclassified + tuple val("${task.process}"), val('centrifuger'), eval("centrifuger -v 2>&1 | head -n 1 | cut -d ' ' -f 2 | sed 's/^v//'"),emit: versions_centrifuger, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "-u ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + // Optional outputs + def unclassified_arg = save_unclassified ? "--un ${prefix}.unclassified" : "" + def classified_arg = save_classified ? "--cl ${prefix}.classified" : "" + def barcode_arg = barcode ? "--barcode ${barcode}" : "" + def umi_arg = umi ? 
"--UMI ${umi}" : "" + + + """ + db_name=`find -L ${db} -name "*.1.cfr" -not -name "._*" | sed 's/\\.1.cfr\$//'` + + centrifuger \\ + -x \$db_name \\ + ${paired} \\ + ${unclassified_arg} \\ + ${classified_arg} \\ + ${barcode_arg} \\ + ${umi_arg} \\ + -t ${task.cpus} \\ + ${args} > ${prefix}.tsv + + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo ${args} + #main output + touch ${prefix}.tsv + + #Optional outputs + if ${save_unclassified}; then + if ${meta.single_end}; then + echo "" | gzip > ${prefix}.unclassified.fq.gz + else + echo "" | gzip > ${prefix}.unclassified_1.fq.gz + echo "" | gzip > ${prefix}.unclassified_2.fq.gz + fi + fi + + if ${save_classified}; then + if ${meta.single_end}; then + echo "" | gzip > ${prefix}.classified.fq.gz + else + echo "" | gzip > ${prefix}.classified_1.fq.gz + echo "" | gzip > ${prefix}.classified_2.fq.gz + fi + fi + """ +} diff --git a/modules/nf-core/centrifuger/centrifuger/meta.yml b/modules/nf-core/centrifuger/centrifuger/meta.yml new file mode 100644 index 000000000000..460abd2798b7 --- /dev/null +++ b/modules/nf-core/centrifuger/centrifuger/meta.yml @@ -0,0 +1,114 @@ +name: "centrifuger_centrifuger" +description: Classification of sequencing reads using the Centrifuger tool. +keywords: + - metagenomics + - classification + - centrifuger +tools: + - "centrifuger": + description: "Lossless compression of microbial genomes for efficient and accurate + metagenomic sequence classification." 
+ homepage: "https://github.com/mourisl/centrifuger" + documentation: "https://github.com/mourisl/centrifuger" + tool_dev_url: "https://github.com/mourisl/centrifuger" + doi: "10.1186/s13059-024-03244-4" + licence: + - "MIT" + identifier: biotools:centrifuger +input: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1', single_end:false ]` + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end, respectively. + pattern: "*.{fastq,fq,fastq.gz,fq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 #FASTQ + - db: + type: directory + description: Path to directory containing Centrifuger database files. + - save_unclassified: + type: boolean + description: | + Optional - if true, output unclassified reads. + - save_classified: + type: boolean + description: | + Optional - if true, output classified reads. + - barcode: + type: file + description: | + Optional barcode file. + - umi: + type: file + description: | + Optional UMI file. +output: + classification_file: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1', single_end:false ]` + - ${meta.id}.tsv: + type: file + description: | + File containing classification results + pattern: "${prefix}.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 #TSV + + fastq_classified: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${meta.id}.classified*: + type: file + description: FASTQ file(s) containing classified reads + pattern: "*.{fastq,fq,fastq.gz,fq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 + + fastq_unclassified: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${meta.id}.unclassified*: + type: file + description: FASTQ file(s) containing unclassified reads + pattern: "*.{fastq,fq,fastq.gz,fq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 + versions_centrifuger: + - - ${task.process}: + type: string + description: The name of the process + - centrifuger: + type: string + description: The name of the tool + - centrifuger -v 2>&1 | head -n 1 | cut -d ' ' -f 2 | sed 's/^v//': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - centrifuger: + type: string + description: The name of the tool + - centrifuger -v 2>&1 | head -n 1 | cut -d ' ' -f 2 | sed 's/^v//': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@haris18s" +maintainers: + - "@haris18s" + - "@sofstam" + - "@jfy133" diff --git a/modules/nf-core/centrifuger/centrifuger/tests/main.nf.test b/modules/nf-core/centrifuger/centrifuger/tests/main.nf.test new file mode 100644 index 000000000000..24484c0d3897 --- /dev/null +++ b/modules/nf-core/centrifuger/centrifuger/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process CENTRIFUGER_CENTRIFUGER" + script "../main.nf" + process "CENTRIFUGER_CENTRIFUGER" + + tag "modules" + tag "modules_nfcore" + tag "centrifuger" + tag "centrifuger/centrifuger" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/cfr_files_short.tar.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2 - fastq_single_end") { + when { + process { + """ + input[0] = [[ id:'test', single_end: true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map{ it[1] } + 
input[2] = true + input[3] = true + input[4] = [] + input[5] = [] + """ + } + } + then { + assertAll( + {assert process.success }, + { assert snapshot( + file(process.out.classification_file[0][1]).name, + file(process.out.fastq_classified[0][1]).name, + file(process.out.fastq_unclassified[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + test("sarscov2 - fastq_paired_end") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end: false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + input[4] = [] + input[5] = [] + """ + } + } + then { + assertAll( + {assert process.success }, + { assert snapshot( + file(process.out.classification_file[0][1]).name, + process.out.fastq_classified[0][1].collect { file(it).name }, + process.out.fastq_unclassified[0][1].collect {file (it).name }, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + test("sarscov2 - fastq_single_end_stub") { + options "-stub" + + when { + process { + """ + input[0] = [[ id:'test', single_end: true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + input[4] = [] + input[5] = [] + """ + } + } + then { + assertAll( + {assert process.success }, + { assert snapshot( + file(process.out.classification_file[0][1]).name, + file(process.out.fastq_classified[0][1]).name, + file(process.out.fastq_unclassified[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + +} diff --git 
a/modules/nf-core/centrifuger/centrifuger/tests/main.nf.test.snap b/modules/nf-core/centrifuger/centrifuger/tests/main.nf.test.snap new file mode 100644 index 000000000000..a0f84845c682 --- /dev/null +++ b/modules/nf-core/centrifuger/centrifuger/tests/main.nf.test.snap @@ -0,0 +1,71 @@ +{ + "sarscov2 - fastq_paired_end": { + "content": [ + "test.tsv", + [ + "test.classified_1.fq.gz", + "test.classified_2.fq.gz" + ], + [ + "test.unclassified_1.fq.gz", + "test.unclassified_2.fq.gz" + ], + { + "versions_centrifuger": [ + [ + "CENTRIFUGER_CENTRIFUGER", + "centrifuger", + "1.1.0-r299" + ] + ] + } + ], + "timestamp": "2026-04-20T10:50:42.284983", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - fastq_single_end": { + "content": [ + "test.tsv", + "test.classified.fq.gz", + "test.unclassified.fq.gz", + { + "versions_centrifuger": [ + [ + "CENTRIFUGER_CENTRIFUGER", + "centrifuger", + "1.1.0-r299" + ] + ] + } + ], + "timestamp": "2026-04-20T10:50:36.957143", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - fastq_single_end_stub": { + "content": [ + "test.tsv", + "test.classified.fq.gz", + "test.unclassified.fq.gz", + { + "versions_centrifuger": [ + [ + "CENTRIFUGER_CENTRIFUGER", + "centrifuger", + "1.1.0-r299" + ] + ] + } + ], + "timestamp": "2026-04-20T10:50:46.78207", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/centrifuger/quantification/environment.yml b/modules/nf-core/centrifuger/quantification/environment.yml new file mode 100644 index 000000000000..56bb36e80863 --- /dev/null +++ b/modules/nf-core/centrifuger/quantification/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::centrifuger=1.1.0" diff --git 
a/modules/nf-core/centrifuger/quantification/main.nf b/modules/nf-core/centrifuger/quantification/main.nf new file mode 100644 index 000000000000..34223d74bade --- /dev/null +++ b/modules/nf-core/centrifuger/quantification/main.nf @@ -0,0 +1,56 @@ +process CENTRIFUGER_QUANTIFICATION { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/centrifuger:1.1.0--hf426362_0': + 'quay.io/biocontainers/centrifuger:1.1.0--hf426362_0' }" + + input: + tuple val(meta), path(classification_file, stageAs: 'input/*') // staged under input/ so the "${prefix}.tsv" redirect below cannot truncate an input of the same name + path db + path taxonomy_nodes + path taxonomy_names + path size_table + + output: + tuple val(meta), path("${meta.id}.tsv"), emit: report_file + tuple val("${task.process}"), val('centrifuger'), eval("centrifuger -v 2>&1 | sed 's/Centrifuger v//'"), emit: versions_centrifuger, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // include -x option with index prefix or use specified files + def db_arg = "" + if (db) { + db_arg= " -x `find -L ${db} -name '*.1.cfr' -not -name '._*' | sed 's/\\.1.cfr\$//'`" + } + else { + def tax_arg = taxonomy_nodes ? "--taxonomy-tree ${taxonomy_nodes}" : "" + def name_arg = taxonomy_names ? "--name-table ${taxonomy_names}" : "" + def size_arg = size_table ? "--size-table ${size_table}" : "" + db_arg = "${tax_arg} ${name_arg} ${size_arg}" + } + + """ + centrifuger-quant \\ + ${db_arg} \\ + -c ${classification_file} \\ + ${args} > ${prefix}.tsv + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}"
+ + """ + echo $args + + #output + echo "" > ${prefix}.tsv + """ +} diff --git a/modules/nf-core/centrifuger/quantification/meta.yml b/modules/nf-core/centrifuger/quantification/meta.yml new file mode 100644 index 000000000000..62cd6935c383 --- /dev/null +++ b/modules/nf-core/centrifuger/quantification/meta.yml @@ -0,0 +1,79 @@ +name: "centrifuger_quantification" +description: Quantification (taxonomic profiling) of Centrifuger model +keywords: + - metagenomics + - quantification + - Centrifuger +tools: + - "centrifuger": + description: "Centrifuger is an efficient taxonomic classification method that + compares sequencing reads against a microbial genome database." + homepage: "https://github.com/mourisl/centrifuger" + documentation: "https://github.com/mourisl/centrifuger" + tool_dev_url: "https://github.com/mourisl/centrifuger" + doi: "10.1186/s13059-024-03244-4" + licence: + - "MIT" + identifier: biotools:centrifuger +input: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1', single_end:false ]` + - classification_file: + type: file + description: Path to file containing classification results + - db: + type: directory + description: Path to directory containing Centrifuger index files + - taxonomy_nodes: + type: file + description: File describing parent-child relationships of a taxonomic tree in NCBI nodes.dmp format + ontologies: + - edam: http://edamontology.org/format_1964 + - taxonomy_names: + type: file + description: File describing individual members of a taxonomic tree in NCBI names.dmp format + ontologies: + - edam: http://edamontology.org/format_1964 + - size_table: + type: file + description: Optional - Table of contig (or genome) sizes. 
+output: + report_file: + - - meta: + type: map + description: Groovy Map containing sample information. e.g. `[ + id:'sample1' ]` + - "${meta.id}.tsv": + type: file + description: | + File containing taxonomic profiling results + pattern: "${prefix}.tsv" + versions_centrifuger: + - - ${task.process}: + type: string + description: The name of the process + - centrifuger: + type: string + description: The name of the tool + - centrifuger -v 2>&1 | sed 's/Centrifuger v//': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - centrifuger: + type: string + description: The name of the tool + - centrifuger -v 2>&1 | sed 's/Centrifuger v//': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@haris18s" +maintainers: + - "@haris18s" + - "@sofstam" + - "@jfy133" diff --git a/modules/nf-core/centrifuger/quantification/tests/main.nf.test b/modules/nf-core/centrifuger/quantification/tests/main.nf.test new file mode 100644 index 000000000000..baeff66a108c --- /dev/null +++ b/modules/nf-core/centrifuger/quantification/tests/main.nf.test @@ -0,0 +1,91 @@ + +nextflow_process { + + name "Test Process CENTRIFUGER_QUANTIFICATION" + script "../main.nf" + process "CENTRIFUGER_QUANTIFICATION" + + tag "modules" + tag "modules_nfcore" + tag "centrifuger" + tag "centrifuger/quantification" + tag "untar" + tag "centrifuger/centrifuger" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0]=[ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/cfr_files_short.tar.gz', checkIfExists: true) ] + """ + } + } + run("CENTRIFUGER_CENTRIFUGER") { + + script "../../../centrifuger/centrifuger/main.nf" + process { + """ + input[0] = [ + [id: 'test', single_end: true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true) + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + input[4] = [] + input[5] = [] + """ + } + } + } + test("sarscov2 - fastq_se") { + when { + process { + """ + input[0] = CENTRIFUGER_CENTRIFUGER.out.classification_file + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.report_file[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() + } + + ) + } + + } + test("sarscov2 - quantification - stub") { + + options "-stub" + + when { + process { + """ + input[0] = CENTRIFUGER_CENTRIFUGER.out.classification_file + input[1] = UNTAR.out.untar.map{it[1]} + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/centrifuger/quantification/tests/main.nf.test.snap b/modules/nf-core/centrifuger/quantification/tests/main.nf.test.snap new file mode 100644 index 000000000000..aa7627c81798 --- /dev/null +++ b/modules/nf-core/centrifuger/quantification/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "sarscov2 - quantification - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + "CENTRIFUGER_QUANTIFICATION", + "centrifuger", + "1.1.0-r299" + ] + ], + "report_file": [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_centrifuger": [ + [ + "CENTRIFUGER_QUANTIFICATION", + "centrifuger", + "1.1.0-r299" + ] + ] + } + ], + "timestamp": "2026-04-24T14:19:43.036808", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - fastq_se": { + "content": [ + "test.tsv", + { + "versions_centrifuger": 
[ + [ + "CENTRIFUGER_QUANTIFICATION", + "centrifuger", + "1.1.0-r299" + ] + ] + } + ], + "timestamp": "2026-04-22T12:08:55.368311", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/taxpasta/merge/environment.yml b/modules/nf-core/taxpasta/merge/environment.yml index 2dc4820a1e00..109ec8c9e60f 100644 --- a/modules/nf-core/taxpasta/merge/environment.yml +++ b/modules/nf-core/taxpasta/merge/environment.yml @@ -5,4 +5,7 @@ channels: - bioconda dependencies: - bioconda::taxpasta=0.7.0 - - conda-forge::python=3.11 + - conda-forge::python=3.13 + - conda-forge::pandas=2.3.3 + - conda-forge::pandera=0.26.1 + - conda-forge::numpy=2.3.5 diff --git a/modules/nf-core/taxpasta/standardise/environment.yml b/modules/nf-core/taxpasta/standardise/environment.yml index 2dc4820a1e00..109ec8c9e60f 100644 --- a/modules/nf-core/taxpasta/standardise/environment.yml +++ b/modules/nf-core/taxpasta/standardise/environment.yml @@ -5,4 +5,7 @@ channels: - bioconda dependencies: - bioconda::taxpasta=0.7.0 - - conda-forge::python=3.11 + - conda-forge::python=3.13 + - conda-forge::pandas=2.3.3 + - conda-forge::pandera=0.26.1 + - conda-forge::numpy=2.3.5