Skip to content

Commit c8ad076

Browse files
sahunoclaude
andcommitted
feat: add modkit/validate module
Add new nf-core module wrapping `modkit validate`, which benchmarks modified-base calls in one or more mod-BAMs against paired ground-truth BED files and emits a tab-separated summary of correct / incorrect / filtered calls per class. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 135d883 commit c8ad076

6 files changed

Lines changed: 282 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- "bioconda::ont-modkit=0.6.1"
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Runs `modkit validate` on one or more mod-BAMs, each paired with a
// ground-truth BED, and writes the validation summary to `<prefix>.tsv`.
process MODKIT_VALIDATE {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ont-modkit:0.6.1--hcdda2d0_0':
        'quay.io/biocontainers/ont-modkit:0.6.1--hcdda2d0_0' }"

    input:
    // Multiple sample pairs can be supplied as list inputs; BAMs and BEDs are
    // paired by index and passed as repeated `--bam-and-bed <BAM> <BED>`.
    //
    // `bams` and `bais` deliberately share the same `bams/?/*` staging pattern:
    // the i-th index is staged into the same numbered directory as the i-th
    // BAM, so a sibling-path index lookup (`<bam>.bai` / `<bam>.csi`) can
    // succeed. Staging the indices in a separate `bais/` tree would leave every
    // BAM without an adjacent index. This requires `bais` to be supplied in the
    // same order as `bams`.
    // NOTE(review): assumes modkit resolves the index by path next to the BAM
    // (the usual htslib convention) — confirm against the modkit docs.
    tuple val(meta), path(bams, stageAs: "bams/?/*"), path(bais, stageAs: "bams/?/*"), path(truth_beds, stageAs: "truth/?/*")

    output:
    tuple val(meta), path("*.tsv"), emit: tsv
    // Only present when the caller adds a log-file option via `ext.args`.
    tuple val(meta), path("*.log"), emit: log, optional: true
    tuple val("${task.process}"), val('modkit'), eval("modkit --version | sed 's/modkit //'"), emit: versions_modkit, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // A single staged file arrives as a bare path rather than a list; normalise
    // both inputs to lists so they can be zipped.
    def bam_list = bams instanceof List ? bams : [bams]
    def bed_list = truth_beds instanceof List ? truth_beds : [truth_beds]
    // transpose() silently truncates to the shorter list, so fail loudly on a
    // length mismatch instead of dropping samples.
    if (bam_list.size() != bed_list.size()) {
        error "MODKIT_VALIDATE: bams and truth_beds must have the same length (got ${bam_list.size()} vs ${bed_list.size()})"
    }
    // One `--bam-and-bed <BAM> <BED>` flag per pair, in input order.
    def pair_args = [bam_list, bed_list].transpose().collect { b, g -> "--bam-and-bed ${b} ${g}" }.join(' ')
    """
    modkit \\
        validate \\
        $args \\
        --threads ${task.cpus} \\
        ${pair_args} \\
        --out-filepath ${prefix}.tsv
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.tsv
    """
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
name: modkit_validate
2+
description: |
3+
Validate base modification calls in one or more mod-BAMs, each against a matched
4+
ground-truth BED file containing the expected modification state at
5+
reference positions. Reports per-sample and combined confusion-matrix style
6+
statistics (accuracy, filtered calls, probability distribution) as a
7+
tab-separated machine-parseable table.
8+
keywords:
9+
- modkit
10+
- methylation
11+
- validate
12+
- nanopore
13+
- ont
14+
- modbam
15+
- ground-truth
16+
- benchmark
17+
tools:
18+
- "modkit":
19+
description: A bioinformatics tool for working with modified bases in Oxford Nanopore
20+
sequencing data.
21+
homepage: https://github.com/nanoporetech/modkit
22+
documentation: https://nanoporetech.github.io/modkit/
23+
tool_dev_url: https://github.com/nanoporetech/modkit
24+
licence:
25+
- "Oxford Nanopore Technologies PLC. Public License Version 1.0"
26+
identifier: ""
27+
input:
28+
- - meta:
29+
type: map
30+
description: |
31+
Groovy Map containing sample (or cohort) information
32+
e.g. `[ id:'test_run' ]`. The output inherits this meta.
33+
- bams:
34+
type: list
35+
description: |
36+
One or more modBAM files. BAMs are paired 1:1 with `truth_beds`
37+
by list index and passed as repeated `--bam-and-bed <BAM> <BED>`
38+
arguments to `modkit validate`.
39+
pattern: "*.{bam,cram}"
40+
ontologies:
41+
- edam: http://edamontology.org/format_2572
42+
- bais:
43+
type: list
44+
description: |
45+
BAM indices (`.bai` or `.csi`) matching each input BAM.
46+
pattern: "*.{bai,csi}"
47+
ontologies: []
48+
- truth_beds:
49+
type: list
50+
description: |
51+
Ground-truth BED files (one per BAM). The name field must be the
52+
short modified base code (single letter or ChEBI ID) or `-` to
53+
mark a canonical reference position. Paired 1:1 with `bams`.
54+
pattern: "*.{bed,bed.gz}"
55+
ontologies:
56+
- edam: http://edamontology.org/format_3003
57+
output:
58+
tsv:
59+
- - meta:
60+
type: map
61+
description: |
62+
Groovy Map containing sample information
63+
e.g. `[ id:'test_run' ]`.
64+
- "*.tsv":
65+
type: file
66+
description: |
67+
Machine-parseable validation summary produced by
68+
`modkit validate --out-filepath`. Columns include counts of
69+
correct / incorrect / filtered calls per class.
70+
pattern: "*.tsv"
71+
ontologies:
72+
- edam: http://edamontology.org/format_3475
73+
log:
74+
- - meta:
75+
type: map
76+
description: |
77+
Groovy Map containing sample information
78+
e.g. `[ id:'test_run' ]`.
79+
- "*.log":
80+
type: file
81+
description: |
82+
Optional modkit debug log (only emitted when `--log-filepath
83+
<name>.log` is passed via `ext.args`).
84+
pattern: "*.log"
85+
ontologies: []
86+
versions_modkit:
87+
- - ${task.process}:
88+
type: string
89+
description: The name of the process
90+
- modkit:
91+
type: string
92+
description: The name of the tool
93+
- modkit --version | sed 's/modkit //':
94+
type: eval
95+
description: The expression to obtain the version of the tool
96+
topics:
97+
versions:
98+
- - ${task.process}:
99+
type: string
100+
description: The name of the process
101+
- modkit:
102+
type: string
103+
description: The name of the tool
104+
- modkit --version | sed 's/modkit //':
105+
type: eval
106+
description: The expression to obtain the version of the tool
107+
authors:
108+
- "@sahuno"
109+
maintainers:
110+
- "@sahuno"
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
nextflow_process {

    name "Test Process MODKIT_VALIDATE"
    script "../main.nf"
    process "MODKIT_VALIDATE"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "modkit"
    tag "modkit/validate"

    // Only a stub test is defined here.
    //
    // A real validation run needs a BAM paired with a ground-truth BED whose
    // `name` column is the short modified-base code (m/h/a/...) or `-` for a
    // canonical reference position. The public modules test data doesn't ship
    // such a BED.
    //
    // TODO: add a non-stub test once a suitable truth BED is available. One
    // option is a self-consistency smoke test that derives the BED from the
    // first six columns of a `MODKIT_PILEUP` bedMethyl; at that point the
    // `setup { run("MODKIT_PILEUP") ... }` block and the `modkit/pileup` tag
    // should be reintroduced. They are intentionally absent now because no
    // test consumes the pileup output.

    test("homo sapiens - modbam + truth bed - stub") {

        options "-stub"

        when {
            process {
                // genome.bed is only a placeholder truth BED for the stub run;
                // real validation needs a truth BED whose name column is the
                // mod code (m/h/-).
                """
                input[0] = [
                    [ id: 'test' ],
                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true) ],
                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true) ],
                    [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ]
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"homo sapiens - modbam + truth bed - stub": {
3+
"content": [
4+
{
5+
"0": [
6+
[
7+
{
8+
"id": "test"
9+
},
10+
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
11+
]
12+
],
13+
"1": [
14+
15+
],
16+
"2": [
17+
[
18+
"MODKIT_VALIDATE",
19+
"modkit",
20+
"0.6.1"
21+
]
22+
],
23+
"log": [
24+
25+
],
26+
"tsv": [
27+
[
28+
{
29+
"id": "test"
30+
},
31+
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
32+
]
33+
],
34+
"versions_modkit": [
35+
[
36+
"MODKIT_VALIDATE",
37+
"modkit",
38+
"0.6.1"
39+
]
40+
]
41+
}
42+
],
43+
"timestamp": "2026-04-23T22:01:55.666359485",
44+
"meta": {
45+
"nf-test": "0.9.5",
46+
"nextflow": "25.04.6"
47+
}
48+
}
49+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Test-only configuration for the MODKIT_VALIDATE nf-test suite.
process {

    // Extra command-line arguments injected into the module via `task.ext.args`.
    // NOTE(review): `-c C` presumably selects cytosine as the canonical base
    // to evaluate — confirm against `modkit validate --help`.
    withName: 'MODKIT_VALIDATE' {
        ext.args = '-c C'
    }

}

0 commit comments

Comments
 (0)