Skip to content

Commit fafc527

Browse files
sahuno and claude
committed
feat: add modkit/extract/calls module
Add new nf-core module wrapping `modkit extract calls`, which emits a per-read per-position table of base modification calls using the same pass/fail thresholding as `modkit pileup`. Complementary to `modkit/extract/full` (raw probabilities): this module emits the thresholded categorical decisions. Useful for per-read downstream analysis such as allele-specific methylation and methylation-aware phasing. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 135d883 commit fafc527

6 files changed

Lines changed: 340 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- "bioconda::ont-modkit=0.6.1"
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// MODKIT_EXTRACT_CALLS
//
// Runs `modkit extract calls` on a modBAM and writes a per-read, per-position
// table of base modification calls.
//
// Inputs:
//   [ meta,  bam,   bai ] - sample meta map, modBAM and its index
//   [ meta2, fasta, fai ] - optional reference; pass `[[], [], []]` to omit `--reference`
//
// Outputs:
//   tsv             - `${prefix}.tsv`, or `${prefix}.tsv.gz` when `--bgzf` is in `ext.args`
//   log             - any `*.log` file present in the work directory (optional)
//   versions_modkit - [ process name, 'modkit', version ] published on the `versions` topic
process MODKIT_EXTRACT_CALLS {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    // The Docker image is given without a registry host, following the nf-core
    // convention that the registry (quay.io) is supplied by pipeline configuration.
    // NOTE(review): confirm the consuming pipeline sets `docker.registry`.
    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ont-modkit:0.6.1--hcdda2d0_0':
        'biocontainers/ont-modkit:0.6.1--hcdda2d0_0' }"

    input:
    tuple val(meta), path(bam), path(bai)
    tuple val(meta2), path(fasta), path(fai)

    output:
    tuple val(meta), path("*.tsv{,.gz}"), emit: tsv
    tuple val(meta), path("*.log")      , emit: log, optional: true
    tuple val("${task.process}"), val('modkit'), eval("modkit --version | sed 's/modkit //'"), emit: versions_modkit, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args       = task.ext.args ?: ''
    def prefix     = task.ext.prefix ?: "${meta.id}"
    // An empty `fasta` (e.g. `[]`) is falsy, so `--reference` is simply left out.
    def reference  = fasta ? "--reference ${fasta}" : ''
    // The output name must carry `.gz` when the user asks for BGZF output via ext.args.
    def out_suffix = args.tokenize().contains('--bgzf') ? 'tsv.gz' : 'tsv'
    """
    modkit \\
        extract \\
        calls \\
        $args \\
        --threads ${task.cpus} \\
        ${reference} \\
        ${bam} \\
        ${prefix}.${out_suffix}
    """

    stub:
    def args       = task.ext.args ?: ''
    def prefix     = task.ext.prefix ?: "${meta.id}"
    def compressed = args.tokenize().contains('--bgzf')
    def out_suffix = compressed ? 'tsv.gz' : 'tsv'
    // A zero-byte `.gz` created by `touch` is not a valid gzip stream and breaks
    // downstream stub steps that try to decompress it, so the compressed
    // placeholder is written through gzip instead.
    def create_cmd = compressed
        ? "echo '' | gzip > ${prefix}.${out_suffix}"
        : "touch ${prefix}.${out_suffix}"
    """
    ${create_cmd}
    """
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
name: modkit_extract_calls
2+
description: |
3+
Produce a per-read per-position table of base modification **calls**
4+
(pass/fail/filtered, with the called base) from a modBAM using the same
5+
thresholding algorithm as `modkit pileup`. Complementary to `modkit
6+
extract full`, which emits raw probabilities: `extract calls` emits the
7+
thresholded categorical decision per site per read.
8+
keywords:
9+
- modkit
10+
- methylation
11+
- extract
12+
- calls
13+
- read-level
14+
- modbam
15+
- nanopore
16+
- ont
17+
tools:
18+
- "modkit":
19+
description: A bioinformatics tool for working with modified bases in Oxford Nanopore
20+
sequencing data.
21+
homepage: https://github.com/nanoporetech/modkit
22+
documentation: https://nanoporetech.github.io/modkit/
23+
tool_dev_url: https://github.com/nanoporetech/modkit
24+
licence:
25+
- "Oxford Nanopore Technologies PLC. Public License Version 1.0"
26+
identifier: ""
27+
input:
28+
- - meta:
29+
type: map
30+
description: |
31+
Groovy Map containing sample information
32+
e.g. `[ id:'sample1' ]`. The output inherits this meta.
33+
- bam:
34+
type: file
35+
description: Input modBAM with MM/ML tags.
36+
pattern: "*.{bam,cram}"
37+
ontologies:
38+
- edam: http://edamontology.org/format_2572
39+
- bai:
40+
type: file
41+
description: BAM index (`.bai` or `.csi`).
42+
pattern: "*.{bai,csi}"
43+
ontologies: []
44+
- - meta2:
45+
type: map
46+
description: |
47+
Groovy Map containing reference information
48+
e.g. `[ id:'mm10' ]`. Pass the whole input tuple as `[[], [], []]` to skip — reference
49+
context columns will then be "." in the output.
50+
- fasta:
51+
type: file
52+
description: Optional reference FASTA. Required only to populate reference-context
53+
columns in the output.
54+
pattern: "*.{fa,fasta,fna}"
55+
ontologies:
56+
- edam: http://edamontology.org/format_1929
57+
- fai:
58+
type: file
59+
description: Samtools FASTA index for `fasta`.
60+
pattern: "*.fai"
61+
ontologies:
62+
- edam: http://edamontology.org/format_3475
63+
output:
64+
tsv:
65+
- - meta:
66+
type: map
67+
description: |
68+
Groovy Map containing sample information
69+
e.g. `[ id:'sample1' ]`.
70+
- "*.tsv{,.gz}":
71+
type: file
72+
description: |
73+
Per-read per-position call table. BGZF-compressed when `--bgzf`
74+
is passed via `ext.args`.
75+
pattern: "*.{tsv,tsv.gz}"
76+
ontologies:
77+
- edam: http://edamontology.org/format_3475
78+
log:
79+
- - meta:
80+
type: map
81+
description: |
82+
Groovy Map containing sample information
83+
e.g. `[ id:'sample1' ]`.
84+
- "*.log":
85+
type: file
86+
description: |
87+
Optional modkit debug log (only emitted when `--log-filepath
88+
<name>.log` is passed via `ext.args`).
89+
pattern: "*.log"
90+
ontologies: []
91+
versions_modkit:
92+
- - ${task.process}:
93+
type: string
94+
description: The name of the process
95+
- modkit:
96+
type: string
97+
description: The name of the tool
98+
- modkit --version | sed 's/modkit //':
99+
type: eval
100+
description: The expression to obtain the version of the tool
101+
topics:
102+
versions:
103+
- - ${task.process}:
104+
type: string
105+
description: The name of the process
106+
- modkit:
107+
type: string
108+
description: The name of the tool
109+
- modkit --version | sed 's/modkit //':
110+
type: eval
111+
description: The expression to obtain the version of the tool
112+
authors:
113+
- "@sahuno"
114+
maintainers:
115+
- "@sahuno"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// nf-test specification for the MODKIT_EXTRACT_CALLS process.
// Two scenarios run against the same nanopore BAM and reference genome:
// a `-stub` run and a real execution.
nextflow_process {

    name "Test Process MODKIT_EXTRACT_CALLS"
    script "../main.nf"
    process "MODKIT_EXTRACT_CALLS"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "modkit"
    tag "modkit/extract"
    tag "modkit/extract/calls"

    // Stub run: only the shape of the emitted channels is snapshotted.
    test("homo sapiens - nanopore modbam - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id: 'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true)
                ]
                input[1] = [
                    [ id: 'genome' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Real run: the TSV must exist and be non-empty before the full output is snapshotted.
    test("homo sapiens - nanopore modbam") {

        when {
            process {
                """
                input[0] = [
                    [ id: 'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true)
                ]
                input[1] = [
                    [ id: 'genome' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert process.out.tsv },
                { assert path(process.out.tsv[0][1]).exists() },
                { assert path(process.out.tsv[0][1]).size() > 0 },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
{
2+
"homo sapiens - nanopore modbam": {
3+
"content": [
4+
{
5+
"0": [
6+
[
7+
{
8+
"id": "test"
9+
},
10+
"test.tsv:md5,0ce2e5a6bf0889aaf8cbd682e2b17acb"
11+
]
12+
],
13+
"1": [
14+
15+
],
16+
"2": [
17+
[
18+
"MODKIT_EXTRACT_CALLS",
19+
"modkit",
20+
"0.6.1"
21+
]
22+
],
23+
"log": [
24+
25+
],
26+
"tsv": [
27+
[
28+
{
29+
"id": "test"
30+
},
31+
"test.tsv:md5,0ce2e5a6bf0889aaf8cbd682e2b17acb"
32+
]
33+
],
34+
"versions_modkit": [
35+
[
36+
"MODKIT_EXTRACT_CALLS",
37+
"modkit",
38+
"0.6.1"
39+
]
40+
]
41+
}
42+
],
43+
"timestamp": "2026-04-23T22:05:37.027585203",
44+
"meta": {
45+
"nf-test": "0.9.5",
46+
"nextflow": "25.04.6"
47+
}
48+
},
49+
"homo sapiens - nanopore modbam - stub": {
50+
"content": [
51+
{
52+
"0": [
53+
[
54+
{
55+
"id": "test"
56+
},
57+
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
58+
]
59+
],
60+
"1": [
61+
62+
],
63+
"2": [
64+
[
65+
"MODKIT_EXTRACT_CALLS",
66+
"modkit",
67+
"0.6.1"
68+
]
69+
],
70+
"log": [
71+
72+
],
73+
"tsv": [
74+
[
75+
{
76+
"id": "test"
77+
},
78+
"test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
79+
]
80+
],
81+
"versions_modkit": [
82+
[
83+
"MODKIT_EXTRACT_CALLS",
84+
"modkit",
85+
"0.6.1"
86+
]
87+
]
88+
}
89+
],
90+
"timestamp": "2026-04-23T22:05:22.616980777",
91+
"meta": {
92+
"nf-test": "0.9.5",
93+
"nextflow": "25.04.6"
94+
}
95+
}
96+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Test-time configuration for the MODKIT_EXTRACT_CALLS module.
process {
    withName: 'MODKIT_EXTRACT_CALLS' {
        // No extra command-line arguments are passed to the tool in the tests.
        ext.args = ''
    }
}

0 commit comments

Comments
 (0)