Skip to content

Commit cc8a7f2

Browse files
sahunoclaude
andcommitted
feat: add modkit/entropy module
Add new nf-core module wrapping `modkit entropy`, which computes methylation entropy over genomic windows from one or more mod-BAMs. Supports an optional BED of regions for per-region descriptive statistics; emits a genome-wide BED otherwise. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 135d883 commit cc8a7f2

6 files changed

Lines changed: 437 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- "bioconda::ont-modkit=0.6.1"
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// Runs `modkit entropy` on one or more mod-BAMs for a single sample.
//
// Inputs:
//   [meta,  bams, bais]  - sample map, BAM(s) and their indices
//   [meta2, fasta, fai]  - reference FASTA and its index
//   [meta3, regions]     - optional regions BED; pass [[], []] to skip
//
// Outputs (all optional, depending on the mode):
//   bed          - genome-wide BED, written when no regions BED is given
//   regions_bed, bedgraph, tsv
//                - files collected from entropy_regions/ when a regions BED
//                  is given
//   log          - any *.log in the work dir (e.g. requested via ext.args)
//   versions_modkit - tool version, also published on the `versions` topic
process MODKIT_ENTROPY {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ont-modkit:0.6.1--hcdda2d0_0':
        'biocontainers/ont-modkit:0.6.1--hcdda2d0_0' }"

    input:
    // stageAs 'in/?/*' stages every file in its own numbered sub-directory,
    // which prevents filename collisions when multiple BAMs from the same
    // sample (e.g. technical replicates) are passed to a single run.
    // NOTE(review): BAMs and indices use the same pattern, so the i-th index
    // presumably lands beside the i-th BAM; this relies on both lists being
    // supplied in the same order.
    tuple val(meta), path(bams, stageAs: "in/?/*"), path(bais, stageAs: "in/?/*")
    tuple val(meta2), path(fasta), path(fai)
    tuple val(meta3), path(regions)

    output:
    tuple val(meta), path("*.bed")                     , emit: bed        , optional: true
    tuple val(meta), path("entropy_regions/*.bed")     , emit: regions_bed, optional: true
    tuple val(meta), path("entropy_regions/*.bedgraph"), emit: bedgraph   , optional: true
    tuple val(meta), path("entropy_regions/*.tsv")     , emit: tsv        , optional: true
    tuple val(meta), path("*.log")                     , emit: log        , optional: true
    tuple val("${task.process}"), val('modkit'), eval("modkit --version | sed 's/modkit //'"), emit: versions_modkit, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // A single staged file arrives as a Path, several as a List.
    def bam_args = bams instanceof List ? bams.collect { "--in-bam ${it}" }.join(' ') : "--in-bam ${bams}"
    // modkit entropy's --out-bed expects a FILE without --regions, and a DIRECTORY with --regions
    def out_arg = regions ? "--regions ${regions} --out-bed entropy_regions --prefix ${prefix}" : "--out-bed ${prefix}.bed"
    def mkdir = regions ? "mkdir -p entropy_regions" : ""
    """
    ${mkdir}

    modkit \\
        entropy \\
        $args \\
        --threads ${task.cpus} \\
        --ref ${fasta} \\
        ${out_arg} \\
        ${bam_args}
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Mirror the branch the real script takes so a stub run fills the same
    // output channels: region outputs under entropy_regions/ when a regions
    // BED is supplied, a single genome-wide BED otherwise.
    // The region file names are placeholders chosen to satisfy the output
    // globs declared above; the names modkit actually writes may differ.
    def mkdir = regions ? "mkdir -p entropy_regions" : ""
    def stub_files = regions ?
        "entropy_regions/${prefix}.bed entropy_regions/${prefix}.bedgraph entropy_regions/${prefix}.tsv" :
        "${prefix}.bed"
    """
    ${mkdir}
    touch ${stub_files}
    """
}
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
name: modkit_entropy
2+
description: |
3+
Calculate methylation entropy over genomic windows from one or more mod-BAMs.
4+
Entropy is a per-window measure of the diversity of methylation patterns
5+
across reads covering the window and is complementary to mean methylation.
6+
When a BED file is supplied through the `regions` input (do not pass `--regions` via `ext.args`), modkit
7+
writes per-region BED/bedgraph/tsv files into a directory; otherwise a single
8+
genome-wide BED file is produced.
9+
keywords:
10+
- modkit
11+
- methylation
12+
- entropy
13+
- nanopore
14+
- ont
15+
- modbam
16+
tools:
17+
- "modkit":
18+
description: A bioinformatics tool for working with modified bases in Oxford Nanopore
19+
sequencing data.
20+
homepage: https://github.com/nanoporetech/modkit
21+
documentation: https://nanoporetech.github.io/modkit/
22+
tool_dev_url: https://github.com/nanoporetech/modkit
23+
licence:
24+
- "Oxford Nanopore Technologies PLC. Public License Version 1.0"
25+
identifier: ""
26+
input:
27+
- - meta:
28+
type: map
29+
description: |
30+
Groovy Map containing sample information
31+
e.g. `[ id:'sample1' ]`. The output inherits this meta.
32+
- bams:
33+
type: list
34+
description: |
35+
One or more modBAM files to compute entropy from. Passing more than
36+
one BAM aggregates counts across them (multi-sample entropy).
37+
pattern: "*.{bam,cram}"
38+
ontologies:
39+
- edam: http://edamontology.org/format_2572
40+
- bais:
41+
type: list
42+
description: |
43+
BAM indices (`.bai` or `.csi`) matching each input BAM, one-to-one.
44+
pattern: "*.{bai,csi}"
45+
ontologies: []
46+
- - meta2:
47+
type: map
48+
description: |
49+
Groovy Map containing reference information
50+
e.g. `[ id:'mm10' ]`.
51+
- fasta:
52+
type: file
53+
description: Reference FASTA the BAM was aligned to.
54+
pattern: "*.{fa,fasta,fna}"
55+
ontologies:
56+
- edam: http://edamontology.org/format_1929
57+
- fai:
58+
type: file
59+
description: Samtools FASTA index for `fasta`.
60+
pattern: "*.fai"
61+
ontologies:
62+
- edam: http://edamontology.org/format_3475
63+
- - meta3:
64+
type: map
65+
description: |
66+
Groovy Map containing region information
67+
e.g. `[ id:'promoters' ]`. Pass `[[], []]` as the whole `[meta3, regions]` tuple to run without regions.
68+
- regions:
69+
type: file
70+
description: |
71+
Optional BED file of regions over which to compute per-region
72+
descriptive statistics. When provided, modkit writes per-region
73+
outputs (`.bed`, `.bedgraph`, `.tsv`) into a directory.
74+
pattern: "*.{bed,bed.gz}"
75+
ontologies:
76+
- edam: http://edamontology.org/format_3003
77+
output:
78+
bed:
79+
- - meta:
80+
type: map
81+
description: |
82+
Groovy Map containing sample information
83+
e.g. `[ id:'sample1' ]`.
84+
- "*.bed":
85+
type: file
86+
description: |
87+
Genome-wide entropy BED file, produced when `regions` is not
88+
provided. One row per window.
89+
pattern: "*.bed"
90+
ontologies:
91+
- edam: http://edamontology.org/format_3003
92+
regions_bed:
93+
- - meta:
94+
type: map
95+
description: |
96+
Groovy Map containing sample information
97+
e.g. `[ id:'sample1' ]`.
98+
- "entropy_regions/*.bed":
99+
type: file
100+
description: |
101+
Per-region entropy BED files, produced when `regions` is provided.
102+
pattern: "*.bed"
103+
ontologies:
104+
- edam: http://edamontology.org/format_3003
105+
bedgraph:
106+
- - meta:
107+
type: map
108+
description: |
109+
Groovy Map containing sample information
110+
e.g. `[ id:'sample1' ]`.
111+
- "entropy_regions/*.bedgraph":
112+
type: file
113+
description: |
114+
Per-region entropy bedgraph, produced when `regions` is provided.
115+
pattern: "*.bedgraph"
116+
ontologies:
117+
- edam: http://edamontology.org/format_3583
118+
tsv:
119+
- - meta:
120+
type: map
121+
description: |
122+
Groovy Map containing sample information
123+
e.g. `[ id:'sample1' ]`.
124+
- "entropy_regions/*.tsv":
125+
type: file
126+
description: |
127+
Per-region descriptive statistics TSV, produced when `regions`
128+
is provided.
129+
pattern: "*.tsv"
130+
ontologies:
131+
- edam: http://edamontology.org/format_3475
132+
log:
133+
- - meta:
134+
type: map
135+
description: |
136+
Groovy Map containing sample information
137+
e.g. `[ id:'sample1' ]`.
138+
- "*.log":
139+
type: file
140+
description: |
141+
Optional modkit debug log (only emitted when `--log-filepath
142+
<name>.log` is passed via `ext.args`).
143+
pattern: "*.log"
144+
ontologies: []
145+
versions_modkit:
146+
- - ${task.process}:
147+
type: string
148+
description: The name of the process
149+
- modkit:
150+
type: string
151+
description: The name of the tool
152+
- modkit --version | sed 's/modkit //':
153+
type: eval
154+
description: The expression to obtain the version of the tool
155+
topics:
156+
versions:
157+
- - ${task.process}:
158+
type: string
159+
description: The name of the process
160+
- modkit:
161+
type: string
162+
description: The name of the tool
163+
- modkit --version | sed 's/modkit //':
164+
type: eval
165+
description: The expression to obtain the version of the tool
166+
authors:
167+
- "@sahuno"
168+
maintainers:
169+
- "@sahuno"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// nf-test suite for the MODKIT_ENTROPY process.
// Loads the process from ../main.nf and applies ./nextflow.config.
// NOTE(review): both tests pass `[[],[]]` as input[2], so the branch of the
// process that takes a regions BED is not exercised by this file.
nextflow_process {
2+
3+
name "Test Process MODKIT_ENTROPY"
4+
script "../main.nf"
5+
process "MODKIT_ENTROPY"
6+
config "./nextflow.config"
7+
8+
tag "modules"
9+
tag "modules_nfcore"
10+
tag "modkit"
11+
tag "modkit/entropy"
12+
13+
// Stub run: one BAM plus index, the reference FASTA plus index, no regions.
// Passes when the process succeeds and process.out matches the stored snapshot.
test("homo sapiens - nanopore modbam - cpg - stub") {
14+
15+
// Run the process with Nextflow's -stub option.
options "-stub"
16+
17+
when {
18+
process {
19+
"""
20+
input[0] = [
21+
[ id: 'test' ],
22+
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true) ],
23+
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true) ]
24+
]
25+
input[1] = [
26+
[ id: 'genome' ],
27+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
28+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
29+
]
30+
input[2] = [[],[]]
31+
"""
32+
}
33+
}
34+
35+
then {
36+
assertAll(
37+
{ assert process.success },
38+
{ assert snapshot(process.out).match() }
39+
)
40+
}
41+
}
42+
43+
// Real (non-stub) run with the same inputs as the stub test, no regions.
// Additionally checks that the `bed` channel is non-empty and that its first
// file exists before comparing process.out against the stored snapshot.
test("homo sapiens - nanopore modbam - cpg") {
44+
45+
when {
46+
process {
47+
"""
48+
input[0] = [
49+
[ id: 'test' ],
50+
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true) ],
51+
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true) ]
52+
]
53+
input[1] = [
54+
[ id: 'genome' ],
55+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
56+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
57+
]
58+
input[2] = [[],[]]
59+
"""
60+
}
61+
}
62+
63+
then {
64+
assertAll(
65+
{ assert process.success },
66+
{ assert process.out.bed },
67+
{ assert path(process.out.bed[0][1]).exists() },
68+
{ assert snapshot(process.out).match() }
69+
)
70+
}
71+
}
72+
}

0 commit comments

Comments
 (0)