Skip to content

Commit d4c43b3

Browse files
emmcauley and SPPearce authored
feat: samclip module (#8999)
* feat: samclip module * fix: add nodefaults channel * fix: reviewer comments -> add samtools view + fixmate, emit bam, use Seqera container, update tests * fix: reviewer comments -> add samtools command args, cleanup, remove decompressed reference if made * fix: more reviewer comments (topics channel + emit one channel either BAM/CRAM) * Apply suggestions from code review Co-authored-by: Simon Pearce <[email protected]> * fix: remove versions * tests: fix snapshot (versions) --------- Co-authored-by: Simon Pearce <[email protected]>
1 parent a46512f commit d4c43b3

File tree

5 files changed

+372
-0
lines changed

5 files changed

+372
-0
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
- nodefaults
7+
dependencies:
8+
- bioconda::samclip=0.4.0
9+
- bioconda::samtools=1.22.1

modules/nf-core/samclip/main.nf

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
 * SAMCLIP: pipe an alignment file through samclip and re-sort the result.
 *
 * Command chain (see the script block):
 *   samtools view (to SAM) -> samclip -> samtools sort -n -> samtools fixmate -m -> samtools sort
 * The final sort writes <prefix>.bam, or <prefix>.cram when CRAM is requested through ext.args4.
 *
 * Inputs
 *   meta,  bam                          sample map and alignment file
 *   meta2, reference, reference_index   reference FASTA (name may end in .gz) and its index
 *
 * Outputs
 *   reads               [ meta, <prefix>.{bam,cram} ]
 *   versions_samclip    [ process name, 'samclip',  version string ], also sent to topic `versions`
 *   versions_samtools   [ process name, 'samtools', version string ], also sent to topic `versions`
 *
 * ext settings
 *   args    samclip
 *   args2   first samtools sort (name sort)
 *   args3   samtools fixmate
 *   args4   second samtools sort (coordinate sort); also decides BAM vs CRAM output
 *   prefix  output basename, default "<meta.id>_samclip"
 */
process SAMCLIP {
    tag "${meta.id}"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'oras://community.wave.seqera.io/library/samclip_samtools:7af2916e4ae6f461'
        : 'community.wave.seqera.io/library/samclip_samtools:00cc7aefd75be672'}"

    input:
    tuple val(meta), path(bam)
    // reference_index is never named in the command line; it is only staged into the task
    // directory, presumably so the tools find it next to the FASTA (TODO confirm naming rules).
    tuple val(meta2), path(reference), path(reference_index)

    output:
    tuple val(meta), path("*.{bam,cram}"), emit: reads
    tuple val("${task.process}"), val('samclip'), eval("samclip --version | sed 's/^.*samclip //g'"), topic: versions, emit: versions_samclip
    tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: '' // samclip args
    def args2  = task.ext.args2  ?: '' // samtools sort (first, name sort) args
    def args3  = task.ext.args3  ?: '' // samtools fixmate args
    def args4  = task.ext.args4  ?: '' // samtools sort (second, coordinate sort) args
    def prefix = task.ext.prefix ?: "${meta.id}_samclip"

    // CRAM is selected when args4 carries `-O` / `--output-fmt` with a value starting with "cram".
    // The value is matched case-insensitively and may be attached directly, after `=`, or after
    // whitespace (`-O cram`, `-Ocram`, `--output-fmt=CRAM`, ...). Anything else yields BAM.
    def extension = (args4 =~ /(?:-O|--output-fmt)[\s=]*(?i:cram)/).find() ? "cram" : "bam"

    // A reference whose name ends in .gz is decompressed inside the task; ref_filename is the
    // name of the uncompressed FASTA that the tools actually read.
    def is_compressed = reference.getName().endsWith(".gz")
    def ref_filename  = reference.getName().replaceAll(/\.gz$/, "")

    // Hand samtools the same uncompressed FASTA that samclip uses, not the possibly gzipped input.
    def reference_arg = extension == "cram" ? "--reference ${ref_filename}" : ""

    if ("${bam}" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    # decompress reference if gzipped
    if [ "${is_compressed}" = "true" ]; then
        gzip -c -d ${reference} > ${ref_filename}
    fi

    # the explicit -O after args4 mirrors the extension chosen for the output file
    samtools view -h --output-fmt sam ${bam} | \\
    samclip ${args} --ref ${ref_filename} | \\
    samtools sort -n -O SAM ${args2} | \\
    samtools fixmate -m ${args3} - - | \\
    samtools sort ${args4} ${reference_arg} -O ${extension.toUpperCase()} -o ${prefix}.${extension}

    # clean up decompressed reference
    if [ "${is_compressed}" = "true" ]; then
        rm -f ${ref_filename}
    fi
    """

    stub:
    def args4  = task.ext.args4  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}_samclip"
    // Same BAM/CRAM decision as the script block so the stub creates the same file name.
    def extension = (args4 =~ /(?:-O|--output-fmt)[\s=]*(?i:cram)/).find() ? "cram" : "bam"

    if ("${bam}" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
    """
    touch ${prefix}.${extension}
    """
}

modules/nf-core/samclip/meta.yml

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
2+
name: samclip
3+
description: Filters SAM/BAM/CRAM files for soft and hard clipped alignments
4+
keywords:
5+
- soft-clipped reads
6+
- hard-clipped reads
7+
- clipping
8+
- genomics
9+
- sam
10+
- bam
11+
- cram
12+
tools:
13+
- samclip:
14+
description: Filters SAM file for soft and hard clipped alignments
15+
homepage: https://github.com/tseemann/samclip
16+
documentation: https://github.com/tseemann/samclip
17+
tool_dev_url: https://github.com/tseemann/samclip
18+
doi: "no DOI available"
19+
licence: ["GPL v3"]
20+
identifier: biotools:samclip
21+
22+
input:
23+
- - meta:
24+
type: map
25+
description: |
26+
Groovy Map containing sample information
27+
e.g. `[ id:'sample1' ]`
28+
- bam:
29+
type: file
30+
description: BAM file
31+
pattern: "*.bam"
32+
ontologies:
33+
- edam: http://edamontology.org/format_2572 # BAM
34+
- - meta2:
35+
type: map
36+
description: |
37+
Groovy Map containing fasta reference information
38+
e.g. `[ id:'ref' ]`
39+
- reference:
40+
type: file
41+
description: |
42+
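            Reference FASTA file, either plain or gzip-compressed (a gzipped reference is decompressed inside the task before samclip runs)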
43+
pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
44+
ontologies:
45+
- edam: http://edamontology.org/format_1929 # FASTA
46+
- reference_index:
47+
type: file
48+
description: |
49+
reference FASTA file index
50+
pattern: "*.fai"
51+
ontologies: []
52+
output:
53+
reads:
54+
- - meta:
55+
type: map
56+
description: Groovy Map containing sample information. e.g. `[ id:'sample1']`
57+
- "*.{bam,cram}":
58+
type: file
59+
description: Filtered BAM/CRAM file
60+
pattern: "*.{bam,cram}"
61+
ontologies:
62+
- edam: http://edamontology.org/format_2572 # BAM
63+
- edam: http://edamontology.org/format_3462 # CRAM
64+
versions_samclip:
65+
- - ${task.process}:
66+
type: string
67+
description: The process the versions were collected from
68+
- samclip:
69+
type: string
70+
description: The tool name
71+
- "samclip --version | sed 's/^.*samclip //g'":
72+
type: string
73+
description: The command used to generate the version of the tool
74+
versions_samtools:
75+
- - ${task.process}:
76+
type: string
77+
description: The process the versions were collected from
78+
- samtools:
79+
type: string
80+
description: The tool name
81+
- "samtools version | sed '1!d;s/.* //'":
82+
type: string
83+
description: The command used to generate the version of the tool
84+
topics:
85+
versions:
86+
- - ${task.process}:
87+
type: string
88+
description: The process the versions were collected from
89+
- samclip:
90+
type: string
91+
description: The tool name
92+
- "samclip --version | sed 's/^.*samclip //g'":
93+
type: string
94+
description: The command used to generate the version of the tool
95+
- - ${task.process}:
96+
type: string
97+
description: The process the versions were collected from
98+
- samtools:
99+
type: string
100+
description: The tool name
101+
- "samtools version | sed '1!d;s/.* //'":
102+
type: string
103+
description: The command used to generate the version of the tool
104+
authors:
105+
- "@emmcauley"
106+
maintainers:
107+
- "@emmcauley"
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// nf-test suite for the SAMCLIP process in ../main.nf.
// Both tests feed the same BAM plus reference FASTA/.fai from params.modules_testdata_base_path;
// the second one runs the process with the -stub option.
nextflow_process {
2+
3+
    name "Test Process SAMCLIP"
4+
    script "../main.nf"
5+
    process "SAMCLIP"
6+
7+
    tag "modules"
8+
    tag "modules_nfcore"
9+
    tag "samclip"
10+
11+
    // Real run: the process must succeed and all of its outputs must match the stored snapshot.
    test("test-data - NA12878.chr22.bam") {
12+
13+
        when {
14+
            process {
15+
                """
16+
                input[0] = [
17+
                    [ id:'NA12878_chr22' ],
18+
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam', checkIfExists: true)
19+
                ]
20+
21+
                input[1] = [
22+
                    [ id:'chr22_ref' ],
23+
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
24+
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
25+
                ]
26+
                """
27+
            }
28+
        }
29+
30+
        then {
31+
            assertAll(
32+
                { assert process.success },
33+
                { assert snapshot(process.out).match() }
34+
            )
35+
        }
36+
37+
    }
38+
39+
    // Stub run: same inputs, executed with -stub.
    test("test-data - NA12878.chr22.bam - stub") {
40+
41+
        options "-stub"
42+
43+
        when {
44+
            process {
45+
                """
46+
                input[0] = [
47+
                    [ id:'NA12878_chr22' ],
48+
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr22.bam', checkIfExists: true)
49+
                ]
50+
51+
                input[1] = [
52+
                    [ id:'chr22_ref' ],
53+
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
54+
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
55+
                ]
56+
                """
57+
            }
58+
        }
59+
60+
        then {
61+
            assertAll(
62+
                { assert process.success },
63+
                { assert snapshot(
64+
                    process.out,
65+
                    // NOTE(review): ../main.nf declares the outputs `reads`, `versions_samclip` and
                    // `versions_samtools`, but no `versions`; the stored snapshot holds an empty list
                    // for this entry. Looks like a leftover - confirm, then drop it and regenerate
                    // the snapshot in the same change.
                    process.out.versions.collect{ path(it).yaml }
66+
                    ).match() }
67+
            )
68+
        }
69+
70+
    }
71+
72+
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
{
2+
"test-data - NA12878.chr22.bam": {
3+
"content": [
4+
{
5+
"0": [
6+
[
7+
{
8+
"id": "NA12878_chr22"
9+
},
10+
"NA12878_chr22_samclip.bam:md5,3dba3ce13764dfeb17a9771197dc2654"
11+
]
12+
],
13+
"1": [
14+
[
15+
"SAMCLIP",
16+
"samclip",
17+
"0.4.0"
18+
]
19+
],
20+
"2": [
21+
[
22+
"SAMCLIP",
23+
"samtools",
24+
"1.22.1"
25+
]
26+
],
27+
"reads": [
28+
[
29+
{
30+
"id": "NA12878_chr22"
31+
},
32+
"NA12878_chr22_samclip.bam:md5,3dba3ce13764dfeb17a9771197dc2654"
33+
]
34+
],
35+
"versions_samclip": [
36+
[
37+
"SAMCLIP",
38+
"samclip",
39+
"0.4.0"
40+
]
41+
],
42+
"versions_samtools": [
43+
[
44+
"SAMCLIP",
45+
"samtools",
46+
"1.22.1"
47+
]
48+
]
49+
}
50+
],
51+
"meta": {
52+
"nf-test": "0.9.2",
53+
"nextflow": "25.04.6"
54+
},
55+
"timestamp": "2026-01-09T17:21:42.620917"
56+
},
57+
"test-data - NA12878.chr22.bam - stub": {
58+
"content": [
59+
{
60+
"0": [
61+
[
62+
{
63+
"id": "NA12878_chr22"
64+
},
65+
"NA12878_chr22_samclip.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
66+
]
67+
],
68+
"1": [
69+
[
70+
"SAMCLIP",
71+
"samclip",
72+
"0.4.0"
73+
]
74+
],
75+
"2": [
76+
[
77+
"SAMCLIP",
78+
"samtools",
79+
"1.22.1"
80+
]
81+
],
82+
"reads": [
83+
[
84+
{
85+
"id": "NA12878_chr22"
86+
},
87+
"NA12878_chr22_samclip.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
88+
]
89+
],
90+
"versions_samclip": [
91+
[
92+
"SAMCLIP",
93+
"samclip",
94+
"0.4.0"
95+
]
96+
],
97+
"versions_samtools": [
98+
[
99+
"SAMCLIP",
100+
"samtools",
101+
"1.22.1"
102+
]
103+
]
104+
},
105+
[
106+
107+
]
108+
],
109+
"meta": {
110+
"nf-test": "0.9.2",
111+
"nextflow": "25.04.6"
112+
},
113+
"timestamp": "2026-01-09T17:22:01.60664"
114+
}
115+
}

0 commit comments

Comments
 (0)