Skip to content

Commit e09609d

Browse files
committed
Add nf-core salmon module with index and quant processes, environment configurations, and workflow integration
1 parent 9d14fd8 commit e09609d

File tree

24 files changed

+1099
-108
lines changed

24 files changed

+1099
-108
lines changed

.nf-core.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
repository_type: pipeline

00_illumina.nf

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
process generateSalmonIndex {
    // Runs `salmon index` on the supplied FASTA and publishes the resulting
    // `salmon_index` directory as the process output.
    label 'short_slurm_job'

    input:
    path final_sample_fasta

    output:
    path 'salmon_index'

    script:
    // Thread count follows whatever the executor/config grants this task.
    def threads = task.cpus
    def index_dir = 'salmon_index'
    """
    salmon index -p ${threads} -t ${final_sample_fasta} -i ${index_dir}
    """
}
15+
16+
process generateDecoyTranscriptome {
    // Invokes generateDecoyTranscriptome.sh with the genome FASTA, the sample
    // transcript FASTA and the sample GTF, writing into `decoy_transcriptome/`.
    // NOTE(review): the helper script must be on PATH in the task
    // environment (e.g. the pipeline's bin/ directory) - confirm.
    label 'short_slurm_job'

    input:
    path genome_fasta
    path final_sample_gtf
    path final_sample_fasta

    output:
    path 'decoy_transcriptome/'

    script:
    def threads = task.cpus
    def out_dir = 'decoy_transcriptome'
    """
    generateDecoyTranscriptome.sh \\
    -j ${threads} \\
    -g ${genome_fasta} \\
    -t ${final_sample_fasta} \\
    -a ${final_sample_gtf} \\
    -o ${out_dir}
    """
}
38+
process runSalmon {
    // Quantifies reads with `salmon quant` against a staged Salmon index.
    //
    // Inputs:
    //   salmon_index - index directory (e.g. the output of generateSalmonIndex)
    //   fastq_gz     - either a single FASTQ file (single-end, passed with -r)
    //                  or a list of exactly two files (paired-end, -1 / -2)
    // Output:
    //   salmon_quants/<baseName of the first read file>_quant
    //
    // The previous script ignored both inputs: it used `${1}` (a shell
    // positional that Groovy interpolates to the literal `1`), hard-coded
    // paths under proc/ and data/, fixed the thread count at 30, and wrote to
    // proc/salmon_quants/, which never matched the declared output path.
    input:
    path salmon_index
    path fastq_gz

    output:
    path "salmon_quants/${(fastq_gz instanceof List ? fastq_gz[0] : fastq_gz).baseName}_quant"

    script:
    // Normalise to a list so single-end and paired-end share one code path.
    def reads = fastq_gz instanceof List ? fastq_gz : [fastq_gz]
    def read_args = reads.size() == 2 ? "-1 ${reads[0]} -2 ${reads[1]}" : "-r ${reads.join(' ')}"
    // Must stay in sync with the expression in the output declaration.
    def out_dir = "salmon_quants/${reads[0].baseName}_quant"
    """
    mkdir -p salmon_quants
    salmon quant -i $salmon_index -l A $read_args -p $task.cpus --validateMappings -o $out_dir
    """
}
51+
52+
workflow salmon {
    // Builds the Salmon index from `params.transcriptome_fasta`.
    // The workflow takes no channel inputs, so the empty `take:` section
    // has been dropped.
    main:
    // A `path` input only accepts absolute path strings; resolving the param
    // through file() also supports relative paths and fails early with a
    // clear message when the FASTA is missing.
    generateSalmonIndex(file(params.transcriptome_fasta, checkIfExists: true))
}

05_proteomic.nf

Lines changed: 0 additions & 72 deletions
This file was deleted.

main.nf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Local subworkflows. Relative module paths must start with `./` (or `../`);
// they are resolved against the location of this script.
include { merge_and_collapse } from './subworkflows/local/merge_and_collapse'
include { classify_and_count } from './subworkflows/local/classify_and_count'
include { ORFanage } from './subworkflows/local/ORFanage'
include { proteoform_classification } from './subworkflows/local/proteoform_classification'
include { proteomic } from './subworkflows/local/proteomic'
include { peptide } from './subworkflows/local/peptide'
7+
8+
workflow {
    // Entry workflow: each stage consumes named outputs of the earlier ones.

    merge_and_collapse(
        params.flnc_bam,
        params.mapped_bam
    )

    classify_and_count(
        merge_and_collapse.out.isoform_gff,
        merge_and_collapse.out.id_to_sample,
        merge_and_collapse.out.read_stat
    )

    ORFanage(
        classify_and_count.out.final_sample_gtf,
        classify_and_count.out.final_sample_classification,
        classify_and_count.out.final_sample_fasta
    )

    proteoform_classification(
        ORFanage.out.predicted_cds_gtf,
        ORFanage.out.peptide_fasta,
        ORFanage.out.best_orf
    )

    proteomic(
        proteoform_classification.out.protein_database,
        params.mzXMLfiles
    )

    peptide(
        classify_and_count.out.final_sample_classification,
        ORFanage.out.predicted_cds_gtf,
        proteoform_classification.out.protein_database,
        proteomic.out.peptides
    )
}

modules.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"name": "",
3+
"homePage": "",
4+
"repos": {
5+
"https://github.com/nf-core/modules.git": {
6+
"modules": {
7+
"nf-core": {
8+
"salmon/index": {
9+
"branch": "master",
10+
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
11+
"installed_by": ["modules"]
12+
},
13+
"salmon/quant": {
14+
"branch": "master",
15+
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
16+
"installed_by": ["modules"]
17+
}
18+
}
19+
}
20+
}
21+
}
22+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
- bioconda::salmon=1.10.3

modules/nf-core/salmon/index/main.nf

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
process SALMON_INDEX {
    // Builds a Salmon index in `salmon/`.
    //
    // Inputs:
    //   genome_fasta     - optional genome FASTA; when provided, its sequence
    //                      names are written to decoys.txt and the genome is
    //                      appended to the transcripts as `gentrome.fa`
    //   transcript_fasta - transcript FASTA
    // Outputs:
    //   index    - the `salmon` index directory
    //   versions - versions.yml recording the salmon version
    //
    // NOTE(review): this module is installed from nf-core/modules; a local
    // change like this one presumably needs to be tracked as a module patch.
    tag "$transcript_fasta"
    label "process_medium"

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/salmon:1.10.3--h6dccd9a_2' :
        'biocontainers/salmon:1.10.3--h6dccd9a_2' }"

    input:
    path genome_fasta
    path transcript_fasta

    output:
    path "salmon"      , emit: index
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def decoys = ''
    def fasta = transcript_fasta
    // Shell-side sources for the two FASTA files; swapped for a process
    // substitution when the file is gzipped. Kept separate from the input
    // variables so the inputs themselves are never reassigned.
    def genome_src = genome_fasta
    def transcript_src = transcript_fasta
    if (genome_fasta) {
        // Compare the string form: endsWith() on a path object matches whole
        // path elements, so it would never detect a `.gz` suffix.
        if (genome_fasta.toString().endsWith('.gz')) {
            genome_src = "<(gunzip -c $genome_fasta)"
        }
        decoys = '-d decoys.txt'
        fasta = 'gentrome.fa'
    }
    if (transcript_fasta.toString().endsWith('.gz')) {
        transcript_src = "<(gunzip -c $transcript_fasta)"
    }
    """
    if [ -n '$genome_src' ]; then
        grep '^>' $genome_src | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 | sed 's/>//g' > decoys.txt
        cat $transcript_src $genome_src > $fasta
    fi

    salmon \\
        index \\
        --threads $task.cpus \\
        -t $fasta \\
        $decoys \\
        $args \\
        -i salmon

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
    END_VERSIONS
    """

    stub:
    """
    mkdir salmon
    touch salmon/complete_ref_lens.bin
    touch salmon/ctable.bin
    touch salmon/ctg_offsets.bin
    touch salmon/duplicate_clusters.tsv
    touch salmon/info.json
    touch salmon/mphf.bin
    touch salmon/pos.bin
    touch salmon/pre_indexing.log
    touch salmon/rank.bin
    touch salmon/refAccumLengths.bin
    touch salmon/ref_indexing.log
    touch salmon/reflengths.bin
    touch salmon/refseq.bin
    touch salmon/seq.bin
    touch salmon/versionInfo.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
    END_VERSIONS
    """
}

modules/nf-core/salmon/index/meta.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: salmon_index
2+
description: Create index for salmon
3+
keywords:
4+
- index
5+
- fasta
6+
- genome
7+
- reference
8+
tools:
9+
- salmon:
10+
description: |
11+
Salmon is a tool for wicked-fast transcript quantification from RNA-seq data
12+
homepage: https://salmon.readthedocs.io/en/latest/salmon.html
13+
manual: https://salmon.readthedocs.io/en/latest/salmon.html
14+
doi: 10.1038/nmeth.4197
15+
licence: ["GPL-3.0-or-later"]
16+
identifier: biotools:salmon
17+
input:
18+
- - genome_fasta:
19+
type: file
20+
description: Fasta file of the reference genome
21+
- - transcript_fasta:
22+
type: file
23+
description: Fasta file of the reference transcriptome
24+
output:
25+
- index:
26+
- salmon:
27+
type: directory
28+
description: Folder containing the salmon index files
29+
pattern: "salmon"
30+
- versions:
31+
- versions.yml:
32+
type: file
33+
description: File containing software versions
34+
pattern: "versions.yml"
35+
authors:
36+
- "@kevinmenden"
37+
- "@drpatelh"
38+
maintainers:
39+
- "@kevinmenden"
40+
- "@drpatelh"
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
nextflow_process {

    // nf-test spec for SALMON_INDEX: a genome + transcriptome run, a
    // transcriptome-only run, and a stub run. Each test snapshots the sorted
    // file names of the index directory together with the versions output.

    name "Test Process SALMON_INDEX"
    script "../main.nf"
    process "SALMON_INDEX"
    tag "modules"
    tag "modules_nfcore"
    tag "salmon"
    tag "salmon/index"

    test("sarscov2") {

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                // Genome and transcriptome both come from the sarscov2 test
                // data so the decoy genome matches the transcripts and the
                // test name.
                """
                input[0] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)])
                input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)])
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert path(process.out.index.get(0)).exists() },
                { assert snapshot(
                    file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(),
                    process.out.versions
                ).match()}
            )
        }

    }

    test("sarscov2 transcriptome only") {

        when {
            params {
                outdir = "$outputDir"
            }
            process {
                // Empty genome input: the index is built from transcripts only.
                """
                input[0] = Channel.of([])
                input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)])
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert path(process.out.index.get(0)).exists() },
                { assert snapshot(
                    file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(),
                    process.out.versions
                ).match()}
            )
        }

    }

    test("sarscov2 stub") {
        options "-stub"
        when {
            params {
                outdir = "$outputDir"
            }
            process {
                """
                input[0] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)])
                input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)])
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert path(process.out.index.get(0)).exists() },
                { assert snapshot(
                    file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(),
                    process.out.versions
                ).match()}
            )
        }

    }

}

0 commit comments

Comments
 (0)