Skip to content

Commit 8667d59

Browse files
Add RTNI_TNI module (#5320)
* Initial work towards a module for RTN TNI Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Make it a simple PoC Tests are passing. From now on, I can make it more complex and closer to what I want this module to do in the end. * Add two more steps in the module * Prettify file * Use test data, add remaining steps/collect output in TNI Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Fix DOI in meta.yml Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Some ongoing changes Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Rename rtn_tni.R to rtn_tni.r * Fix snapshot mismatch issue Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Add --n_permutations + tests Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Get rid of the TODOs Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Use different dataset (real gene ids) Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Add support to --tfs + tests Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Fix indentation for ECLint check Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> * Remove quotes from environment dependencies for conda Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io> --------- Signed-off-by: Marcel Ribeiro-Dantas <mribeirodantas@seqera.io>
1 parent 0e04b94 commit 8667d59

10 files changed

Lines changed: 440 additions & 0 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
3+
name: "rtn_tni"
4+
channels:
5+
- conda-forge
6+
- bioconda
7+
- defaults
8+
dependencies:
9+
- bioconda::bioconductor-rtn=2.26.0

modules/nf-core/rtn/tni/main.nf

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
process RTN_TNI {
2+
debug true
3+
// Label each task with the input file name (the braces belong to the interpolation, not to the label text)
tag "${expression_matrix.name}"
4+
label 'process_medium'
5+
6+
conda "${moduleDir}/environment.yml"
7+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
8+
'https://depot.galaxyproject.org/singularity/bioconductor-rtn:2.26.0--r43hdfd78af_0':
9+
'biocontainers/bioconductor-rtn:2.26.0--r43hdfd78af_0' }"
10+
11+
input:
12+
tuple val(meta), path(expression_matrix)
13+
14+
output:
15+
tuple val(meta), path("tni.rds") , emit: tni
16+
tuple val(meta), path("tni_permutated.rds") , emit: tni_perm
17+
tuple val(meta), path("tni_bootstrapped.rds") , emit: tni_bootstrap
18+
tuple val(meta), path("tni_filtered.rds") , emit: tni_filtered
19+
path "versions.yml" , emit: versions
20+
21+
when:
22+
task.ext.when == null || task.ext.when
23+
24+
script:
25+
def args = task.ext.args ?: ''
26+
27+
template 'rtn_tni.r'
28+
29+
stub:
30+
def args = task.ext.args ?: ''
31+
32+
"""
33+
touch tni.rds
34+
touch tni_permutated.rds
35+
touch tni_bootstrapped.rds
36+
touch tni_filtered.rds
37+
38+
cat <<-END_VERSIONS > versions.yml
39+
"${task.process}":
40+
bioconductor-rtn: \$(Rscript -e "suppressWarnings(library(RTN)); cat(as.character(packageVersion('RTN')))")
41+
END_VERSIONS
42+
"""
43+
}

modules/nf-core/rtn/tni/meta.yml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
3+
name: "rtn_tni"
4+
description: Uses the RTN R package for transcriptional regulatory network inference (TNI).
5+
keywords:
6+
- regulatory network
7+
- transcriptomics
8+
- transcription factors
9+
tools:
10+
- "rtn":
11+
description: "RTN: Reconstruction of Transcriptional regulatory Networks and analysis of regulons"
12+
homepage: "https://www.bioconductor.org/packages/release/bioc/html/RTN.html"
13+
documentation: "https://www.bioconductor.org/packages/release/bioc/vignettes/RTN/inst/doc/RTN.html"
14+
tool_dev_url: "https://www.bioconductor.org/packages/release/bioc/html/RTN.html"
15+
doi: "10.1038/ncomms3464"
16+
licence: ["Artistic-2.0"]
17+
18+
input:
19+
- expression_matrix:
20+
type: file
21+
description: expression matrix in TSV format
22+
pattern: "*.tsv"
23+
24+
output:
25+
- tni:
26+
type: file
27+
description: RDS R Object with the TNI object
28+
pattern: "tni.rds"
29+
- tni_perm:
30+
type: file
31+
description: RDS R Object with the TNI object after permutation
32+
pattern: "tni_permutated.rds"
33+
- tni_bootstrap:
34+
type: file
35+
description: RDS R Object with the TNI object after permutation and bootstrap
36+
pattern: "tni_bootstrapped.rds"
37+
- tni_filtered:
38+
type: file
39+
description: RDS R Object with the TNI object after permutation, bootstrap and filtering
40+
pattern: "tni_filtered.rds"
41+
- versions:
42+
type: file
43+
description: File containing software versions
44+
pattern: "versions.yml"
45+
46+
authors:
47+
- "@mribeirodantas"
48+
maintainers:
49+
- "@mribeirodantas"
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/env Rscript
2+
3+
# Ported to nf-core/modules with template by Jonathan Manning
4+
5+
#' Parse out options from a string without recourse to optparse
6+
#'
7+
#' @param x Long-form argument list like --opt1 val1 --opt2 val2
8+
#'
9+
#' @return named list of options and values similar to optparse
10+
11+
parse_args <- function(x){
12+
args_list <- unlist(strsplit(x, ' ?--')[[1]])[-1]
13+
args_vals <- lapply(args_list, function(x) scan(text=x, what='character', quiet = TRUE))
14+
15+
# Ensure the option vectors are length 2 (key/ value) to catch empty ones
16+
args_vals <- lapply(args_vals, function(z){ length(z) <- 2; z})
17+
18+
parsed_args <- structure(lapply(args_vals, function(x) x[2]), names = lapply(args_vals, function(x) x[1]))
19+
parsed_args[! is.na(parsed_args)]
20+
}
21+
22+
# Load
23+
library("RTN")
24+
library("snow")
25+
26+
################################################
27+
################################################
28+
## Pull in module inputs ##
29+
################################################
30+
################################################
31+
32+
input_expr_matrix <- '$expression_matrix'
33+
output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix')
34+
threads <- $task.cpus
35+
args_opt <- parse_args('$task.ext.args')
36+
37+
n_perm <- ifelse('n_permutations' %in% names(args_opt), strtoi(args_opt[['n_permutations']]), 10)
38+
39+
# Debug messages (stdout)
40+
sink(stdout(), type = "message") # sink messages to stdout
41+
message("Expression matrix file : ", input_expr_matrix)
42+
message("Nb permutations : ", n_perm)
43+
message("Nb threads : ", threads)
44+
message("Output basename : ", output_prefix)
45+
if ('tfs' %in% names(args_opt)) {
46+
message("TFs : ", args_opt[['tfs']])
47+
# strsplit() returns a list; keep its first element so tfs is a plain character vector
tfs <- strsplit(args_opt[['tfs']], ',')[[1]]
48+
} else {
49+
# Load data
50+
data(tfsData)
51+
tfs <- tfsData\$Lambert2018\$SYMBOL
52+
}
53+
sink(NULL, type="message") # close the sink
54+
55+
# Preprocess
56+
# Input 1: 'expData', a named gene expression matrix (genes on rows, samples on cols);
57+
# Input 2: 'regulatoryElements', a vector listing genes regarded as TFs
58+
# Input 3: 'rowAnnotation', an optional data frame with gene annotation
59+
# Input 4: 'colAnnotation', an optional data frame with sample annotation
60+
61+
exp_data <- read.csv(input_expr_matrix, sep='\t')
62+
rownames(exp_data) <- exp_data[,1]
63+
rowAnnotation <- exp_data[,1:2]
64+
colnames(rowAnnotation) <- c('PROBEID', 'SYMBOL')
65+
rowAnnotation\$SYMBOL <- toupper(rowAnnotation\$SYMBOL)
66+
exp_data[,1:2] <- NULL
67+
68+
# Regulatory Transcriptional Network Inference
69+
# Use the TFs selected above (--tfs option or the Lambert2018 default) rather than
# a hard-coded pair of gene ids; unlist() guarantees a flat character vector.
tfs <- unlist(tfs)
70+
rtni <- tni.constructor(expData = as.matrix(exp_data),
71+
regulatoryElements = tfs,
72+
rowAnnotation = rowAnnotation)
73+
74+
options(cluster=snow::makeCluster(spec=threads, "SOCK"))
75+
76+
# Please set nPermutations >= 1000
77+
rtni_permutation <- tni.permutation(rtni, nPermutations = n_perm, pValueCutoff = 1e-4)
78+
79+
# Unstable interactions are subsequently removed by bootstrap analysis using the
80+
# tni.bootstrap() function, which creates a consensus bootstrap network, referred to
81+
# here as refnet (reference network).
82+
rtni_bootstrapped <- tni.bootstrap(rtni_permutation)
83+
84+
stopCluster(getOption("cluster"))
85+
86+
# remove the weakest interaction in any triplet formed by two TFs and a common
87+
# target gene, preserving the dominant TF-target pair (ARACNe)
88+
rtni_filtered <- tni.dpi.filter(rtni_bootstrapped)
89+
90+
saveRDS(rtni, file = "tni.rds")
91+
saveRDS(rtni_permutation, file = "tni_permutated.rds")
92+
saveRDS(rtni_bootstrapped, file = "tni_bootstrapped.rds")
93+
saveRDS(rtni_filtered, file = "tni_filtered.rds")
94+
95+
# Plot
96+
#pdf(paste0(output_prefix, "_RTN.pdf"))
97+
#tni.graph(rtni_filtered, regulatoryElements = c("FOXM1", "E2F2"))
98+
#title("Regulatory Transcriptional Network")
99+
#mtext(output_prefix, side=3)
100+
#dev.off()
101+
#cat(
102+
# paste("- Threads::", threads),
103+
# fill=TRUE, labels=output_prefix,
104+
# file=paste0(output_prefix, "_intercept_slope.txt"), append=FALSE
105+
#)
106+
107+
################################################
108+
################################################
109+
## R SESSION INFO ##
110+
################################################
111+
################################################
112+
113+
sink(paste(output_prefix, "R_sessionInfo.log", sep = '.'))
114+
print(sessionInfo())
115+
sink()
116+
117+
################################################
118+
################################################
119+
## VERSIONS FILE ##
120+
################################################
121+
################################################
122+
123+
r.version <- strsplit(version[['version.string']], ' ')[[1]][3]
124+
rtn.version <- as.character(packageVersion('RTN'))
125+
126+
writeLines(
127+
c(
128+
'"${task.process}":',
129+
paste(' bioconductor-rtn:', rtn.version)
130+
),
131+
'versions.yml')
132+
133+
################################################
134+
################################################
135+
################################################
136+
################################################
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
nextflow_process {
2+
3+
name "Test Process RTN_TNI"
4+
script "../main.nf"
5+
process "RTN_TNI"
6+
7+
tag "modules"
8+
tag "modules_nfcore"
9+
tag "rtn"
10+
tag "rtn/tni"
11+
12+
test("musmusculus tni expression matrix") {
13+
when {
14+
process {
15+
"""
16+
input[0] = [
17+
[id:'test'],
18+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/riboseq_expression/salmon.merged.gene_counts_length_scaled.tsv', checkIfExists: true)
19+
]
20+
"""
21+
}
22+
}
23+
24+
then {
25+
assertAll(
26+
{ assert process.success },
27+
{ assert file(process.out.tni[0][1]).exists() },
28+
{ assert file(process.out.tni_perm[0][1]).exists() },
29+
{ assert file(process.out.tni_bootstrap[0][1]).exists() },
30+
{ assert file(process.out.tni_filtered[0][1]).exists() },
31+
{ assert file(process.out.versions[0]).exists() },
32+
{ assert process.stdout.contains('Nb permutations : 10') } // the default value is 10
33+
)
34+
}
35+
36+
}
37+
38+
test("musmusculus tni expression matrix with --n_permutations 5") {
39+
when {
40+
config "./nextflow.config"
41+
process {
42+
"""
43+
input[0] = [
44+
[id:'test'],
45+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/riboseq_expression/salmon.merged.gene_counts_length_scaled.tsv', checkIfExists: true)
46+
]
47+
"""
48+
}
49+
}
50+
51+
then {
52+
assertAll(
53+
{ assert process.success },
54+
{ assert file(process.out.tni[0][1]).exists() },
55+
{ assert file(process.out.tni_perm[0][1]).exists() },
56+
{ assert file(process.out.tni_bootstrap[0][1]).exists() },
57+
{ assert file(process.out.tni_filtered[0][1]).exists() },
58+
{ assert file(process.out.versions[0]).exists() },
59+
{ assert process.stdout.contains('Nb permutations : 5') }
60+
)
61+
}
62+
63+
}
64+
65+
test("musmusculus tni expression matrix with --tfs ENSG00000125798,ENSG00000125816") {
66+
when {
67+
config "./nextflow.config2"
68+
process {
69+
"""
70+
input[0] = [
71+
[id:'test'],
72+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/riboseq_expression/salmon.merged.gene_counts_length_scaled.tsv', checkIfExists: true)
73+
]
74+
"""
75+
}
76+
}
77+
78+
then {
79+
assertAll(
80+
{ assert process.success },
81+
{ assert file(process.out.tni[0][1]).exists() },
82+
{ assert file(process.out.tni_perm[0][1]).exists() },
83+
{ assert file(process.out.tni_bootstrap[0][1]).exists() },
84+
{ assert file(process.out.tni_filtered[0][1]).exists() },
85+
{ assert file(process.out.versions[0]).exists() },
86+
{ assert process.stdout.contains('TFs : ENSG00000125798,ENSG00000125816') }
87+
)
88+
}
89+
90+
}
91+
92+
test("musmusculus tni expression matrix with --n_permutations 12 and --tfs ENSG00000125798,ENSG00000125816") {
93+
when {
94+
config "./nextflow.config3"
95+
process {
96+
"""
97+
input[0] = [
98+
[id:'test'],
99+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/riboseq_expression/salmon.merged.gene_counts_length_scaled.tsv', checkIfExists: true)
100+
]
101+
"""
102+
}
103+
}
104+
105+
then {
106+
assertAll(
107+
{ assert process.success },
108+
{ assert file(process.out.tni[0][1]).exists() },
109+
{ assert file(process.out.tni_perm[0][1]).exists() },
110+
{ assert file(process.out.tni_bootstrap[0][1]).exists() },
111+
{ assert file(process.out.tni_filtered[0][1]).exists() },
112+
{ assert file(process.out.versions[0]).exists() },
113+
{ assert process.stdout.contains('Nb permutations : 12') },
114+
{ assert process.stdout.contains('TFs : ENSG00000125798,ENSG00000125816') }
115+
)
116+
}
117+
118+
}
119+
120+
test("musmusculus tni expression matrix - stub") {
121+
122+
options "-stub"
123+
124+
when {
125+
process {
126+
"""
127+
input[0] = [
128+
[id:'test'],
129+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/riboseq_expression/salmon.merged.gene_counts_length_scaled.tsv', checkIfExists: true)
130+
]
131+
"""
132+
}
133+
}
134+
135+
then {
136+
assertAll(
137+
{ assert process.success },
138+
{ assert file(process.out.tni[0][1]).exists() },
139+
{ assert file(process.out.tni_perm[0][1]).exists() },
140+
{ assert file(process.out.tni_bootstrap[0][1]).exists() },
141+
{ assert file(process.out.tni_filtered[0][1]).exists() },
142+
{ assert file(process.out.versions[0]).exists() }
143+
)
144+
}
145+
146+
}
147+
148+
}

0 commit comments

Comments
 (0)