Skip to content

Commit 5aafbe9

Browse files
authored
Merge pull request #57 from bcbio/eberdan-patch-3
GSVA for multiple contrasts
2 parents 61c60ca + 13a4ba0 commit 5aafbe9

1 file changed

Lines changed: 146 additions & 38 deletions

File tree

03_functional/GSVA.qmd

Lines changed: 146 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,13 @@ params:
2222
contrasts:
2323
value:
2424
- ["sample_type", "tumor", "normal"]
25+
- ["sample_type", "normal", "tumor"]
2526
project_file: ../information.R
2627
params_file: ../00_params/params-example.R # example data
2728
functions_file: ../00_libs
2829
# select from gene set repository at https://github.com/bcbio/resources/tree/main/gene_sets/gene_sets
2930
# choose geneset, click "Raw', and copy url to work with mouse data
30-
geneset_fn: https://raw.githubusercontent.com/bcbio/resources/main/gene_sets/gene_sets/20240904/human/h.all.v2024.1.Hs.entrez.gmt
31+
geneset_fn: https://raw.githubusercontent.com/bcbio/resources/main/gene_sets/gene_sets/20240904/human/c5.go.bp.v2024.1.Hs.entrez.gmt
3132
---
3233

3334
```{r}
@@ -91,7 +92,7 @@ set.seed(1234567890L)
9192
#| warning: FALSE
9293
source(params$project_file)
9394
source(params$params_file)
94-
purr::map(list.files(params$functions_file, pattern = "*.R$", full.names = T), source) %>% invisible()
95+
map(list.files(params$functions_file, pattern = "*.R$", full.names = T), source) %>% invisible()
9596
column <- params$column
9697
contrasts <- params$contrasts
9798
subset_column <- params$subset_column
@@ -114,13 +115,17 @@ sanitize_datatable <- function(df, ...) {
114115
- PI: `r PI`
115116
- Analyst: `r analyst`
116117
- Experiment: `r experiment`
117-
- Aim: `r aim`
118+
118119

119120
```{r load_data}
120121
coldata <- load_coldata(
121-
coldata_fn, column,
122+
coldata_fn,
122123
subset_column, subset_value
123124
)
125+
coldata$Treatment <- gsub("IL15_PBL","IL15", coldata$Treatment)
126+
127+
coldata$group <- paste0(coldata$Treatment,"_",coldata$RNAseq.type)
128+
124129
coldata[[contrasts[[1]][1]]] <- relevel(as.factor(coldata[[contrasts[[1]][1]]]), contrasts[[1]][3])
125130
coldata$sample <- row.names(coldata)
126131
@@ -188,58 +193,161 @@ gsvaPar <- GSVA::gsvaParam(counts_entrez, genes_by_pathway, kcdf = "Poisson")
188193
gsva.es <- gsva(gsvaPar, verbose = F)
189194
```
190195

191-
## Test for Significance
192196

193-
```{r limma}
194-
mod <- model.matrix(~ factor(coldata[[column]]))
195-
fit <- lmFit(gsva.es, mod)
196-
fit <- eBayes(fit)
197-
res <- topTable(fit, coef = paste0("factor(coldata[[column]])", contrasts[[1]][2]), number = Inf, sort.by = "P")
197+
```{r, message=F, echo=F, warning=F}
198+
199+
200+
# <column_of_comparison>_<treatment_name>_vs_<control_name>
201+
names_to_use <- lapply(contrasts, function(contrast) {
202+
coef <- paste0(contrast[1], "_", contrast[2], "_vs_", contrast[3])
203+
})
204+
# Currently the contrasts list object doesn't have names for the items in the list
205+
# Assign the names from names_to_use to the names in the contrasts list object
206+
names(contrasts) <- names_to_use
207+
208+
209+
# Perform differential expression analysis for each contrast
210+
de_list <- lapply(contrasts, function(contrast) {
211+
# Correctly assign the group_column, value1, and value2 using [[ ]]
212+
group_column <- contrast[[1]]
213+
value1 <- contrast[[2]]
214+
value2 <- contrast[[3]]
215+
216+
# Subset the coldata data frame based on the group column and values
217+
subset_coldata <- coldata[coldata[[group_column]] %in% c(value1, value2), ]
218+
subset_coldata$group <- relevel(subset_coldata$group, ref = value2)
219+
220+
# Create a GSVA subset
221+
gsva.sub <- gsva.es[, subset_coldata$sample]
222+
223+
# Create a design matrix for the model
224+
mod <- model.matrix(~ factor(subset_coldata[[group_column]])) # Using the correct group_column
225+
226+
# Fit the linear model
227+
fit <- lmFit(gsva.sub, mod)
228+
fit <- eBayes(fit)
229+
230+
# Check the column names of the coefficients in the fitted model
231+
print("Coefficients in fit:")
232+
print(colnames(fit$coefficients)) # Print column names of the coefficients in the fitted model
233+
234+
# Construct the correct coefficient name based on `value1`
235+
coef_name <- paste0("factor(subset_coldata[[group_column]])", value1)
236+
237+
238+
# Extract the results from the top table using the correct coefficient name
239+
res <- topTable(fit, coef = coef_name, number = Inf, sort.by = "P")
240+
241+
# Subset the results for significantly differentially expressed genes
242+
res_sig <- subset(res, res$adj.P.Val < 0.1)
243+
244+
# Store the results in a list
245+
results <- list(
246+
all = res,
247+
sig = res_sig,
248+
data = gsva.sub
249+
)
250+
251+
# Return the results
252+
return(results)
253+
})
254+
198255
199-
res %>% sanitize_datatable()
200256
```
201257

202-
## Graph top 5 pathways
203258

204-
```{r graph_pathways}
205-
#| results: 'asis'
259+
## Test for Significance
260+
261+
::: {.panel-tabset}
206262

207-
scores <- t(gsva.es)
263+
```{r}
264+
#| results: 'asis'
208265
209-
sig <- subset(res, res$adj.P.Val < 0.1)
266+
# Create the tabs dynamically based on the contrasts
267+
for (contrast in names(de_list)) {
268+
res_sig <- de_list[[contrast]][["sig"]]
269+
270+
# Skip if no significant results are found
271+
if (nrow(res_sig) == 0) {
272+
next
273+
}
210274
211-
if (nrow(sig) >= 5) {
212-
pathways <- rownames(sig)[1:5]
213-
} else if (nrow(sig) == 0) {
214-
pathways <- c()
215-
} else {
216-
pathways <- rownames(sig)
275+
# Create a tab header for each contrast
276+
cat("### ", contrast, "\n\n")
277+
278+
# Use htmltools to properly render the datatable
279+
dt <- DT::datatable(res_sig,
280+
options = list(
281+
scrollX = TRUE,
282+
autoWidth = TRUE
283+
),
284+
class = "stripe hover"
285+
)
286+
287+
print(htmltools::tagList(dt))
288+
289+
cat("\n\n")
217290
}
291+
```
218292

219-
if (length(pathways) > 0) {
220-
to_graph <- data.frame(scores[, pathways]) %>%
221-
rownames_to_column("sample") %>%
222-
pivot_longer(!sample, names_to = "pathway", values_to = "enrichment_score")
223-
to_graph <- left_join(to_graph, coldata)
224293

225-
for (single_pathway in pathways) {
226-
cat("### ", single_pathway, "\n")
227294

228-
to_graph_single_pathway <- to_graph %>% filter(pathway == single_pathway)
229-
p <- ggplot(to_graph_single_pathway, aes(x = .data[[column]], y = enrichment_score)) +
230-
geom_boxplot() +
231-
geom_point(alpha = 0.5) +
232-
ggtitle(single_pathway)
233-
print(p)
295+
:::
234296

235-
cat("\n\n")
297+
298+
## Graph top 5 pathways for each contrast
299+
300+
::: {.panel-tabset}
301+
302+
```{r graph_pathways}
303+
#| results: 'asis'
304+
#| fig-height: 6
305+
#| fig-width: 8
306+
307+
# Loop over contrasts to generate graphs
308+
for (contrast in names(de_list)) {
309+
# Create a tab for each contrast
310+
cat("### ", contrast, "\n\n")
311+
312+
sig <- de_list[[contrast]][["sig"]]
313+
scores <- t(de_list[[contrast]][["data"]])
314+
315+
# Ensure there are pathways to graph
316+
if (nrow(sig) >= 5) {
317+
pathways <- rownames(sig)[1:5]
318+
} else if (nrow(sig) == 0) {
319+
pathways <- c()
320+
} else {
321+
pathways <- rownames(sig)
236322
}
237-
} else {
238-
cat("No pathways were detected as significantly enriched")
323+
324+
if (length(pathways) > 0) {
325+
to_graph <- data.frame(scores[, pathways]) %>%
326+
rownames_to_column("sample") %>%
327+
pivot_longer(!sample, names_to = "pathway", values_to = "enrichment_score")
328+
to_graph <- left_join(to_graph, coldata)
329+
330+
for (single_pathway in pathways) {
331+
to_graph_single_pathway <- to_graph %>% filter(pathway == single_pathway)
332+
p <- ggplot(to_graph_single_pathway, aes(x = .data[[column]], y = enrichment_score)) +
333+
geom_boxplot() +
334+
geom_point(alpha = 0.5) +
335+
ggtitle(single_pathway)
336+
print(p)
337+
cat("\n\n")
338+
}
339+
} else {
340+
cat("No pathways were detected as significantly enriched for this contrast.\n\n")
341+
}
342+
343+
cat("\n\n")
239344
}
240345
```
241346

242347

348+
:::
349+
350+
243351

244352
# R session
245353

0 commit comments

Comments
 (0)