Skip to content

Commit

Permalink
Merge pull request #35 from yerkes-gencore/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
derrik-gratz authored Jun 28, 2024
2 parents af7f541 + 6854d6a commit 903d6a7
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 119 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: gencoreBulk
Title: NPRC Genomics Core Bulk RNA Analysis Utilities
Version: 0.2.3
Date: 2024-03-22
Version: 0.2.4
Date: 2024-06-27
Author: person("Derrik", "Gratz", email = "[email protected]", role =
c("aut", "cre"), comment = c(ORCID = "0009-0002-5934-576X"))
Maintainer: Derrik Gratz <[email protected]>
Expand Down Expand Up @@ -32,6 +32,7 @@ Imports:
plyr,
readr,
rlang,
scales,
stringr,
SummarizedExperiment,
tibble,
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(circlize,colorRamp2)
importFrom(edgeR,voomLmFit)
importFrom(fgsea,fgseaSimple)
importFrom(fgsea,fgsea)
importFrom(forcats,fct_inorder)
importFrom(graphics,legend)
importFrom(graphics,lines)
Expand All @@ -72,6 +72,8 @@ importFrom(readr,write_csv)
importFrom(readr,write_lines)
importFrom(readr,write_tsv)
importFrom(rlang,.data)
importFrom(scales,log_breaks)
importFrom(scales,trans_new)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,prcomp)
Expand Down
114 changes: 76 additions & 38 deletions R/gseaDotplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,28 @@
#'
#' @rdname gseaDotplot_joint
#'
#' @param gsea_results A table with multiple GSEA results from `fgsea::fgseaSimple()`
#' @param result A table with multiple GSEA results from `fgsea::fgseaSimple()`
#' combined by `combine_GSEA_results()`
#' @param pathway_order Order of pathways to plot
#' @param x_order Order of comparisons on X axis
#' @param significance A vector of values to indicate significance with asterisks.
#' Each subsequent value will add an extra asterisk. E.g. `c(0.05, 0.01)` will
#' give one asteriks to values below 0.05 and two asterisks to values below 0.01.
#' If this is null, no asterisks will be plotted.
#' @param cap_pvalues Bool; cap small pvalues to keep the range of dots smaller
#' @param p_val_col Column to use for significance values. Default 'pval'.
#' @inheritParams ggplot2::scale_radius
#' @param use_shortened_pathway_names Pull names from column 'pathway_short'
#' rather than pathway (if `runfgsea()` call had `breakdown_pathway_names` set
#' to `TRUE`)
#' @param breaks Values to mark for scaling radius
#' result,
#'
#' @return A ggplot object
#' @export
#'
#' @importFrom rlang .data
#' @importFrom scales trans_new
#' @importFrom scales log_breaks
#' @import ggplot2
#' @import dplyr
#'
Expand All @@ -35,61 +42,72 @@
#' joint_GSEA_results <- combine_GSEA_results(gsea_results, pathways)
#' gseaDotplot_joint(joint_GSEA_results)
#' }
gseaDotplot_joint <- function(gsea_results,
gseaDotplot_joint <- function(result,
pathway_order = NULL,
x_order = NULL,
significance = c(0.05, 0.01, 0.001),
range = c(1,8),
breaks = -log10(c(0.1,0.01,0.001,0.0001)),
labels = c(0.1,0.01,0.001,0.0001),
p_val_col = 'pval'){
breaks = c(0.1,0.01,0.001,0.0001),
cap_pvalues = TRUE,
p_val_col = 'pval',
use_shortened_pathway_names = FALSE
){
if (use_shortened_pathway_names){
result$pathway <- result$pathway_short
}
if (!is.null(pathway_order)) {
if (all(pathway_order %in% unique(gsea_results$pathway))){
pathway_order <- order(factor(gsea_results$pathway, levels = pathway_order))
gsea_results$pathway <- .wrap_underscore_strings_balance(gsea_results$pathway,36)
if (all(pathway_order %in% unique(result$pathway))){
pathway_order <- order(factor(result$pathway, levels = pathway_order))
result$pathway <- .wrap_underscore_strings_balance(result$pathway,36)
## reordering
gsea_results <- gsea_results[pathway_order,]
gsea_results$pathway <- factor(gsea_results$pathway, levels = unique(gsea_results$pathway))
result <- result[pathway_order,]
result$pathway <- factor(result$pathway, levels = unique(result$pathway))
} else {
warning('pathways specified in pathway_order not found, defaulting to arbitrary order')
}
} else {
gsea_results$pathway <- .wrap_underscore_strings_balance(gsea_results$pathway,36)
result$pathway <- .wrap_underscore_strings_balance(result$pathway,36)
}

if (!is.null(x_order)) {
if (all(x_order %in% unique(gsea_results$ID))){
gsea_results$ID <- factor(gsea_results$ID, levels = x_order)
if (all(x_order %in% unique(result$ID))){
result$ID <- factor(result$ID, levels = x_order)
} else {
warning('Specified x_order not all found in data, defaulting to arbitrary order')
}
}

gsea_results$label <- NA
if (cap_pvalues) {
## Set the minimum value to be 1/10th of the smallest label
cap_max = tail(breaks, 1)/10
result[[p_val_col]] <- pmax(cap_max, result[[p_val_col]])
}
range <- c(ceiling(-log(max(result[[p_val_col]]))),
floor(-log(min(result[[p_val_col]]))))+1
result$label <- NA
caption <- ''
if (!is.null(significance)) {
if (is.numeric(significance)) {
label <- '*'
for (cutoff in significance) {
gsea_results$label <- ifelse(gsea_results[[p_val_col]] < cutoff, label, gsea_results$label)
result$label <- ifelse(result[[p_val_col]] < cutoff, label, result$label)
caption <- paste(caption, label, '<', cutoff, ';', sep = ' ')
label <- paste0(label, '*')
}
# gsea_results$label[is.numeric(gsea_results$label)] <- NA
# result$label[is.numeric(result$label)] <- NA
} else {
stop('Significance argument should be a numeric vector')
}
}
ggplot(gsea_results, aes(x=.data$ID, y=.data$pathway, size=-log(.data[[p_val_col]]),
ggplot(result, aes(x=.data$ID, y=.data$pathway, size=.data[[p_val_col]],
color=.data$NES, label = .data$label)) +
geom_point() +
scale_color_gradient2(low="blue",
mid="white",
high="red",
midpoint=0,
breaks=c(-2,-1,0,1,2),
limits=c(min(gsea_results$NES,-1),
max(gsea_results$NES,1))) +
limits=c(min(result$NES,-1),
max(result$NES,1))) +
theme_classic(11) +
theme(panel.grid.major = element_line(colour = "grey92"),
panel.grid.minor = element_line(colour = "grey92"),
Expand All @@ -101,13 +119,13 @@ gseaDotplot_joint <- function(gsea_results,
labs(x="Comparison",
y="Gene set",
color = "Normalized\nenrichment\nscore",
size="Nom p-val",
size="p-value",
title="GSEA pathway enrichments",
caption = caption) +
scale_radius(name="NOM p-val",
range=range,
breaks=breaks,
labels=labels) +
scale_radius(range=range,
trans=reverselog_trans(),
breaks=breaks
) +
geom_text(na.rm = TRUE, color = 'white', size = 3)
}

Expand All @@ -122,21 +140,24 @@ gseaDotplot_joint <- function(gsea_results,
#' @param signif_only If TRUE, only plot results with p value < `sig_cutoff`
#' @param top_n Show the top N results sorted by pval
#' @param sig_cutoff Threshold for significance, default to 0.05
#' @param p_val_col Column to use for significance values. Default 'pval'.
#' @param significance A vector of values to indicate significance with asterisks.
#' Each subsequent value will add an extra asterisk. E.g. `c(0.05, 0.01)` will
#' give one asteriks to values below 0.05 and two asterisks to values below 0.01.
#' If this is null, no asterisks will be plotted.
#' @param use_shortened_pathway_names Pull names from column 'pathway_short'
#' rather than pathway (if `runfgsea()` call had `breakdown_pathway_names` set
#' to `TRUE`)
#' @param p_val_col Column to use for significance values. Default 'pval'.
#' @inheritParams ggplot2::scale_radius
#' @param breaks Values to mark for scaling radius
#' @param cap_pvalues Bool; cap small pvalues to keep the range of dots smaller
#'
#' @return A ggplot object
#' @export
#'
#' @importFrom rlang .data
#' @importFrom utils head
#' @importFrom scales trans_new
#' @importFrom scales log_breaks
#' @import ggplot2
#' @import dplyr
#'
Expand All @@ -153,16 +174,17 @@ gseaDotplot_single <- function(result,
sig_cutoff = 0.05,
use_shortened_pathway_names = FALSE,
significance = c(0.05, 0.01, 0.001),
range = c(1,8),
breaks = -log10(c(0.1,0.01,0.001,0.0001)),
labels = c(0.1,0.01,0.001,0.0001),
# range = c(ceiling(-log(max(result[[p_val_col]]))),
# floor(-log(min(result[[p_val_col]])))),
breaks = c(0.1,0.01,0.001,0.0001),
cap_pvalues= TRUE,
p_val_col = 'pval') {
if (use_shortened_pathway_names){
result$pathway <- result$pathway_short
}
result <- result %>%
arrange(.data[[p_val_col]], desc(.data$NES)) %>%
mutate(perc = 100 * lengths(.data$leadingEdge) / .data$size) %>%
mutate(perc = 100 * sapply(str_split(leadingEdge, ', '), length) / .data$size) %>%
mutate(name = paste0(.wrap_underscore_strings_balance(.data$pathway, 36), "\nn=", .data$size)) %>%
filter(.data$size >= min_size)
if (!is.null(filter_source)) {
Expand All @@ -180,24 +202,32 @@ gseaDotplot_single <- function(result,
if (is.numeric(significance)) {
label <- '*'
for (cutoff in significance) {
result$label <- ifelse(result$pval < cutoff, label, result$label)
result$label <- ifelse(result[[p_val_col]] < cutoff, label, result$label)
caption <- paste(caption, label, '<', cutoff, ';', sep = ' ')
label <- paste0(label, '*')
}
# gsea_results$label[is.numeric(gsea_results$label)] <- NA
# result$label[is.numeric(result$label)] <- NA
} else {
stop('Significance argument should be a numeric vector')
}
}

if (cap_pvalues) {
## Set the minimum value to be 1/10th of the smallest label
cap_max = tail(breaks, 1)/10

result[[p_val_col]] <- pmax(cap_max, result[[p_val_col]])
}
range <- c(ceiling(-log(max(result[[p_val_col]]))),
floor(-log(min(result[[p_val_col]]))))+1

toppaths <- rbind(utils::head(result, n = top_n))
toppaths$name <- factor(toppaths$name)
dotplot <- ggplot(toppaths,
aes(
x = .data[['perc']],
y = .data[['name']],
size = -log10(.data[['pval']]),
size = .data[[p_val_col]],
color = .data[['NES']],
label = .data[['label']])) +
geom_point() +
Expand All @@ -223,16 +253,24 @@ gseaDotplot_single <- function(result,
labs(
x = "% of genes in leading edge", y = "Gene set",
color = "Normalized\nenrichment\nscore",
size = "Nom p-val", title = "Top enriched pathways",
size = "p-value", title = "Top enriched pathways",
caption = paste0("n = number of genes in pathway\n", caption)) +
scale_radius(
name = "NOM p-val",
range = range,
breaks = breaks,
trans=reverselog_trans()
# limits = c(0, 3),
labels = labels
# labels = labels
) +
scale_y_discrete(limits = toppaths$name) +
geom_text(na.rm = TRUE, color = 'white', size = 3)
return(dotplot)
}

reverselog_trans <- function(base = 10) {
trans <- function(x) -log(x, base)
inv <- function(x) base^(-x)
scales::trans_new(paste0("reverselog-", format(base)), trans, inv,
scales::log_breaks(base = base),
domain = c(1e-100, Inf))
}
10 changes: 4 additions & 6 deletions R/runfgsea.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
#' pathway for easier reading
#' @param na_omit Bool, exclude genes with missing data in results? This checks
#' all values for NAs so be mindful of modified `result` objects
#' @inheritParams fgsea::fgseaSimple
#' @inheritParams fgsea::fgsea
#'
#' @inherit fgsea::fgseaSimple return
#' @inherit fgsea::fgsea return
#' @export
#'
#' @importFrom fgsea fgseaSimple
#' @importFrom fgsea fgsea
#' @importFrom stringr str_split
#' @importFrom stats na.omit
#' @examples
Expand All @@ -25,7 +25,6 @@
#'
runfgsea <- function(result,
pathways,
nperm = 1000,
minSize = 10,
maxSize = 500,
breakdown_pathway_names = FALSE,
Expand All @@ -37,10 +36,9 @@ runfgsea <- function(result,
names(fgsea_data) <- rownames(result)
fgsea_data <- fgsea_data[!is.na(fgsea_data)]
fgsea_data <- sort(fgsea_data, decreasing = TRUE)
res <- fgsea::fgseaSimple(
res <- fgsea::fgsea(
pathways = pathways,
stats = fgsea_data,
nperm = nperm,
minSize = minSize,
maxSize = maxSize
)
Expand Down
43 changes: 13 additions & 30 deletions man/gseaDotplot_joint.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 903d6a7

Please sign in to comment.