Skip to content

Commit

Permalink
Merge pull request #28 from yerkes-gencore/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
derrik-gratz authored Apr 15, 2024
2 parents 5f9fa98 + f3b6146 commit 5c6cb49
Show file tree
Hide file tree
Showing 18 changed files with 524 additions and 138 deletions.
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: gencoreBulk
Title: NPRC Genomics Core Bulk RNA Analysis Utilities
Version: 0.2.1
Version: 0.2.2
Date: 2024-03-22
Author: person("Derrik", "Gratz", email = "[email protected]", role =
c("aut", "cre"), comment = c(ORCID = "0009-0002-5934-576X"))
Expand All @@ -9,6 +9,7 @@ Description: A shared codebase for custom R functions we find useful for
bulk RNA analysis.
License: MIT + file LICENSE
URL: https://github.com/yerkes-gencore/gencoreBulk
biocViews:
Imports:
Biobase,
circlize,
Expand All @@ -35,7 +36,9 @@ Imports:
SummarizedExperiment,
tibble,
tidyr
biocViews: ComplexHeatmap, EnhancedVolcano, DESeq2, SummarizedExperiment, fgsea
Encoding: UTF-8
Roxygen: list(markdown=TRUE)
RoxygenNote: 7.2.3
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ export(gseaDotplot_joint)
export(gseaDotplot_single)
export(heatmapFromGenelist)
export(mappingBinsPlot)
export(normalizeCountsForHeatmap)
export(normalizeCountsForHeatmapByIndividual)
export(parseReadPerGeneFiles)
export(pcaPlotSimple)
export(plotFilterByExpr)
export(plotGeneExpression)
export(plotPCAFromConfig)
Expand Down Expand Up @@ -69,6 +72,7 @@ importFrom(readr,write_lines)
importFrom(readr,write_tsv)
importFrom(rlang,.data)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,prcomp)
importFrom(stringr,str_count)
importFrom(stringr,str_remove)
Expand Down
81 changes: 48 additions & 33 deletions R/PCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
#'
#' @import ggplot2
#' @import ggrepel
#' @importFrom stats prcomp
#' @importFrom matrixStats rowVars
#' @importFrom SummarizedExperiment assays assay
#' @importFrom rlang .data
#'
Expand All @@ -32,9 +30,8 @@ plotPCAFromConfig <- function(analysis,
size = 5,
pc.1 = 1,
pc.2 = 2) {
pca_data <-
.pcaPlotGKT(assays(analysis$dds)$vst,
intgroup = names(colData(assays(analysis$dds)$vst)),
pcaPlotSimple(counts = assay(assays(analysis$dds)$vst),
metadata = colData(analysis$dds),
xpc = pc.1, ypc = pc.2
) +
geom_point(
Expand Down Expand Up @@ -85,36 +82,54 @@ plotPCAFromConfig <- function(analysis,
theme(text = element_text(size = 10)) # , arrow=arrow(ends="last", type="closed", length=unit(0.1, "inches")))
}

.pcaDataGKT <- function(object, intgroup = "condition", ntop = 500) {
rv <- matrixStats::rowVars(assay(object))

#' Simple base for PCA plot
#'
#' Calculates PCA on counts data. Returns a ggplot object with the PCA data
#' and associated metadata. No additional geoms are included, this is meant
#' to be a blank template. You do need to provide metadata here so it can be
#' accessed with geoms you add to this output.
#'
#' @param counts Expression data matrix. Colnames of counts should match rownames
#' of `metadata`
#' @param metadata Dataframe of metadata to associate with counts data. Rownames
#' of metadata should match colnames of `counts`.
#' @param xpc Numeric, which PC to plot on the x axis. Default 1
#' @param ypc Numeric, which PC to plot on the y axis. Default 2
#' @param ntop Number of genes to include in PCA. Default 500
#'
#' @returns A ggplot object
#' @export
#'
#' @import ggplot2
#' @importFrom stats prcomp
#' @importFrom matrixStats rowVars
#' @importFrom rlang .data
#'
#'
#' @examples
#' \dontrun{
#' pcaPlot <- pcaPlotSimple(assay(assays(pbmc_subset)$vst), metadata = colData(pbmc_subset))
#'
#' ## Manually adding geoms
#' pcaPlot + geom_point(aes(color = Individual, shape=Timepoint))
#'
#' }
#'
pcaPlotSimple <- function(counts, metadata, xpc = 1, ypc = 2, ntop = 500) {
if (colnames(counts) != rownames(metadata)) {
stop('Colnames of counts does not match rownames of metadata')
}
rv <- matrixStats::rowVars(counts)
select <- order(rv, decreasing = TRUE)[seq_len(min(
ntop,
length(rv)
))]
pca <- stats::prcomp(t(assay(object)[select, ]))
pca <- stats::prcomp(t(counts[select,]))
d <- merge(pca$x, metadata, by.x = 'row.names', by.y = 'row.names')
percentVar <- pca$sdev^2 / sum(pca$sdev^2)
if (!all(intgroup %in% names(colData(object)))) {
stop("the argument 'intgroup' should specify columns of colData(dds)")
}
intgroup.df <- as.data.frame(colData(object)[, intgroup,
drop = FALSE
])
group <- if (length(intgroup) > 1) {
factor(apply(intgroup.df, 1, paste, collapse = " : "))
} else {
colData(object)[[intgroup]]
}
d <- data.frame(pca$x,
group = group,
intgroup.df, name = colnames(object)
)
attr(d, "percentVar") <- percentVar
return(d)
}

.pcaPlotGKT <- function(object, intgroup = "condition", xpc = 1, ypc = 2, ntop = 500) {
d <- .pcaDataGKT(object, intgroup, ntop)
percentVar <- round(100 * attr(d, "percentVar"))
ggplot(d, aes_string(x = names(d)[xpc], y = names(d)[ypc])) +
labs(x = paste0(names(d)[xpc], ": ", percentVar[xpc], "% variance"), y = paste0(names(d)[ypc], ": ", percentVar[ypc], "% variance"))
}
percentVar <- round(100 * percentVar)
ggplot(d, aes(x = .data[[paste0('PC',xpc)]], y = .data[[paste0('PC',ypc)]])) +
labs(x = paste0('PC',xpc, ": ", percentVar[xpc], "% variance"),
y = paste0('PC',ypc, ": ", percentVar[ypc], "% variance"))
}
13 changes: 0 additions & 13 deletions R/RLE.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,3 @@ checkRLE <- function(dds) {
ggtitle(title) +
aes(forcats::fct_inorder(.data$Sample)) #+ scale_y_continuous(limits = c(-9,3.25), expand = c(0,0))
}

.plotRLE <- function(data, title) {
tibble::as_tibble(data, rownames = NA) %>%
tidyr::pivot_longer(everything(), names_to = "Sample", values_to = "RLE") %>%
ggplot(aes(x = .data$Sample, y = .data$RLE)) +
geom_hline(yintercept = 0, color = "red") +
geom_violin(draw_quantiles = c(0.25, 0.75), trim = TRUE, color = "lightgreen", alpha = 0.1) +
geom_boxplot(alpha = 0) +
theme_bw() +
theme(axis.text.x = element_text(vjust = 0.5, angle = 90), axis.title.x = element_blank(), aspect.ratio = 0.55) +
ggtitle(title) +
aes(forcats::fct_inorder(.data$Sample)) #+ scale_y_continuous(limits = c(-9,3.25), expand = c(0,0))
}
2 changes: 1 addition & 1 deletion R/gseaDotplot_joint.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,5 @@ gseaDotplot_joint <- function(gsea_results,
range=c(1,8),
breaks=-log10(c(0.1,0.01,0.001,0.0001)),
labels=c(0.1,0.01,0.001,0.0001)) +
geom_text(na.rm = TRUE, color = 'black', size = 3)
geom_text(na.rm = TRUE, color = 'white', size = 3)
}
137 changes: 68 additions & 69 deletions R/heatmapFromGenelist.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
#' Makes a heatmap of the given list of genes, separating samples slightly by
#' group variable. The heatmap will render samples in the order of colnames
#' in the provided data, and will be split by the metadata column specified.
#' Expression values will be normalized to the median of a specified group.
#' Normalization by groups or samples is no longer contained in this function.
#' See `gencoreBulk::normalizeCounts` for help on normalizing data for heatmaps.
#'
#' If you want to reorder columns plotted, arrange the data passed into `data`.
#' See the examples
#'
#' @param geneList Character vector of genes to plot
#' @param baseline_grouping Character, level of `colnames(colData(data))`
#' @param baseline level of `baseline_grouping` in `colData(data)` used to calculate
#' median, or leave blank to use all samples to generate median
#' @param data Object of class DESeqTransform
#' @param data Matrix of processed counts
#' @param slice_labels Optional labels for sliced columns
#' @param colors vector of color values passed to colorRamp2, default is c("blue", "white", "red")
#' @param slice_labels_rot Rotation angle of `slice_labels`
Expand All @@ -35,16 +34,26 @@
#'
#' @examples
#' \dontrun{
#'
#' ## Normalize data for plotting
#' data_to_plot <- normalizeCountsForHeatmapByIndividual(assay(assays(obj.pbmc)$rld),
#' obj.pbmc@colData,
#' group_var = 'Timepoint', baseline = 'pre',
#' individual_var = 'Individual',
#' remove_baseline = FALSE)
#'
#' ## Simple call
#' heatmapFromGenelist(
#' geneList = c("Ccl2", "Cxcl1", "Cxcl2", "Postn", "Fn1", "Thbs1"),
#' data_to_plot,
#' baseline_grouping = "Group",
#' baseline = "Cont"
#' )
#'
#' ## run with custom reordering of data and labeled slices
#' heatmapFromGenelist(
#' geneList = c("Ccl2", "Cxcl1", "Cxcl2", "Postn", "Fn1", "Thbs1"),
#' data_to_plot,
#' baseline_grouping = "Group",
#' baseline = "Cont",
#' column_split = c(rep(1, 3), rep(2, 3), rep(3, 3), rep(4, 3)),
Expand All @@ -55,30 +64,32 @@
#'
#' @export
heatmapFromGenelist <- function(geneList,
baseline_grouping = NULL,
baseline = NULL,
column_split = NULL,
slice_labels = NULL,
colors = c("blue", "white", "red"),
data = SummarizedExperiment::assays(analysis$dds)$rld,
column_labels = colnames(data),
slice_labels_rot = 90,
box_width = unit(3.5, "mm"),
box_height = unit(3.5, "mm"),
width_buffer = unit(5, "mm"),
height_buffer = unit(10, "mm"),
column_title = " ",
cluster_rows = FALSE,
cluster_columns = FALSE,
column_gap = unit(2, "mm"),
scale_min = -2,
scale_max = 2,
heatmap_legend_param = list(
at = c(scale_min, 0, scale_max),
labels = c(scale_min,0,scale_max),
title = 'log2 fold\ndifference\nfrom\nmedian\nexpression'
),
...) {
data,
column_split = NULL,
slice_labels = NULL,
colors = c("blue", "white", "red"),
column_labels = colnames(data),
slice_labels_rot = 90,
box_width = unit(3.5, "mm"),
box_height = unit(3.5, "mm"),
width_buffer = unit(5, "mm"),
height_buffer = unit(10, "mm"),
column_title = " ",
cluster_rows = FALSE,
cluster_columns = FALSE,
column_gap = unit(2, "mm"),
scale_min = -2,
scale_max = 2,
heatmap_legend_param = list(
at = c(scale_min, 0, scale_max),
labels = c(scale_min,0,scale_max),
title = 'log2 fold\nchange'
),
...) {
if (inherits(data, 'DESeqTransform')) {
message('Converting DESeqTransform data into matrix')
data <- SummarizedExperiment::assay(data)
}
duds <- geneList[!geneList %in% rownames(data)]
if (length(duds) > 0){
geneList <- geneList[geneList %in% rownames(data)]
Expand All @@ -88,45 +99,33 @@ heatmapFromGenelist <- function(geneList,
message(paste0('Genes ', paste0(duds, collapse = ', '), ' not found in data'))
}
}
hmap <- data[geneList, ]
if (is.null(baseline_grouping) | is.null(baseline)) {
message('Basline grouping or baseline level not specified, using all samples
to generate median expression per gene')
baseline <- matrixStats::rowMedians(SummarizedExperiment::assay(hmap))
} else if (!baseline_grouping %in% colnames(colData(data))) {
stop("Argument 'baseline_grouping' should be in colData(data)")
} else if (!baseline %in% unique(colData(data)[[baseline_grouping]])) {
stop("Argument 'baseline' should be a level of 'baseline_grouping' in colData(data)")
} else{
baseline <- matrixStats::rowMedians(SummarizedExperiment::assay(hmap[, as.character(hmap@colData[[baseline_grouping]]) %in% baseline]))
}
hmap <- SummarizedExperiment::assay(hmap) - baseline
ComplexHeatmap::Heatmap(hmap,
heatmap_legend_param = heatmap_legend_param,
#border = "black",
width = ncol(hmap) * box_width + width_buffer,
height = nrow(hmap) * box_height + height_buffer,
#rect_gp = grid::gpar(color = "black"),
column_title = column_title,
cluster_rows = cluster_rows,
cluster_columns = cluster_columns,
column_split = column_split,
top_annotation = (if (!is.null(slice_labels)) {
if (is.null(column_split)) {
warning("Setting labels requires slices to also be set")
}
ComplexHeatmap::HeatmapAnnotation(foo = ComplexHeatmap::anno_block(
gp = gpar(col = NA),
labels = slice_labels,
labels_gp = gpar(col = "black", fontsize = 10),
labels_rot = slice_labels_rot, height = unit(2, "cm")
))
} else {
NULL
}),
column_gap = column_gap,
column_labels = column_labels,
col = circlize::colorRamp2(c(scale_min, 0, scale_max), colors),
...
data <- data[geneList,,drop=FALSE]
ComplexHeatmap::Heatmap(data,
heatmap_legend_param = heatmap_legend_param,
#border = "black",
width = ncol(data) * box_width + width_buffer,
height = nrow(data) * box_height + height_buffer,
#rect_gp = grid::gpar(color = "black"),
column_title = column_title,
cluster_rows = cluster_rows,
cluster_columns = cluster_columns,
column_split = column_split,
top_annotation = (if (!is.null(slice_labels)) {
if (is.null(column_split)) {
warning("Setting labels requires slices to also be set")
}
ComplexHeatmap::HeatmapAnnotation(foo = ComplexHeatmap::anno_block(
gp = gpar(col = NA),
labels = slice_labels,
labels_gp = gpar(col = "black", fontsize = 10),
labels_rot = slice_labels_rot, height = unit(2, "cm")
))
} else {
NULL
}),
column_gap = column_gap,
column_labels = column_labels,
col = circlize::colorRamp2(c(scale_min, 0, scale_max), colors),
...
)
}
Loading

0 comments on commit 5c6cb49

Please sign in to comment.