Skip to content

Commit

Permalink
Merge pull request #20 from yerkes-gencore/dotplot_update
Browse files Browse the repository at this point in the history
Generalize dotplot, remove RENV, add model-fit visualization
  • Loading branch information
derrik-gratz authored Feb 28, 2024
2 parents 5cfd89c + 78f072a commit 4421b93
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 19 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(mappingBinsPlot)
export(parseReadPerGeneFiles)
export(plotFilterByExpr)
export(plotGeneExpression)
export(plotModelCoeffs)
export(plotPCAFromConfig)
export(plotVoomByGroup)
export(runfgsea)
Expand Down Expand Up @@ -80,3 +81,4 @@ importFrom(tibble,as_tibble)
importFrom(tibble,rownames_to_column)
importFrom(tidyr,pivot_longer)
importFrom(utils,head)
importFrom(utils,stack)
160 changes: 147 additions & 13 deletions R/plotGeneExpression.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@
#' aesthetics. This is meant to be minimal so you can adjust the aesthetics
#' as necessary, but it can still be used as a quick check.
#'
#' You can subset data using the `subsetting` and `subsets` arguments to focus
#' the plotting.
#'
#'
#' @param gene Gene to plot expression for
#' @param grouping Variable to group expression by. Must be a column of `data@colData`
#' @param data A DESeqDataSet object. Use this argument to select specific assays
#' to plot. The default is `assays(analysis$dds)$rld` (assumed to be the rlog transform).
#' @param counts The expression data to plot
#' @param metadata The metadata to associate with the counts data
#' @param grouping Variable to group expression by. Must be a column of `metadata`
#' @param groups Optional specification of level ordering for grouping var
#' @param subsetting Optional variable to subset expression by. Must be a column of `metadata`
#' @param subsets Specification of levels to subset for
#' @param boxplot Boolean, plot `geom_boxplot`?
#' @param jitter Boolean, plot `geom_jitter`?
#' @param axis_text_x Option to modify X axis text. Default rotates labels 90
Expand All @@ -18,30 +25,157 @@
#' @export
#'
#' @import ggplot2
#' @importFrom SummarizedExperiment assay
#' @importFrom rlang .data
#' @importFrom utils stack
#' @examples
#' \dontrun{
#' ## my object is called dds_full, so I have to manually provide data
#' plotGeneExpression('MAMU-A', grouping = 'groupIDs', data = assays(dds_full)$rld)
#'
#' ## With a limma model fit
#' plotGeneExpression('MAMU-A', counts = model_fit$EList$E,
#' metadata = model_fit$targets, grouping = 'groupIDs')
#'
#' ## With a DESeq model
#' plotGeneExpression('MAMU-A', counts = assay(assays(analysis$dds)$rld),
#' metadata = colData(analysis$dds), grouping = 'groupIDs')
#' }
#'
plotGeneExpression <- function(gene,
grouping,
data = SummarizedExperiment::assays(analysis$dds)$rld,
groups,
counts,
metadata,
subsetting,
subsets,
boxplot = TRUE,
jitter = TRUE,
axis_text_x = element_text(angle = 90, vjust = 0.5, hjust=1)) {
if (!(gene %in% rownames(data))){
stop('Gene not found in data')
# if (missing(gene) | missing(grouping) | missing(counts) | missing(metadata)) {
# stop('The following arguments are all required: gene, grouping, groups, counts, metadata')
# }
if (!(gene %in% rownames(counts))){
stop('Gene not found in counts data')
}
if (!grouping %in% colnames(metadata)) {
stop('grouping variable not found in metadata')
}
if (xor(missing(subsetting), missing(subsets))) {
stop('Subsetting data requires both subsets and subsetting to be specified')
}
if (any(colnames(counts) != rownames(metadata))) {
warning('Colnames of counts does not match rownames of metadata. Continuing, but
this may suggest the data are not properly associated')
}
if (!missing(subsetting)) {
if (!subsetting %in% colnames(metadata)) {
stop('subsetting variable not found in metadata')
}
metadata <- metadata[metadata[[subsetting]] %in% subsets,]
counts <- counts[,rownames(metadata)]
}
plotdata <- as.data.frame(t(SummarizedExperiment::assay(data[gene, ])))
plotdata <- utils::stack(counts[gene, ])
colnames(plotdata) <- 'Gene'
plotdata$grouping <- data@colData[[grouping]]
plotdata$grouping <- metadata[[grouping]]
if (!missing(groups)) {
if (!all(groups %in% unique(metadata[[grouping]]))) {
stop('All groups not found in grouping variable of metadata')
}
plotdata$grouping <- factor(plotdata$grouping, levels = groups)
}
ggplot2::ggplot(plotdata, aes(x=grouping, y=.data[['Gene']])) +
(if (boxplot) { ggplot2::geom_boxplot(outlier.color = if (jitter) {NA} else {'black'}) }) +
ggplot2::theme_bw() +
(if (jitter) { ggplot2::geom_jitter() }) +
ggplot2::theme_bw() +
ggplot2::labs(x = 'Group', y = 'Expression', main = gene) +
ggplot2::theme(axis.text.x = axis_text_x)
}
}




#' Plot the results of a model fit
#'
#' Plots the expression, coefficients, and estimated change for a single gene and
#' contrast. You have to pass in a contrasts dataframe (or list) and specify which
#' contrast to use. See the example for details. The rest of the arguments get
#' passed to `plotGeneExpression`.
#'
#' For example, if the question is:
#' "the results say JUNB has a significant increase in expression,
#' in Day 14 compared to day 0, does this seem to be true?"
#'
#' Your numerator/denominator specification would look something like:
#' numerator = '(Intercept) + dayDay14', denominator = '(Intercept)'
#'
#' @param gene Gene to plot
#' @param numerator Full numerator of contrast, including all cancelled terms
#' @param denominator Full denominator of contrast, including all cancelled terms
#' @param coefficients Matrix of coefficients fit by model of your choice
#' @inheritParams plotGeneExpression
#'
#' @returns A ggplot object
#' @export
#'
#' @import ggplot2
#'
#' @examples
#' \dontrun{
#'
#' plotModelCoeffs(gene = 'EZR',
#' coefficients = bulk$fit$coefficients,
#' counts = bulk$fit$EList$E,
#' metadata = bulk$dge$samples,
#' numerator = 'grp3.P14', denominator = 'grp2.P14',
#' grouping = 'grp',
#' subsetting = 'day', subsets = 'P14')
#' }
plotModelCoeffs <- function(gene,
numerator, denominator,
counts, metadata,
grouping, groups,
subsetting, subsets,
coefficients) {
baseplot <- plotGeneExpression(gene,
counts = counts,
metadata = metadata,
grouping = grouping,
groups = groups,
subsetting = subsetting,
subsets = subsets)
contrast_num <- round(.extract_coefficients(gene = gene,
terms = numerator,
coefficients = coefficients), 2)
contrast_den <- round(.extract_coefficients(gene = gene,
terms = denominator,
coefficients = coefficients), 2)
contrast_line_num <- ggplot2::geom_hline(yintercept = contrast_num, linetype = 'dashed')
contrast_line_den <- ggplot2::geom_hline(yintercept = contrast_den, linetype = 'dashed')

arrow <- ggplot2::geom_segment(aes(x = 1.5, xend = 1.5,
y = contrast_den,
yend = contrast_num),
arrow = ggplot2::arrow())
baseplot +
contrast_line_num +
contrast_line_den +
arrow +
ggplot2::scale_y_continuous(breaks = c(contrast_den, contrast_num))
}

.extract_coefficients <- function(gene,
coefficients,
terms) {
if (!gene %in% rownames(coefficients)) {
stop('Gene not found in coefficients')
}
coefficients <- coefficients[gene,]
terms <- unlist(strsplit(terms, '\\s*\\+\\s*', perl = TRUE))
value <- 0
for (term in terms) {
if (!term %in% names(coefficients)) {
stop(paste0("Error: Term '", term,
"' not found in matrix of coefficients"))
}
value <- value + unname(coefficients[term])
}
return(value)
}
32 changes: 26 additions & 6 deletions man/plotGeneExpression.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

69 changes: 69 additions & 0 deletions man/plotModelCoeffs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4421b93

Please sign in to comment.