diff --git a/NAMESPACE b/NAMESPACE index ac4c14d..527c21b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(mappingBinsPlot) export(parseReadPerGeneFiles) export(plotFilterByExpr) export(plotGeneExpression) +export(plotModelCoeffs) export(plotPCAFromConfig) export(plotVoomByGroup) export(runfgsea) @@ -80,3 +81,4 @@ importFrom(tibble,as_tibble) importFrom(tibble,rownames_to_column) importFrom(tidyr,pivot_longer) importFrom(utils,head) +importFrom(utils,stack) diff --git a/R/plotGeneExpression.R b/R/plotGeneExpression.R index 28c4438..1313ed0 100644 --- a/R/plotGeneExpression.R +++ b/R/plotGeneExpression.R @@ -5,10 +5,17 @@ #' aesthetics. This is meant to be minimal so you can adjust the aesthetics #' as necessary, but it can still be used as a quick check. #' +#' You can subset data using the `subsetting` and `subsets` arguments to focus +#' the plotting. +#' +#' #' @param gene Gene to plot expression for -#' @param grouping Variable to group expression by. Must be a column of `data@colData` -#' @param data A DESeqDataSet object. Use this argument to select specific assays -#' to plot. The default is `assays(analysis$dds)$rld` (assumed to be the rlog transform). +#' @param counts The expression data to plot +#' @param metadata The metadata to associate with the counts data +#' @param grouping Variable to group expression by. Must be a column of `metadata` +#' @param groups Optional specification of level ordering for grouping var +#' @param subsetting Optional variable to subset expression by. Must be a column of `metadata` +#' @param subsets Specification of levels to subset for #' @param boxplot Boolean, plot `geom_boxplot`? #' @param jitter Boolean, plot `geom_jitter`? #' @param axis_text_x Option to modify X axis text. Default rotates labels 90 @@ -18,30 +25,157 @@ #' @export #' #' @import ggplot2 -#' @importFrom SummarizedExperiment assay #' @importFrom rlang .data +#' @importFrom utils stack #' @examples #' \dontrun{ -#' ## my object is called dds_full, so I have to manually provide data -#' plotGeneExpression('MAMU-A', grouping = 'groupIDs', data = assays(dds_full)$rld) +#' +#' ## With a limma model fit +#' plotGeneExpression('MAMU-A', counts = model_fit$EList$E, +#' metadata = model_fit$targets, grouping = 'groupIDs') +#' +#' ## With a DESeq model +#' plotGeneExpression('MAMU-A', counts = assay(assays(analysis$dds)$rld), +#' metadata = colData(analysis$dds), grouping = 'groupIDs') #' } #' plotGeneExpression <- function(gene, grouping, - data = SummarizedExperiment::assays(analysis$dds)$rld, + groups, + counts, + metadata, + subsetting, + subsets, boxplot = TRUE, jitter = TRUE, axis_text_x = element_text(angle = 90, vjust = 0.5, hjust=1)) { - if (!(gene %in% rownames(data))){ - stop('Gene not found in data') + # if (missing(gene) | missing(grouping) | missing(counts) | missing(metadata)) { + # stop('The following arguments are all required: gene, grouping, groups, counts, metadata') + # } + if (!(gene %in% rownames(counts))){ + stop('Gene not found in counts data') + } + if (!grouping %in% colnames(metadata)) { + stop('grouping variable not found in metadata') + } + if (xor(missing(subsetting), missing(subsets))) { + stop('Subsetting data requires both subsets and subsetting to be specified') + } + if (any(colnames(counts) != rownames(metadata))) { + warning('Colnames of counts does not match rownames of metadata. Continuing, but + this may suggest the data are not properly associated') + } + if (!missing(subsetting)) { + if (!subsetting %in% colnames(metadata)) { + stop('subsetting variable not found in metadata') + } + metadata <- metadata[metadata[[subsetting]] %in% subsets,] + counts <- counts[,rownames(metadata)] } - plotdata <- as.data.frame(t(SummarizedExperiment::assay(data[gene, ]))) + plotdata <- utils::stack(counts[gene, ]) colnames(plotdata) <- 'Gene' - plotdata$grouping <- data@colData[[grouping]] + plotdata$grouping <- metadata[[grouping]] + if (!missing(groups)) { + if (!all(groups %in% unique(metadata[[grouping]]))) { + stop('All groups not found in grouping variable of metadata') + } + plotdata$grouping <- factor(plotdata$grouping, levels = groups) + } ggplot2::ggplot(plotdata, aes(x=grouping, y=.data[['Gene']])) + (if (boxplot) { ggplot2::geom_boxplot(outlier.color = if (jitter) {NA} else {'black'}) }) + - ggplot2::theme_bw() + (if (jitter) { ggplot2::geom_jitter() }) + + ggplot2::theme_bw() + ggplot2::labs(x = 'Group', y = 'Expression', main = gene) + ggplot2::theme(axis.text.x = axis_text_x) -} \ No newline at end of file +} + + + + +#' Plot the results of a model fit +#' +#' Plots the expression, coefficients, and estimated change for a single gene and +#' contrast. You have to pass in a contrasts dataframe (or list) and specify which +#' contrast to use. See the example for details. The rest of the arguments get +#' passed to `plotGeneExpression`. +#' +#' For example, if the question is: +#' "the results say JUNB has a significant increase in expression, +#' in Day 14 compared to day 0, does this seem to be true?" +#' +#' Your numerator/denominator specification would look something like: +#' numerator = '(Intercept) + dayDay14', denominator = '(Intercept)' +#' +#' @param gene Gene to plot +#' @param numerator Full numerator of contrast, including all cancelled terms +#' @param denominator Full denominator of contrast, including all cancelled terms +#' @param coefficients Matrix of coefficients fit by model of your choice +#' @inheritParams plotGeneExpression +#' +#' @returns A ggplot object +#' @export +#' +#' @import ggplot2 +#' +#' @examples +#' \dontrun{ +#' +#' plotModelCoeffs(gene = 'EZR', +#' coefficients = bulk$fit$coefficients, +#' counts = bulk$fit$EList$E, +#' metadata = bulk$dge$samples, +#' numerator = 'grp3.P14', denominator = 'grp2.P14', +#' grouping = 'grp', +#' subsetting = 'day', subsets = 'P14') +#' } +plotModelCoeffs <- function(gene, + numerator, denominator, + counts, metadata, + grouping, groups, + subsetting, subsets, + coefficients) { + baseplot <- plotGeneExpression(gene, + counts = counts, + metadata = metadata, + grouping = grouping, + groups = groups, + subsetting = subsetting, + subsets = subsets) + contrast_num <- round(.extract_coefficients(gene = gene, + terms = numerator, + coefficients = coefficients), 2) + contrast_den <- round(.extract_coefficients(gene = gene, + terms = denominator, + coefficients = coefficients), 2) + contrast_line_num <- ggplot2::geom_hline(yintercept = contrast_num, linetype = 'dashed') + contrast_line_den <- ggplot2::geom_hline(yintercept = contrast_den, linetype = 'dashed') + + arrow <- ggplot2::geom_segment(aes(x = 1.5, xend = 1.5, + y = contrast_den, + yend = contrast_num), + arrow = ggplot2::arrow()) + baseplot + + contrast_line_num + + contrast_line_den + + arrow + + ggplot2::scale_y_continuous(breaks = c(contrast_den, contrast_num)) +} + +.extract_coefficients <- function(gene, + coefficients, + terms) { + if (!gene %in% rownames(coefficients)) { + stop('Gene not found in coefficients') + } + coefficients <- coefficients[gene,] + terms <- unlist(strsplit(terms, '\\s*\\+\\s*', perl = TRUE)) + value <- 0 + for (term in terms) { + if (!term %in% names(coefficients)) { + stop(paste0("Error: Term '", term, + "' not found in matrix of coefficients")) + } + value <- value + unname(coefficients[term]) + } + return(value) +} diff --git a/man/plotGeneExpression.Rd b/man/plotGeneExpression.Rd index 55ecf58..b8ef447 100644 --- a/man/plotGeneExpression.Rd +++ b/man/plotGeneExpression.Rd @@ -7,7 +7,11 @@ plotGeneExpression( gene, grouping, - data = SummarizedExperiment::assays(analysis$dds)$rld, + groups, + counts, + metadata, + subsetting, + subsets, boxplot = TRUE, jitter = TRUE, axis_text_x = element_text(angle = 90, vjust = 0.5, hjust = 1) @@ -16,10 +20,17 @@ plotGeneExpression( \arguments{ \item{gene}{Gene to plot expression for} -\item{grouping}{Variable to group expression by. Must be a column of \code{data@colData}} +\item{grouping}{Variable to group expression by. Must be a column of \code{metadata}} -\item{data}{A DESeqDataSet object. Use this argument to select specific assays -to plot. The default is \code{assays(analysis$dds)$rld} (assumed to be the rlog transform).} +\item{groups}{Optional specification of level ordering for grouping var} + +\item{counts}{The expression data to plot} + +\item{metadata}{The metadata to associate with the counts data} + +\item{subsetting}{Optional variable to subset expression by. Must be a column of \code{metadata}} + +\item{subsets}{Specification of levels to subset for} \item{boxplot}{Boolean, plot \code{geom_boxplot}?} @@ -36,11 +47,20 @@ Returns a simple ggplot of gene expression, optionally with jitter or boxplots overlayed. These are optional in case you want to do other aesthetics. This is meant to be minimal so you can adjust the aesthetics as necessary, but it can still be used as a quick check. + +You can subset data using the \code{subsetting} and \code{subsets} arguments to focus +the plotting. } \examples{ \dontrun{ -## my object is called dds_full, so I have to manually provide data -plotGeneExpression('MAMU-A', grouping = 'groupIDs', data = assays(dds_full)$rld) + + ## With a limma model fit + plotGeneExpression('MAMU-A', counts = model_fit$EList$E, + metadata = model_fit$targets, grouping = 'groupIDs') + + ## With a DESeq model + plotGeneExpression('MAMU-A', counts = assay(assays(analysis$dds)$rld), + metadata = colData(analysis$dds), grouping = 'groupIDs') } } diff --git a/man/plotModelCoeffs.Rd b/man/plotModelCoeffs.Rd new file mode 100644 index 0000000..798c519 --- /dev/null +++ b/man/plotModelCoeffs.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotGeneExpression.R +\name{plotModelCoeffs} +\alias{plotModelCoeffs} +\title{Plot the results of a model fit} +\usage{ +plotModelCoeffs( + gene, + numerator, + denominator, + counts, + metadata, + grouping, + groups, + subsetting, + subsets, + coefficients +) +} +\arguments{ +\item{gene}{Gene to plot} + +\item{numerator}{Full numerator of contrast, including all cancelled terms} + +\item{denominator}{Full denominator of contrast, including all cancelled terms} + +\item{counts}{The expression data to plot} + +\item{metadata}{The metadata to associate with the counts data} + +\item{grouping}{Variable to group expression by. Must be a column of \code{metadata}} + +\item{groups}{Optional specification of level ordering for grouping var} + +\item{subsetting}{Optional variable to subset expression by. Must be a column of \code{metadata}} + +\item{subsets}{Specification of levels to subset for} + +\item{coefficients}{Matrix of coefficients fit by model of your choice} +} +\value{ +A ggplot object +} +\description{ +Plots the expression, coefficients, and estimated change for a single gene and +contrast. You have to pass in a contrasts dataframe (or list) and specify which +contrast to use. See the example for details. The rest of the arguments get +passed to \code{plotGeneExpression}. +} +\details{ +For example, if the question is: +"the results say JUNB has a significant increase in expression, +in Day 14 compared to day 0, does this seem to be true?" + +Your numerator/denominator specification would look something like: +numerator = '(Intercept) + dayDay14', denominator = '(Intercept)' +} +\examples{ +\dontrun{ + +plotModelCoeffs(gene = 'EZR', + coefficients = bulk$fit$coefficients, + counts = bulk$fit$EList$E, + metadata = bulk$dge$samples, + numerator = 'grp3.P14', denominator = 'grp2.P14', + grouping = 'grp', + subsetting = 'day', subsets = 'P14') +} +}