Skip to content

Commit

Permalink
Further revise plot options.
Browse files Browse the repository at this point in the history
Redundant plot_gene() styles dropped.
Never-used plot_overview() types (boot thresh VS #DTU) dropped.
Volcano and maxdprop in vignettes refreshed.

Also fix bug introduced yesterday into the DTU id listing, that caused
the lists to be empty/wrong.
  • Loading branch information
fruce-ki committed Apr 5, 2017
1 parent 198574c commit eb50d79
Show file tree
Hide file tree
Showing 20 changed files with 305 additions and 380 deletions.
202 changes: 48 additions & 154 deletions R/results.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ get_dtu_ids <- function(dtuo) {
setorder(myt, -adp, na.last=TRUE)
# Sort genes.
})
pid <- unique(myt[, "parent_id", with=FALSE])
po <- match(dtuo$Genes[, "parent_id", with=FALSE], pid)
pid <- unique(myt$parent_id)
po <- match(dtuo$Genes$parent_id, pid)
myp <- copy(dtuo$Genes[order(po), ])

with(myp, {
Expand Down Expand Up @@ -74,11 +74,8 @@ get_dtu_ids <- function(dtuo) {
#' @param dtuo A DTU object.
#' @param pid A \code{parent_id} to make the plot for.
#' @param style Different themes: \itemize{
#' \item{"plain" - Grouped by condition.},
#' \item{"paired" - Grouped by isoform.},
#' \item{"points" - Grouped by condition. Show individual measurements as points.},
#' \item{"pairedpnt" - Grouped by isoform. Show individual measurements as points.},
#' \item{"lines" - (Default) Grouped by condition. Connect individual measurements with colour-coded lines.}
#' \item{"byisoform" - Grouped by isoform. Show individual measurements as points.},
#' \item{"bycondition" - (Default) Grouped by condition. Connect individual measurements with colour-coded lines.}
#' \item{"linesonly" - Grouped by condition. Connect replicate measurements as colour-coded lines. Hide the boxplots.}
#' }
#' @param fillby Applies to the boxplots. Not all options will work with all styles.
Expand Down Expand Up @@ -160,72 +157,14 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N

# Plot.
result <- NULL
###
if (style=="plain") {
if (is.na(fillby))
fillby <- "condition"
if (is.na(colourby))
colourby <- "DTU"
if (colourby=="replicate")
stop("This style cannot be coloured by replicate!")
shapeby="none"
result <- ggplot(vis_data, aes(x= isoform, y= vals, fill= vis_data[[fillby]], colour=vis_data[[colourby]])) +
facet_grid(type ~ condition, scales= "free") +
geom_boxplot(alpha=0.5, outlier.shape= NA) +
scale_fill_manual(values= colplt[[fillby]], name=fillby) +
scale_colour_manual(values= colplt[[colourby]], name=colourby)
###
} else if ( style == "paired" ) {
if(is.na(fillby)) {
if (is.na(colourby) || colourby != "condition") {
fillby <- "condition"
} else {
fillby <- "DTU"
}
} else if (fillby != "condition"){
if (!is.na(colourby) && colourby != "condition") {
stop("This style requires either fillby or colourby to be set to \"condition\".")
} else {
colourby <- "condition"
}
}
if(is.na(colourby))
colourby <- "DTU"
if (colourby=="replicate")
stop("This style cannot be coloured by replicate!")
shapeby="none"
result <- ggplot(vis_data, aes(x= isoform, y= vals, fill= vis_data[[fillby]], colour=vis_data[[colourby]])) +
facet_grid(type ~ ., scales= "free") +
geom_boxplot(alpha=0.5, outlier.shape= NA, width=0.5) +
scale_fill_manual(values= colplt[[fillby]], name=fillby) +
scale_colour_manual(values= colplt[[colourby]], name=colourby)
###
} else if (style=="points") {
if (is.na(fillby))
fillby <- "condition"
if (is.na(colourby))
colourby <- "DTU"
if (is.na(shapeby)) {
if (all("DTU" != c(fillby, colourby))) {
shapeby <- "DTU"
} else {
shapeby <- "none"
}
}
result <- ggplot(vis_data, aes(x= isoform, y= vals)) +
facet_grid(type ~ condition, scales= "free") +
geom_point(aes(colour= vis_data[[colourby]], shape=vis_data[[shapeby]]), position= position_jitterdodge(), stroke= rel(0.8)) +
geom_boxplot(aes(fill= vis_data[[fillby]]), alpha=0.2, outlier.shape= NA) +
scale_shape_manual(values=shaplt[[shapeby]], name=shapeby) +
scale_fill_manual(values= colplt[[fillby]], name=fillby) +
scale_colour_manual(values= colplt[[colourby]], name=colourby)
###
} else if ( style == "pairedpnt" ) {

### BY ISOFORM.
if (any(style==c("byisoform", "merged"))) {
if(is.na(fillby)) {
if (is.na(colourby) || colourby != "condition") {
if (is.na(colourby) || colourby != "condition" ) {
fillby <- "condition"
} else {
fillby <- "DTU"
} else {
fillby <- "isoform"
}
} else if (fillby != "condition"){
if (!is.na(colourby) && colourby != "condition") {
Expand All @@ -234,47 +173,47 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N
colourby <- "condition"
}
}
if(is.na(colourby))
colourby <- "DTU"
if (colourby=="replicate")
stop("This style cannot be coloured by replicate!")
if(is.na(colourby)) {
colourby <- "isoform"
} else if (colourby=="replicate") {
colourby="none"
}
if (is.na(shapeby)) {
if (all("DTU" != c(fillby, colourby))) {
shapeby <- "DTU"
} else if (all("isoform" != c(fillby, colourby))) {
shapeby <- "isoform"
} else {
shapeby <- "none"
}
}
result <- ggplot(vis_data, aes(x= isoform, y= vals)) +
result <- ggplot(vis_data, aes(x= isoform, y= vals, colour= vis_data[[colourby]], fill= vis_data[[fillby]])) +
facet_grid(type ~ ., scales= "free") +
geom_jitter(aes(colour= vis_data[[colourby]], shape=vis_data[[shapeby]]), position=position_dodge(width=0.5), stroke= rel(0.8)) +
geom_boxplot(aes(fill= vis_data[[fillby]]), alpha=0.2, outlier.shape= NA) +
geom_jitter(aes(shape=vis_data[[shapeby]]), position=position_jitterdodge(), stroke= rel(0.8)) +
geom_boxplot(position=position_dodge(), alpha=0.3, outlier.shape= NA) +
scale_shape_manual(values= shaplt[[shapeby]], name=shapeby) +
scale_fill_manual(values= colplt[[fillby]], name=fillby) +
scale_colour_manual(values= colplt[[colourby]], name=colourby)
###
} else if (style=="lines") {
### BY CONDITION.
} else if (any(style==c("bycondition", "lines"))) {
if (is.na(fillby))
fillby <- "DTU"
if (is.na(colourby))
colourby <- "replicate"
colourby <- "replicate"
shapeby="none"
result <- ggplot(vis_data, aes(x= isoform, y= vals, fill= vis_data[[fillby]])) +
facet_grid(type ~ condition, scales= "free") +
geom_path(aes(colour= replicate, group= replicate)) +
geom_boxplot(alpha=0.2, outlier.shape= NA) +
geom_boxplot(alpha=0.3, outlier.shape= NA) +
scale_fill_manual(values= colplt[[fillby]], name=fillby)
###
### BY CONDITION LINESONLY.
} else if (style=="linesonly") {
if (is.na(fillby))
fillby <- "none"
if (is.na(colourby))
colourby <- "replicate"
fillby <- "none"
colourby <- "replicate"
shapeby="none"
result <- ggplot(vis_data, aes(x= isoform, y= vals, colour= replicate)) +
facet_grid(type ~ condition, scales= "free") +
geom_path(aes(group= replicate))
###
### ERROR
} else {
stop("Unknown plot style.")
}
Expand All @@ -293,7 +232,6 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N
result <- result + guides(colour="none")
if (shapeby == "none")
result <- result + guides(shape="none")

return(result)
})
}
Expand All @@ -305,14 +243,8 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N
#' @param dtuo A DTU object.
#' @param type Type of plot. \itemize{
#' \item{"volcano"}{Change in proportion VS. statistical significance. Done at the transcript level. (Default)}
#' \item{"maxdprop"}{Distribution of biggest change in proportion in each gene.}
#' \item{"transc_quant"}{Transcript-level quantification reproducibility threshold VS. number of DTU positive calls.}
#' \item{"gene_quant"}{Gene-level quantification reproducibility threshold VS. number of DTU positive calls.}
#' \item{"transc_rep"}{Transcript-level replication reproducibility threshold VS. number of DTU positive calls.}
#' \item{"gene_rep"}{Gene-level replication reproducibility threshold VS. number of DTU positive calls.}

#' }
#' @return a ggplot2 object. Simply display it or you can also customize it.
#' \item{"maxdprop"}{Distribution of biggest change in proportion in each gene.} }
#' @return A ggplot2 object. Simply display it or you can also customize it.
#'
#' Generally uses the results of the transcript-level proportion tests.
#'
Expand All @@ -321,18 +253,20 @@ plot_gene <- function(dtuo, pid, style="lines", fillby=NA_character_, colourby=N
#' @export
plot_overview <- function(dtuo, type="volcano") {
with(dtuo, {
### VOLCANO
if (any(type == c("gene_volcano", "volcano"))) {
mydata = Transcripts[, .(target_id, Dprop, -log10(pval_corr), DTU)]
names(mydata)[3] <- "neglogP"
result <- ggplot(data = mydata, aes(Dprop, neglogP, colour = DTU)) +
geom_point(alpha = 0.3) +
ggtitle("Proportion change VS significance") +
labs(x = paste("Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, sep=""),
y ="-log10 (Pval)") +
scale_color_manual(values=c("steelblue3", "red")) +
scale_x_continuous(breaks = seq(-1, 1, 0.2)) +
theme(panel.background= element_rect(fill= "grey98"),
panel.grid.major= element_line(colour= "grey95") )
result <- ggplot(data = na.omit(mydata), aes(Dprop, neglogP, colour = DTU)) +
geom_point(alpha = 0.3) +
ggtitle("Isoform proportion change VS significance") +
labs(x = paste("Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, sep=""),
y ="-log10 (Pval)") +
scale_color_manual(values=c("steelblue3", "red")) +
scale_x_continuous(breaks = seq(-1, 1, 0.2)) +
theme(panel.background= element_rect(fill= "grey98"),
panel.grid.major= element_line(colour= "grey95") )
### MAXDPROP
} else if (type == "maxdprop") {
tmp <- copy(Transcripts) # I don't want the intermediate calculations to modify the dtu object.
tmp[, abma := abs(Dprop)]
Expand All @@ -342,53 +276,13 @@ plot_overview <- function(dtuo, type="volcano") {
tmp[, dtu := Genes[match(tmp$Group.1, Genes[, parent_id]), Genes$DTU] ]
# ok, plotting time
result <- ggplot(data = na.omit(tmp), aes(x, fill=dtu)) +
geom_histogram(binwidth = 0.01, position="identity", alpha = 0.5) +
ggtitle("Distribution of largest proportion change per gene") +
labs(x = paste("abs( Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, " )", sep=""),
y ="Number of genes") +
scale_fill_manual(values=c("steelblue3", "red")) +
scale_x_continuous(breaks = seq(0, 1, 0.1)) +
scale_y_continuous(trans="sqrt")
} else if (type == "transc_quant") {
mydata <- data.frame("thresh"=seq(0, 1, 0.01),
"count"= sapply(seq(0, 1, 0.01), function(x) {
sum(Transcripts[(quant_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) }))
result <- ggplot(data = mydata, aes(thresh, count)) +
geom_freqpoly(stat= "identity", size= 1.5) +
ggtitle("Quantification reproducibility VS DTU transcripts") +
labs(x="Reproducibility threshold",
y="Number of transcripts") +
scale_x_continuous(breaks = seq(0, 1, 0.1))
} else if (type == "gene_quant") {
mydata <- data.frame("thresh"=seq(0, 1, 0.01),
"count"= sapply(seq(0, 1, 0.01), function(x) {
sum(Genes[(quant_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) }))
result <- ggplot(data = mydata, aes(thresh, count)) +
geom_freqpoly(stat= "identity", size= 1.5) +
ggtitle("Quantification reproducibility VS DTU genes") +
labs(x="Reproducibility threshold",
y="Number of genes") +
scale_x_continuous(breaks = seq(0, 1, 0.1))
} else if (type == "transc_rep") {
mydata <- data.frame("thresh"=seq(0, 1, 0.01),
"count"= sapply(seq(0, 1, 0.01), function(x) {
sum(Transcripts[(rep_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) }))
result <- ggplot(data = mydata, aes(thresh, count)) +
geom_freqpoly(stat= "identity", size= 1.5) +
ggtitle("Replication reproducibility VS DTU transcripts") +
labs(x="Reproducibility threshold",
y="Number of transcripts") +
scale_x_continuous(breaks = seq(0, 1, 0.1))
} else if (type == "gene_rep") {
mydata <- data.frame("thresh"=seq(0, 1, 0.01),
"count"= sapply(seq(0, 1, 0.01), function(x) {
sum(Genes[(rep_dtu_freq >= x), elig_fx & sig], na.rm=TRUE) }))
result <- ggplot(data = mydata, aes(thresh, count)) +
geom_freqpoly(stat= "identity", size= 1.5) +
ggtitle("Replication reproducibility VS DTU genes") +
labs(x="Reproducibility threshold",
y="Number of genes") +
scale_x_continuous(breaks = seq(0, 1, 0.1))
geom_histogram(binwidth = 0.01, position="identity", alpha = 0.5) +
ggtitle("Distribution of largest isoform proportion change per gene") +
labs(x = paste("abs( Prop in ", Parameters$cond_B, " (-) Prop in ", Parameters$cond_A, " )", sep=""),
y ="Number of genes") +
scale_fill_manual(values=c("steelblue3", "red")) +
scale_x_continuous(breaks = seq(0, 1, 0.1)) +
scale_y_continuous(trans="sqrt")
} else {
stop("Unrecognized plot type!")
}
Expand Down
2 changes: 1 addition & 1 deletion inst/doc/input.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ output:
theme: readable
toc: yes
vignette: >
%\VignetteIndexEntry{RATs Input & Settings}
%\VignetteIndexEntry{RATs 2: Input & Settings}
%\VignetteEngine{knitr::knitr}
\usepackage[utf8]{inputenc}
Expand Down
2 changes: 1 addition & 1 deletion inst/doc/intro.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ output:
theme: readable
toc: yes
vignette: >
%\VignetteIndexEntry{RATs Quick start}
%\VignetteIndexEntry{RATs 1: Quick start}
%\VignetteEngine{knitr::knitr}
\usepackage[utf8]{inputenc}
---
Expand Down
6 changes: 5 additions & 1 deletion inst/doc/output.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ print( dtu_summary(mydtu) )
ids <- get_dtu_ids(mydtu)

# Contents
print( names(ids) )
print( ids )

# DTU positive genes according to the transcript-level test.
print( ids[[4]] )
Expand All @@ -51,3 +51,7 @@ print( names(mydtu$Transcripts) )
# Elements of ReplicateData
print( names(mydtu$Abundances) )

## ------------------------------------------------------------------------
# Abundance table for first condition.
print( head(mydtu$Abundances[[1]]) )

8 changes: 6 additions & 2 deletions inst/doc/output.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ output:
theme: readable
toc: yes
vignette: >
%\VignetteIndexEntry{RATs Output and Plots}
%\VignetteIndexEntry{RATs 3: Raw Output}
%\VignetteEngine{knitr::knitr}
\usepackage[utf8]{inputenc}
Expand Down Expand Up @@ -75,7 +75,7 @@ As of `v0.4.2` it uses the same category names as `dtu_summury()` for consistenc
ids <- get_dtu_ids(mydtu)
# Contents
print( names(ids) )
print( ids )
# DTU positive genes according to the transcript-level test.
print( ids[[4]] )
Expand Down Expand Up @@ -244,6 +244,10 @@ print( names(mydtu$Abundances) )
1. `condA` - (num) The transcript abundances in the first condition.
2. `condB` - (num) The transcript abundances in the second condition.

```{r}
# Abundance table for first condition.
print( head(mydtu$Abundances[[1]]) )
```

***

Expand Down
Loading

0 comments on commit eb50d79

Please sign in to comment.