-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Switched goseq to clusterprofiler, Updated plots
- Loading branch information
1 parent
d4612ab
commit c6afb58
Showing
20 changed files
with
731 additions
and
199 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
## R script to run differential gene expression | ||
|
||
# set custom lib path | ||
.libPaths("/sw/courses/ngsintro/rnaseq/dardel/r") | ||
|
||
library(edgeR) | ||
library(DESeq2) | ||
|
||
path_input <- "." | ||
path_output <- "." | ||
|
||
# metadata | ||
table_meta <- data.frame( | ||
accession = c("SRR3222409", "SRR3222410", "SRR3222411", "SRR3222412", "SRR3222413", "SRR3222414"), condition = c(rep(c("KO", "Wt"), each = 3)), | ||
replicate = rep(1:3, 2) | ||
) | ||
table_meta$condition <- factor(as.character(table_meta$condition), levels = c("Wt", "KO")) | ||
# table_meta$condition <- relevel(table_meta$condition,"Wt") | ||
rownames(table_meta) <- table_meta$accession | ||
|
||
# reading in data | ||
message("Reading count data ...") | ||
data_counts <- read.delim(file.path(path_input, "counts_full.txt"), skip = 1) | ||
table_counts <- as.matrix(data_counts[, -c(1:6)]) | ||
row.names(table_counts) <- data_counts$Geneid | ||
|
||
# fix column labels | ||
colnames(table_counts) <- sub(".*?(SRR[0-9]+).*", "\\1", colnames(table_counts)) | ||
|
||
# match order of counts and metadata | ||
mth <- match(colnames(table_counts), rownames(table_meta)) | ||
table_counts <- table_counts[, mth] | ||
# check if metadata and counts match | ||
all.equal(rownames(table_meta), colnames(table_counts)) | ||
|
||
# remove genes with low counts | ||
# keep genes that have minimum 1 CPM across 3 samples (since group has three replicates) | ||
message(paste0("Number of genes before filtering: ", nrow(table_counts))) | ||
keepgenes <- rowSums(edgeR::cpm(table_counts) > 1) >= 3 | ||
table_counts <- table_counts[keepgenes, ] | ||
message(paste0("Number of genes after filtering: ", nrow(table_counts))) | ||
|
||
message("Preview of count table:") | ||
print(head(table_counts)) | ||
message("\nView of metadata table:") | ||
print(table_meta) | ||
|
||
## deseq2 | ||
message("Running DESeq2 ...") | ||
|
||
# run deseq2 | ||
d <- DESeqDataSetFromMatrix(countData = table_counts, colData = table_meta, design = ~condition) | ||
d <- DESeq2::estimateSizeFactors(d, type = "ratio") | ||
d <- DESeq2::estimateDispersions(d) | ||
|
||
dg <- nbinomWaldTest(d) | ||
print(resultsNames(dg)) | ||
res <- results(dg, contrast = c("condition", "KO", "Wt"), alpha = 0.05) | ||
message("Summary of DEGs:") | ||
summary(res) | ||
|
||
# lfc shrink to correct fold changes | ||
res1 <- lfcShrink(dg, contrast = c("condition", "KO", "Wt"), res = res, type = "normal") | ||
|
||
# convert table to data.frame | ||
table_res <- as.data.frame(res1) | ||
table_res$ensembl_gene_id <- rownames(table_res) | ||
|
||
# merge results with annotations | ||
table_genes <- read.delim("../reference/mm-biomart99-genes.txt", header = T, stringsAsFactors = F) | ||
table_genes <- table_genes[!duplicated(table_genes$ensembl_gene_id), ] | ||
table_genes <- table_genes[!duplicated(table_genes$external_gene_name), ] | ||
table_res <- merge(table_res, table_genes, by = "ensembl_gene_id") | ||
table_res <- table_res[!is.na(table_res$gene), ] | ||
|
||
# alternative solution to convert ensembl gene id to gene symbol | ||
# library(clusterProfiler) | ||
# library(org.Mm.eg.db) | ||
# eg <- bitr(table_res$ensembl_gene_id, fromType = "ENSEMBL", toType = "SYMBOL", OrgDb = "org.Mm.eg.db") | ||
# colnames(eg) <- c("ensembl_gene_id", "gene") | ||
# table_res <- merge(table_res, eg, by = "ensembl_gene_id") | ||
|
||
message("Preview of DEG table:") | ||
print(head(table_res)) | ||
|
||
message("Exporting results ...") | ||
write.table(table_res, file.path(path_output, "dge_results_full.txt"), sep = "\t", quote = F, row.names = F) | ||
saveRDS(table_res, "dge_results_full.rds") | ||
|
||
# export vst counts | ||
cv <- as.data.frame(assay(DESeq2::varianceStabilizingTransformation(d, blind = T)), check.names = F) | ||
write.table(cv, file.path(path_output, "counts_vst_full.txt"), sep = "\t", dec = ".", quote = FALSE) | ||
saveRDS(cv, "counts_vst_full.rds") | ||
|
||
cat("Completed ...\n") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# set custom library path | ||
.libPaths("/sw/courses/ngsintro/rnaseq/dardel/r") | ||
|
||
# path_input <- "/crex/course_data/ngsintro/rnaseq/main_full/3_mapping/" | ||
path_input <- "." | ||
path_output <- "." | ||
|
||
# For running as command line script and parsing options from command line | ||
args <- commandArgs(trailingOnly = TRUE) | ||
# test if correct number of arguments are given as input: if not, return an error | ||
if (length(args) < 3) { | ||
stop("Please supply chromosome number as well as start and stop position for the plot\n Example: Rscript gene.r chr19 6062821 6067842", | ||
call. = FALSE | ||
) | ||
} | ||
|
||
library(Gviz) | ||
library(EnsDb.Mmusculus.v79) | ||
library(GenomicRanges) | ||
|
||
edb <- EnsDb.Mmusculus.v79 | ||
gen <- "mm10" | ||
chr <- args[1] # parse the first argument after the r-script to chr | ||
start <- as.integer(args[2]) # parse the second argument after the r-script to start | ||
stop <- as.integer(args[3]) # parse the second argument after the r-script to stop | ||
|
||
message("Reading BAM files ...") | ||
|
||
# Collect bam filenames from bam folder | ||
bams <- list.files(path_input, pattern = "*.bam$", full.names = TRUE, recursive = TRUE) | ||
|
||
# Allow for using different chromosome names than ucsc | ||
options(ucscChromosomeNames = FALSE) | ||
|
||
# create tracks | ||
itrack <- IdeogramTrack(genome = gen, chromosome = chr) | ||
gtrack <- GenomeAxisTrack() | ||
grtrack <- getGeneRegionTrackForGviz(edb, chromosome = chr, start = start, end = stop) | ||
genome(grtrack) <- gen | ||
atrack <- lapply(bams, function(bam) { | ||
AlignmentsTrack(bam = bam, isPaired = TRUE, start = start, end = end, genome = gen, chromosome = chr, name = basename(bam)) | ||
}) | ||
|
||
# merge all tracks | ||
toplot <- append(list(itrack, gtrack, GeneRegionTrack(grtrack)), atrack) | ||
|
||
width <- 14 | ||
height <- 14 | ||
|
||
# plot | ||
message("Exporting coverage plot ...") | ||
png(file.path(path_output, paste0("coverage-", chr, "-", start, "-", stop, ".png")), width = width, height = height, units = "cm", res = 300) | ||
plotTracks(toplot, type = c("coverage"), from = start, to = stop) | ||
dev.off() | ||
|
||
message("Exporting sashimi plot ...") | ||
png(file.path(path_output, paste0("sashimi-", chr, "-", start, "-", stop, ".png")), width = width, height = height, units = "cm", res = 300) | ||
plotTracks(toplot, type = c("sashimi"), from = start, to = stop) | ||
dev.off() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/usr/local/bin/Rscript | ||
|
||
## R script to run gene set enrichment (over-enrichment analysis) on DE genes | ||
|
||
# set custom library path | ||
.libPaths("/sw/courses/ngsintro/rnaseq/dardel/r") | ||
|
||
library(clusterProfiler) | ||
library(org.Mm.eg.db) | ||
|
||
path_input <- "." | ||
path_output <- "." | ||
|
||
message("Reading data ...") | ||
|
||
table_res <- read.table(file.path(path_input, "dge_results_full.txt"), sep = "\t", header = TRUE, stringsAsFactors = FALSE, quote = "") | ||
table_res <- table_res[!is.na(table_res$padj), ] | ||
table_res <- table_res[!is.na(table_res$entrezgene_id), ] | ||
table_res <- table_res[!duplicated(table_res$entrezgene_id), ] | ||
|
||
universe <- unique(table_res$entrezgene_id) | ||
table_res <- table_res[table_res$padj < 0.05, ] | ||
degs <- unique(table_res$entrezgene_id) | ||
|
||
message("Running GSE on CC ...") | ||
gse_cc <- enrichGO( | ||
gene = as.character(degs), | ||
universe = as.character(universe), | ||
OrgDb = org.Mm.eg.db, | ||
ont = "CC", | ||
pAdjustMethod = "BH", | ||
pvalueCutoff = 0.01, | ||
qvalueCutoff = 0.05, | ||
readable = TRUE | ||
) | ||
|
||
message("Saving CC results ...") | ||
saveRDS(gse_cc, "gse-clusterprofiler-cc.rds") | ||
write.table(gse_cc, file.path(path_output, "gse-cc.txt"), col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE) | ||
|
||
message("Running GSE on MF ...") | ||
gse_mf <- enrichGO( | ||
gene = as.character(degs), | ||
universe = as.character(universe), | ||
OrgDb = org.Mm.eg.db, | ||
ont = "MF", | ||
pAdjustMethod = "BH", | ||
pvalueCutoff = 0.01, | ||
qvalueCutoff = 0.05, | ||
readable = TRUE | ||
) | ||
|
||
message("Saving MF results ...") | ||
saveRDS(gse_mf, "gse-clusterprofiler-mf.rds") | ||
write.table(gse_mf, file.path(path_output, "gse-mf.txt"), col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE) | ||
|
||
message("Running GSE on BP ...") | ||
gse_bp <- enrichGO( | ||
gene = as.character(degs), | ||
universe = as.character(universe), | ||
OrgDb = org.Mm.eg.db, | ||
ont = "BP", | ||
pAdjustMethod = "BH", | ||
pvalueCutoff = 0.01, | ||
qvalueCutoff = 0.05, | ||
readable = TRUE | ||
) | ||
|
||
message("Saving BP results ...") | ||
saveRDS(gse_bp, "gse-clusterprofiler-bp.rds") | ||
write.table(gse_bp, file.path(path_output, "gse-bp.txt"), col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE) | ||
|
||
message("Completed.") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
#!/usr/local/bin/Rscript | ||
|
||
## R script to run gene set enrichment (over-enrichment analysis) on DE genes | ||
|
||
# set custom lib path | ||
.libPaths("/sw/courses/ngsintro/rnaseq/dardel/r") | ||
|
||
library(goseq) | ||
library(GO.db) | ||
library(reactome.db) | ||
library(org.Mm.eg.db) | ||
|
||
## prepare directory where results will be saved | ||
export_dir <- "gse-goseq" | ||
|
||
message("Reading data ...") | ||
|
||
table_res <- read.table("dge_results_full.txt", sep = "\t", header = TRUE, stringsAsFactors = FALSE, quote = "") | ||
table_res <- table_res[!is.na(table_res$padj), ] | ||
|
||
# merge results with annotations | ||
table_genes <- read.delim("mm-biomart99-genes.txt", header = T, stringsAsFactors = F) | ||
table_genes <- table_genes[!duplicated(table_genes$ensembl_gene_id), ] | ||
table_genes$entrezgene_id <- as.character(table_genes$entrezgene_id) | ||
table_dge <- merge(table_res, table_genes, by = "ensembl_gene_id") | ||
|
||
## select only genes with entrez gene id | ||
## GO term and reactome pathway annotations are mapped to entrez gene ids | ||
table_dge_e <- table_dge[!is.na(table_dge$entrezgene_id), ] | ||
|
||
# remove lines with non-unique entrez gene id | ||
table_dge_e <- table_dge_e[!duplicated(table_dge_e$entrezgene_id), ] | ||
|
||
## list of all genes included in the analysis | ||
allgenes <- table_dge_e$entrezgene_id | ||
|
||
# select the downregulated genes at FDR<0.05 | ||
dn.genes <- table_dge_e$entrezgene_id[(table_dge_e$padj < 0.05 & table_dge_e$log2FoldChange < 0)] | ||
gene.vector.de.dn <- as.integer(allgenes %in% dn.genes) | ||
names(gene.vector.de.dn) <- allgenes | ||
|
||
up.genes <- table_dge_e$entrezgene_id[(table_dge_e$padj < 0.05 & table_dge_e$log2FoldChange > 0)] | ||
gene.vector.de.up <- as.integer(allgenes %in% up.genes) | ||
names(gene.vector.de.up) <- allgenes | ||
|
||
message("Preparing ontology data ...") | ||
|
||
# prepare go db | ||
table_go <- as.data.frame(org.Mm.egGO2ALLEGS) | ||
colnames(table_go) <- c("gene_id", "category", "evidence", "ontology") | ||
|
||
# read reactome data | ||
table_reactome <- as.data.frame(reactomeEXTID2PATHID) | ||
colnames(table_reactome) <- c("category", "gene_id") | ||
table_reactome_desc <- as.data.frame(reactomePATHID2NAME) | ||
colnames(table_reactome_desc) <- c("category", "path_name") | ||
# remove the duplicates | ||
table_reactome_desc <- table_reactome_desc[!duplicated(table_reactome_desc$category), ] | ||
|
||
message("Running functional analyses ...") | ||
|
||
## calculate the gene length bias (using nullp, a function from goseq package) | ||
pwf.de.up <- nullp(gene.vector.de.up, "mm9", "knownGene", plot.fit = F) | ||
pwf.de.dn <- nullp(gene.vector.de.dn, "mm9", "knownGene", plot.fit = F) | ||
|
||
# goseq on go | ||
go.up <- goseq(pwf.de.up, gene2cat = table_go) | ||
go.up.adj <- go.up[p.adjust(go.up$over_represented_pvalue, method = "BH") < 0.1, ] | ||
|
||
go.dn <- goseq(pwf.de.dn, gene2cat = table_go) | ||
go.dn.adj <- go.dn[p.adjust(go.dn$over_represented_pvalue, method = "BH") < 0.1, ] | ||
|
||
# goseq on reactome | ||
react.up <- goseq(pwf.de.up, gene2cat = table_reactome) | ||
react.up <- merge(react.up, table_reactome_desc, by = "category") | ||
react.up.adj <- react.up[p.adjust(react.up$over_represented_pvalue, method = "BH") < 0.1, ] | ||
|
||
react.dn <- goseq(pwf.de.dn, gene2cat = table_reactome) | ||
react.dn <- merge(react.dn, table_reactome_desc, by = "category") | ||
react.dn.adj <- react.dn[p.adjust(react.dn$over_represented_pvalue, method = "BH") < 0.1, ] | ||
|
||
message("Exporting data ...") | ||
dir.create(export_dir) | ||
|
||
# save results as tables | ||
write.table(go.up.adj, file.path(export_dir, "go_up.txt"), col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE) | ||
write.table(go.dn.adj, file.path(export_dir, "go_down.txt"), col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE) | ||
write.table(react.up.adj, file.path(export_dir, "reactome_up.txt"), col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE) | ||
write.table(react.dn.adj, file.path(export_dir, "reactome_down.txt"), col.names = TRUE, row.names = FALSE, sep = "\t", quote = FALSE) | ||
|
||
message("Completed.") |
Oops, something went wrong.