-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDESeq_DHR.R
99 lines (72 loc) · 2.99 KB
/
DESeq_DHR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Install BiocManager package/library
install.packages("BiocManager")
# Install Bioconductor packages/library
BiocManager::install(c("DESeq2", "EnhancedVolcano", "ComplexHeatmap"))
# Install general packages
install.packages(c("tidyr", "dplyr"))
# Load the Libraries
library(DESeq2) # For differential expression analysis
library(tidyr) # For data wrangling
library(dplyr) # For data manipulation
library(EnhancedVolcano) # For visualization of DEGs
library(ComplexHeatmap) # For heatmap visualization
#set working directory
setwd("D:/RNAseq_tutorial/rawdata/") # Replace with your actual path
# Check if the working directory is set correctly
getwd()
# Load the raw counts, skipping the first comment lines starting with '#'
counts_raw <- read.delim("counts.txt", row.names = 1, comment.char = "#")
# Check the first few rows
head(counts_raw)
# Select only the Geneid and BAM count columns
counts <- counts_raw[, c(6:ncol(counts_raw))]
# Check the first few rows
head(counts)
# Check sample names
colnames(counts)
# clean the column name
colnames(counts) <- gsub("_sorted.bam", "", colnames(counts))
# Rename SRR* to test and control
colnames(counts) <- c("CTRL_1", "CTRL_2", "TEST_1", "TEST_2")
# Check the updated column names
print(colnames(counts))
# Create a simple condition vector
condition <- factor(c("TEST", "TEST", "CTRL", "CTRL"))
# Create DESeq2 dataset
dds <- DESeqDataSetFromMatrix(countData = counts,
colData = data.frame(condition),
design = ~ condition)
# dds view
dds
colData(dds) # Check sample conditions
assay(dds)[1:5, ] # View first 5 genes' counts
# Perform normalization and statistical analysis
dds <- DESeq(dds)
res <- results(dds, contrast = c("condition", "TEST", "CTRL")) # Extract results
# filter DE genes
res_filtered <- res[which(res$padj < 0.05 & abs(res$log2FoldChange) > 1), ]
# summary and count of DE genes
summary(res)
nrow(res_filtered)
# Save all results
write.csv(res_filtered, "DEGs_filtered.csv")
write.csv(as.data.frame(res), "DEGs_full.csv")
# Visualize the result
EnhancedVolcano(res,
lab = rownames(res),
x = 'log2FoldChange',
y = 'pvalue',
pCutoff = 0.05,
FCcutoff = 1,
title = 'Volcano Plot',
subtitle = 'TEST vs CTRL',
caption = 'DESeq2 results')
# Heatmap the DE genes
# Select genes with padj > 0.05
significant_genes <- res[which(res$padj < 0.05), ]
# Extract normalized counts for these genes
normalized_counts <- counts(dds, normalized=TRUE)
heatmap_data <- normalized_counts[rownames(significant_genes), ]
# Scale row-wise (z-score normalization)
heatmap_data_scaled <- t(scale(t(heatmap_data))) # Scale across rows
Heatmap(heatmap_data_scaled, name = "Z-score", cluster_rows = TRUE, cluster_columns = TRUE,show_row_dend = F,show_column_dend = F)