Skip to content

Inmed dechevigny scrnaseq cortex v2 #27

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
02_Container/cellranger.sif
02_Container/mambaforge:23.1.0-1.sif
05_Output
05_Output_june
.vscode/
.bash_history/
.cache/
Expand All @@ -18,4 +17,6 @@ test
S004647_to_S004650_unoptimized_ref/
S004647_to_S004650/
__pycache__

.netrc
*.errs
*.out
11 changes: 11 additions & 0 deletions 02_Container/DE_analysis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
channels :
- r
- bioconda
- conda-forge
- defaults
dependencies :
- r-ggplot2 =3.3.5
- r-data.table =1.14.2
- r-seurat =4.0.3
- r-dplyr =1.0.9
- r-ggrepel
12 changes: 12 additions & 0 deletions 02_Container/knnor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
channels :
- main
- defaults
- bioconda
- conda-forge
dependencies :
- python = 3.9
- pandas
- numpy
- pip
- pip :
- knnor
14 changes: 14 additions & 0 deletions 02_Container/labelTransfer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
channels :
- r
- bioconda
- conda-forge
- defaults
dependencies :
- r-ggplot2 =3.5.1
- r-seurat =5.1.0
- r-dplyr =1.1.4
- r-sctransform=0.4.1 # Include sctransform
- r-devtools # Required for installing packages from GitHub
- bioconductor-glmgampoi


10 changes: 10 additions & 0 deletions 02_Container/remi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
channels :
- r
- bioconda
- conda-forge
- defaults
dependencies :
- r-seurat = 4.0.3
- r-dplyr = 1.0.9
- r-ggplot2 =3.3.5
- r-cowplot = 1.1.1
3 changes: 2 additions & 1 deletion 02_Container/sims.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ dependencies :
- pip
- pip:
- --upgrade pip setuptools wheel
- --use-pep517 git+https://github.com/braingeneers/SIMS.git
# Sims version Nov 17, 2023
- --use-pep517 git+https://github.com/braingeneers/SIMS.git@c3cc547e9223e979fdc70f9af2ae932b729da88e
55 changes: 34 additions & 21 deletions 03_Script/01_prepare_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,41 @@ STEP2 = "02_seurat/"
source(file.path(dirname(DIRECTORY), "03_Script/00_general_deps.R"))

in_data_dir = file.path( OUTPUTDIR, STEP1)
in_data_dir_demux = file.path( OUTPUTDIR, STEP2)
samples <- dir(in_data_dir)



MIN_CELLS <- as.numeric(MIN_CELLS)
MIN_FEATURES <- as.numeric(MIN_FEATURES)
# HTO <- unlist(strsplit(HTO, ","))

options(Seurat.object.assay.version = 'v3')

#........................................
# Read data
#........................................
seurat_data <- Read10X(paste0(in_data_dir,SAMPLE_ID,CELL_RANGER_COUNT_PATH))
# Need a condition if data are depultiplexed with cell ranger multi
if( RUN_DEMULTIPLEX == TRUE){
seurat_data <- Read10X(paste0(in_data_dir,SAMPLE_ID,CELL_RANGER_COUNT_PATH))
seurat_obj <- CreateSeuratObject(counts = seurat_data, min.cells = MIN_CELLS, min.features = MIN_FEATURES, project = SAMPLE_ID)
} else {
seurat_obj <- CreateSeuratObject(counts = seurat_data$`Gene Expression`, min.cells = MIN_CELLS, min.features = MIN_FEATURES, project = SAMPLE_ID)
} else if (RUN_DEMULTIPLEX == FALSE) {
seurat_obj <- readRDS(paste0(in_data_dir_demux,SAMPLE_ID, "/demultiplex_", SAMPLE_ID, ".rds"))
}



# # Need a condition if data are depultiplexed with cell ranger multi
# if( RUN_DEMULTIPLEX == TRUE){
# seurat_obj <- CreateSeuratObject(counts = seurat_data, min.cells = MIN_CELLS, min.features = MIN_FEATURES, project = SAMPLE_ID)
# } else if (as.logical(MULTIPLEX) == TRUE) {
# seurat_obj <- CreateSeuratObject(counts = seurat_data$`Gene Expression`, min.cells = MIN_CELLS, min.features = MIN_FEATURES, project = SAMPLE_ID)
# hto <- seurat_data$`Antibody Capture`
# hto <- hto[c(HTO), ]
# seurat_obj[["HTO"]] <- CreateAssayObject(counts = hto)
# seurat_obj <- subset(x = seurat_obj, subset = nCount_HTO > 0)
# seurat_obj <- NormalizeData(seurat_obj, assay = "HTO", normalization.method = "CLR")
# } else{
# seurat_obj <- CreateSeuratObject(counts = seurat_data$`Gene Expression`, min.cells = MIN_CELLS, min.features = MIN_FEATURES, project = SAMPLE_ID)
# }

seurat_obj$SampleID <- SAMPLE_ID

# ..........................................................................................................
Expand Down Expand Up @@ -371,18 +389,20 @@ ggplot( seurat_obj[[]][order( seurat_obj[["outlier"]]),], # Plot FALSE first and
labs( x = "# Genes", y = "# UMIs") +
theme( legend.position = "none")

# Mitochondrial vs ribosomal distributions
if(exists( "mito.drop") && exists( "ribo.drop"))
if(exists( "mito.drop"))
{
ggplot( seurat_obj[[]][order( seurat_obj[["outlier"]]),], # Plot FALSE first and TRUE after
aes( x = percent.rb,
aes( x = nCount_RNA,
y = percent.mt,
color = outlier)) +
geom_point( size = 0.5) +
geom_vline( xintercept = FILTER_PERCENT_RB, linetype = 2, color = "blue", alpha = 0.5) +
geom_vline( xintercept = c( FILTER_FEATURE_MIN, FILTER_FEATURE_MAX),
linetype = 2,
color = c( "blue", "red")[!sapply( list( FILTER_FEATURE_MIN, FILTER_FEATURE_MAX), is.null)],
alpha = 0.5) +
geom_hline( yintercept = FILTER_PERCENT_MT, linetype = 2, color = "red", alpha = 0.5) +
scale_color_manual( values = c( "TRUE" = "#FF0000AA", "FALSE" = "#44444444")) +
labs( x = "% Ribosomal genes", y = "% Mitochondrial genes") +
labs( x = "# UMIs", y = "% Mitochondrial genes") +
theme( legend.position = "none")
}

Expand All @@ -405,15 +425,8 @@ if( QC_EXPLORATION_MODE == FALSE){
# sep="\t");

# ..........................................................................................................
## @knitr normalizeData
## @knitr SCTransform
# ..........................................................................................................

# Normalize the count data present in a given assay
seurat_obj = NormalizeData( object = seurat_obj,
normalization.method = NORM_METHOD,
scale.factor = NORM_SCALE_FACTOR,
verbose = .VERBOSE)

# # Scales and centers features in the dataset
# seurat_obj = ScaleData( object = seurat_obj,
# verbose = .VERBOSE)
# # Normalize the count data present in a given assay
seurat_obj = SCTransform(seurat_obj, vars.to.regress = "percent.mt", verbose = FALSE)
6 changes: 5 additions & 1 deletion 03_Script/02_variables_genes.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ suppressMessages( suppressWarnings( LabelPoints( plot = VariableFeaturePlot( seu
## @knitr findVariableGenes_summaryTable
# ..........................................................................................................

# Extract variable genes info as data.frame
# Extract variable genes info as data.frame feature_select_method: "vst"
# Number of features to select as top variable features; only used when select_method is set to 'dispersion' or 'vst'
variable_features: 2000
# For table in report
variable_features_showtop : 200
variableAnnotationsDT = head( HVFInfo( object = seurat_obj, assay = "RNA", selection.method = FEATURE_SELECT_METHOD)[VariableFeatures( seurat_obj),], VARIABLE_FEATURES_SHOWTOP);
variableAnnotationsDT = cbind("Gene" = rownames(variableAnnotationsDT), variableAnnotationsDT);

Expand Down
148 changes: 46 additions & 102 deletions 03_Script/03_cell_heterogeneity_PCA.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,122 +3,66 @@
# of cells using dimension reduction techniques
# ########################################################

## @knitr ldr
# ..........................................................................................................
## @knitr HTODemux
# ..........................................................................................................

# if(as.logical(MULTIPLEX) == TRUE){
# # Demultiplexing data
# seurat_obj <- HTODemux(seurat_obj, assay = "HTO", positive.quantile = 0.99)
# cat("<br><br>Removed cells after filtering Doublet et Negative:", sum(seurat_obj[["hash.ID"]] == "Negative" | seurat_obj[["hash.ID"]] == "Doublet"));
# cat("<br><br>Remaining cells after filtering:", sum(seurat_obj$hash.ID %in% HTO));
# cat("\n<br>\n");
# seurat_obj <- subset(seurat_obj, idents = c("Doublet","Negative"), invert = TRUE) ### Keep only the Singlet cells (Remove Negative and Doublet)

# seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "mean.var.plot")
# seurat_obj <- ScaleData(seurat_obj, features = VariableFeatures(seurat_obj))
# seurat_obj <- RunPCA(seurat_obj)
# seurat_obj <- RunUMAP(seurat_obj, dims = 1:25)
# print(DimPlot(seurat_obj))
# }

# Scales and centers features in the dataset
seurat_obj = ScaleData( object = seurat_obj,
verbose = .VERBOSE)
# ..........................................................................................................
## @knitr ldr
# ..........................................................................................................

# Perform linear dimensional reduction
seurat_obj <- RunPCA(seurat_obj, features = VariableFeatures(object = seurat_obj))
seurat_obj <- RunPCA(seurat_obj, assay = "SCT", verbose = FALSE)
VizDimLoadings(seurat_obj, dims = 1:DIMS, reduction = "pca")
# DimPlot(seurat_obj, reduction = "pca", group.by = "orig.ident")
# DimPlot(seurat_obj, reduction = "pca", group.by = "categorie")
DimHeatmap(seurat_obj, dims = 1:DIMS, cells = 500, balanced = TRUE)

# ..........................................................................................................
## @knitr elbowplot
# ..........................................................................................................

ElbowPlot(seurat_obj, ndims = DIMS)

# https://hbctraining.github.io/scRNA-seq/lessons/elbow_plot_metric.html
pct <- seurat_obj[["pca"]]@stdev / sum(seurat_obj[["pca"]]@stdev) * 100
cumu <- cumsum(pct)
co1 <- which(cumu > 90 & pct < 5)[1]
co2 <- sort(which((pct[1:length(pct) - 1] - pct[2:length(pct)]) > 0.1), decreasing = T)[1] + 1
pcs <- min(co1, co2)
# Create a dataframe with values
plot_df <- data.frame(pct = pct,
cumu = cumu,
rank = 1:length(pct))

# Elbow plot to visualize
ggplot(plot_df, aes(cumu, pct, label = rank, color = rank > pcs)) +
geom_text() +
geom_vline(xintercept = 90, color = "grey") +
geom_hline(yintercept = min(pct[pct > 5]), color = "grey") +
theme_bw()

# ..........................................................................................................
## @knitr heterogeneity_pca
# ..........................................................................................................

# Compute PCA on selected variable genes
nbPC = PCA_NPC
if(PCA_NPC > length(Cells(seurat_obj)))
{
warning( paste0( "Number of cells in object (", length(Cells(seurat_obj)), ") smaller than requested number of PCs (", PCA_NPC,"), setting lower PC number..." ))
nbPC = length(Cells(seurat_obj))
}
seurat_obj <- RunPCA( object = seurat_obj,
features = VariableFeatures( seurat_obj),
npcs = nbPC,
verbose = .VERBOSE);

# Plot PCA, highlighting seurat clusters for combination of dimensions
invisible( apply( combn( 1:PCA_PLOTS_NBDIMS, 2), 2, function(dims)
{
print( DimPlot( object = seurat_obj, reduction = "pca", dims = dims, group.by = "orig.ident") +
theme( legend.position = "none"));
}));

# ..........................................................................................................
## @knitr heterogeneity_pca_umisCounts
# ..........................................................................................................

# Same PCA plots but highlight UMI counts
invisible( apply( combn( 1:PCA_PLOTS_NBDIMS, 2), 2, function(dims)
{
print( FeaturePlot( seurat_obj, feature = "nCount_RNA", reduction = "pca", dims = dims) +
theme( legend.position = "none",
plot.margin = margin(0, 0, 0, 0, "cm"),
plot.title = element_blank()));
}));

# ..........................................................................................................
## @knitr heterogeneity_pca_genesCounts
# ..........................................................................................................

# Same PCA plots but highlight feature counts
invisible( apply( combn( 1:PCA_PLOTS_NBDIMS, 2), 2, function(dims)
{
print( FeaturePlot( seurat_obj, feature = "nFeature_RNA", reduction = "pca", dims = dims) +
theme( legend.position = "none",
plot.margin = margin(0, 0, 0, 0, "cm"),
plot.title = element_blank()));
}));

# ..........................................................................................................
## @knitr heterogeneity_pca_correlations
# ..........................................................................................................

# Isolate the PCA location of cells in first dimensions, together with UMI and genes counts for correlation analysis (makes use of cbind recycling to repeat values for each stacked PC)
relationToPC = suppressWarnings( cbind( stack( as.data.frame( Embeddings( seurat_obj, reduction = "pca")[ rownames( seurat_obj[[]]), paste0( "PC_", 1:PCA_PLOTS_NBDIMS) ])),
seurat_obj[[ c( "nCount_RNA", "nFeature_RNA") ]],
Cluster = Idents( seurat_obj)));

# Plot relationship of UMIs and genes counts with PCs (combine plots using '/' from 'patchwork' lib)
print( (ggplot( data = relationToPC, aes( x = values, y = nCount_RNA)) +
facet_wrap( ~ind) +
stat_binhex( bins = 60) +
#geom_point( aes(col = Cluster), alpha = 0.5) +
geom_smooth( method = 'lm') +
stat_cor( method = "spearman") +
ylab( "# UMIs") +
theme( axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
plot.margin = unit( c( 1, 1, -0.5, 0.5), "lines")))
/
(ggplot( data = relationToPC, aes( x = values, y = nFeature_RNA)) +
facet_wrap( ~ind) +
stat_binhex( bins = 60) +
#geom_point( aes(col = Cluster), alpha = 0.5) +
geom_smooth( method = 'lm') +
stat_cor( method = "spearman") +
xlab( "PC values") +
ylab( "# Genes")));

# ..........................................................................................................
## @knitr heterogeneity_pca_loadings
# ..........................................................................................................

# Plot PCA loadings
invisible( apply( combn( 1:PCA_PLOTS_NBDIMS, 2), 2, function(dims)
{
namesPC=paste0( "PC_", dims);
# Get the loading values for concerned PCs
loadingsMatrix = Loadings( seurat_obj, reduction = "pca")[ , namesPC ];
# Sort features by average absolute value and convert as DF with features names as column
loadingsMatrix = head( loadingsMatrix[ order( apply( loadingsMatrix, 1, function(x){ mean( abs( x)) }), decreasing = TRUE), ], PCA_PLOTS_NBFEATURES);
loadingsDF = data.frame( loadingsMatrix, features = rownames( loadingsMatrix));

# Define symmetric and consistent axes for group of plots
axesLimit = max( abs( loadingsMatrix));

# Plot arrows and features name
print( ggplot( data = loadingsDF, aes_( x = as.name( namesPC[1]), y = as.name( namesPC[2]))) +
coord_cartesian( xlim = c( -axesLimit, axesLimit), ylim = c( -axesLimit, axesLimit)) +
geom_text_repel( aes( label = features), max.iter = 10000) + #, max.overlaps = Inf
geom_segment( x = 0 , y = 0, aes_( xend = as.name(namesPC[1]), yend = as.name(namesPC[2])), col = "#00000044", arrow = arrow( length = unit( 2, "mm"))));
}));
npcs = pcs,
verbose = .VERBOSE,
assay = "SCT");
Loading