Skip to content

Commit 6a7b7ea

Browse files
author
Edwin
committed
updating plots and preparing for release of version 4
1 parent 629446f commit 6a7b7ea

File tree

5 files changed

+74
-29
lines changed

5 files changed

+74
-29
lines changed

doc.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
# Seurat.Preprocessing (v1)
1+
# Seurat.Preprocessing (v4.0)
22
---
33
**Description**: GenePattern module which implements the preprocessing steps for Seurat. You may need to run this module multiple times if you want to change the filtering step.
44

5-
**Author**: Edwin Juárez
5+
**Author**: Edwin Juárez and Jonathan Zamora
66

77
**Contact**: [Forum Link](https://groups.google.com/forum/?utm_medium=email&utm_source=footer#!forum/genepattern-help)
88

9-
**Algorithm Version**: Seurat 3.2.0
9+
**Algorithm Version**: Seurat 4.0.3
1010

1111
---
1212

@@ -22,6 +22,10 @@ The `Seurat.Preprocessing` Module aims to provide a way to integrate the multipl
2222

2323
[Seurat](https://satijalab.org/seurat/)
2424

25+
[Module's GitHub repository](https://github.com/genepattern/Seurat.Preprocessing/tree/v4)
26+
27+
[Module's Docker container](https://hub.docker.com/layers/genepattern/seurat-suite/4.0.3/images/sha256-8d3f5fcae1cf4034cfc9aa87a2e3ea352b89073c948a4fa885670a1eebf16721?context=repo)
28+
2529
### Technical Notes
2630

2731

@@ -30,7 +34,7 @@ The `Seurat.Preprocessing` Module aims to provide a way to integrate the multipl
3034

3135
| Name | Description |
3236
-------|--------------
33-
| tenx_data_dir | `.tar.gz` or `.zip` file input that contains the raw single cell data -- currently only 10x data is supported.|
37+
| input_rds | RDS file created by Seurat.QC|
3438
| column_name | column name of percent mitochondrial genes. Note: not all datasets have this column, those who do often times name it percent.mt].|
3539
| pattern | what pattern to use to label mitochondrial genes [often times this is annotated as MT-].|
3640
| file_name | Basename of the file to be saved.|
@@ -48,6 +52,7 @@ The `Seurat.Preprocessing` Module aims to provide a way to integrate the multipl
4852
|feat_sel_method|Method for feature selection. You should probably not change this unless you really know what you are doing.|
4953
|num_features| Number of top features color during feature selection.|
5054
|num_to_label|Number of top features to label.|
55+
|numpcs|Number of PCA dimensions to visualize (default=50).|
5156
|vdl_num_dims|Number of PCA dimensions to visualize.|
5257
|vdhm_num_dims| Number of dimensions for the dimensional reduction heatmap.|
5358
|cells|Number of top cells to plot.|
@@ -81,4 +86,5 @@ The `Seurat.Preprocessing` Module aims to provide a way to integrate the multipl
8186

8287
| Version | Release Date | Description |
8388
----------|--------------|---------------------------------------------|
89+
| 4.0 | 2021-07-15 | Updating to Seurat 4.0.3 |
8490
| 1 | 2020-11-16 | Initial Release of `Seurat.Preprocessing` |

manifest

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
# Seurat.Preprocessing urn:lsid:genepattern.org:module.analysis:00415:3.2
1+
# Seurat.Preprocessing urn:lsid:genepattern.org:module.analysis:00415:4.0
22
#Wed Jul 14 04:57:38 UTC 2021
33
JVMLevel=
4-
LSID=urn\:lsid\:genepattern.org\:module.analysis\:00415\:3.2
4+
LSID=urn\:lsid\:genepattern.org\:module.analysis\:00415\:4.0
55
author=Edwin F. Juarez;UCSD - Mesirov Lab
66
categories=preprocess & utilities;single-cell
7-
commandLine=Rscript --no-save --quiet --no-restore <libdir>seurat_preprocess.R --input_rds <input_rds> --min_n_features <min_n_features> --max_n_features <max_n_features> --max_percent_mitochondrial <max_percent_mitochondrial> --norm_method <norm_method> --scale_factor <scale_factor> --feat_sel_method <feat_sel_method> --num_features <num_features> --num_to_label <num_to_label> --vdl_num_dims <vdl_num_dims> --vdhm_num_dims <vdhm_num_dims> --cells <cells> --file_name <file_name> --keep_scale_data <keep_scale_data>
7+
commandLine=Rscript --no-save --quiet --no-restore <libdir>seurat_preprocess.R --input_rds <input_rds> --min_n_features <min_n_features> --max_n_features <max_n_features> --max_percent_mitochondrial <max_percent_mitochondrial> --norm_method <norm_method> --scale_factor <scale_factor> --feat_sel_method <feat_sel_method> --num_features <num_features> --num_to_label <num_to_label> --vdl_num_dims <vdl_num_dims> --vdhm_num_dims <vdhm_num_dims> --cells <cells> --file_name <file_name> --keep_scale_data <keep_scale_data> --numpcs <numpcs>
88
cpuType=any
99
description=Seurat preprocessing
10-
documentationUrl=https\://github.com/genepattern/Seurat.Preprocessing/blob/develop/doc.md
10+
documentationUrl=https://github.com/genepattern/Seurat.Preprocessing/blob/v4/doc.md
1111
fileFormat=rds
1212
job.docker.image=genepattern/seurat-suite\:4.0.3
1313
language=any
@@ -29,7 +29,7 @@ p10_value=
2929
p11_MODE=
3030
p11_TYPE=Integer
3131
p11_default_value=2
32-
p11_description=Number of PCA dimensions to visualize.
32+
p11_description=Number of PCA dimensions to visualize (for elbow plot and heatmaps).
3333
p11_fileFormat=
3434
p11_flag=--vdl_num_dims
3535
p11_name=vdl_num_dims
@@ -78,6 +78,21 @@ p14_prefix=
7878
p14_prefix_when_specified=
7979
p14_type=java.lang.String
8080
p14_value=TRUE\=TRUE;FALSE\=FALSE
81+
82+
p15_MODE=
83+
p15_TYPE=Integer
84+
p15_default_value=50
85+
p15_description=Number of principal components to compute (default=50).
86+
p15_fileFormat=
87+
p15_flag=--numpcs
88+
p15_name=numpcs
89+
p15_numValues=0..1
90+
p15_optional=
91+
p15_prefix=
92+
p15_prefix_when_specified=
93+
p15_type=java.lang.Integer
94+
p15_value=
95+
8196
p1_MODE=IN
8297
p1_TYPE=FILE
8398
p1_default_value=
@@ -197,8 +212,8 @@ p9_type=java.lang.Integer
197212
p9_value=
198213
privacy=public
199214
publicationDate=11/20/2020 01\:10
200-
quality=development
201-
src.repo=https\://github.com/genepattern/Seurat.Preprocessing
215+
quality=production
216+
src.repo=https://github.com/genepattern/Seurat.Preprocessing/tree/v4
202217
taskDoc=doc.html
203218
taskType=preprocess & utilities
204219
userid=edjuaro

paramgroups.json

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,6 @@
88
"file_name"
99
]
1010
},
11-
{
12-
"name": "Other",
13-
"description": "The rest",
14-
"hidden": false,
15-
"parameters": []
16-
},
1711
{
1812
"name": "Filtering",
1913
"description": "Parameters related to filtering for QC.",
@@ -37,7 +31,8 @@
3731
"vdl_num_dims",
3832
"vdhm_num_dims",
3933
"cells",
40-
"keep_scale_data"
34+
"keep_scale_data",
35+
"numpcs"
4136
]
4237
}
4338
]

run_seurat.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
docker run -v $PWD:/LOCAL -w /LOCAL/Job_1 -t genepattern/seurat-suite:4.0.3 Rscript --no-save --quiet --slave --no-restore /LOCAL/src/seurat_preprocess.R\
2-
--input_rds '/LOCAL/data/test_run.rds' \
2+
--input_rds '/LOCAL/data/pbmc_preprocessed.rds' \
33
--column_name "percent.mt" --pattern 'MT-'\
44
--first_feature 'nFeature_RNA' --second_feature 'nCount_RNA' --third_feature 'percent.mt'\
55
--min_n_features 2 --max_n_features 6000 --max_percent_mitochondrial 25\
@@ -8,4 +8,4 @@ docker run -v $PWD:/LOCAL -w /LOCAL/Job_1 -t genepattern/seurat-suite:4.0.3 Rscr
88
--vdl_num_dims 2\
99
--vdhm_num_dims 15 --cells 500\
1010
--file_name "test_run"\
11-
--keep_scale_data "TRUE"
11+
--keep_scale_data "TRUE" --numpcs 50

src/seurat_preprocess.R

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ load_rds <- function(input.file){
2424
if (file.exists(input.file)){
2525
pbmc = readRDS(input.file)
2626
} else {
27-
Print('Input file could not be found!')
27+
print('Input file could not be found!')
2828
}
2929
return(pbmc)
3030
}
@@ -161,9 +161,9 @@ myscale <- function(pbmc){
161161

162162
## PCA
163163

164-
mypca <-function(pbmc){
164+
mypca <-function(pbmc,numpcs){
165165
feats <- VariableFeatures(object = pbmc, verbose = F)
166-
pbmc <-RunPCA(pbmc, features = feats, nfeatures.print=5, verbose = F)
166+
pbmc <-RunPCA(pbmc, features = feats, nfeatures.print=5, verbose = F, npcs = numpcs)
167167

168168
return(pbmc)
169169
}
@@ -185,9 +185,8 @@ vdl <- function(nDims){
185185

186186

187187
## ELBOW PLOT
188-
189-
ebp <- function(){
190-
plot(ElbowPlot(pbmc))
188+
ebp <- function(nDims){
189+
plot(ElbowPlot(pbmc,ndims=nDims))
191190
return(pbmc)
192191
}
193192

@@ -248,10 +247,11 @@ parser <- add_option(parser, c("--num_features"),type='integer',default=2000, he
248247
parser <- add_option(parser, c("--num_to_label"),type='integer',default=10, help = "Number of top features to label.")
249248
# ====================================
250249
# Parameter for Vizualize Dimension Loadings, vdl
250+
parser <- add_option(parser, c("--numpcs"),type='integer',default=50, help = "Number of PCA dimensions to compute (default=50).")
251251
parser <- add_option(parser, c("--vdl_num_dims"),type='integer',default=2, help = "Number of PCA dimensions to visualize.")
252252
# ====================================
253253
# Parameters for Heat Map, vdhm
254-
parser <- add_option(parser, c("--vdhm_num_dims"),type='integer',default=15, help = "Number of dimensions for the dimensional reduction heatmap.")
254+
parser <- add_option(parser, c("--vdhm_num_dims"),type='integer',default=15, help = "Number of dimensions for the dimensional reduction heatmap and elbow plots.")
255255
parser <- add_option(parser, c("--cells"),type='integer',default=500, help = "Number of top cells to plot.")
256256
# ====================================
257257
#parameter for save_it
@@ -266,6 +266,8 @@ print('Parameters used:')
266266
print(args)
267267
print('==========================================================')
268268

269+
# Setting up the PDF file for the plots
270+
pdf(file=paste(args$file_name,'.pdf',sep=''))
269271

270272
################################################################################
271273
#Begin Running the functions
@@ -359,7 +361,7 @@ job_list <- append(job_list, "MY PCA")
359361
input_size_list <- append(input_size_list, object.size(pbmc))
360362
print(object.size(pbmc), units="auto")
361363
start <- proc.time()
362-
pbmc <- mypca(pbmc)
364+
pbmc <- mypca(pbmc,args$numpcs)
363365
proc.time() - start
364366
output_size_list <- append(output_size_list, object.size(pbmc))
365367
print(object.size(pbmc), units="auto")
@@ -395,7 +397,7 @@ job_list <- append(job_list, "EBP")
395397
input_size_list <- append(input_size_list, object.size(pbmc))
396398
print(object.size(pbmc), units="auto")
397399
start <- proc.time()
398-
ebp()
400+
ebp(args$vdhm_num_dims)
399401
proc.time() - start
400402
output_size_list <- append(output_size_list, object.size(pbmc))
401403
print(object.size(pbmc), units="auto")
@@ -412,6 +414,32 @@ proc.time() - start
412414
output_size_list <- append(output_size_list, object.size(pbmc))
413415
print(object.size(pbmc), units="auto")
414416

417+
print("******************************************************")
418+
print("************ Explained variability ***************")
419+
print("******************************************************")
420+
421+
print('Computing percent of variance explained for each PC')
422+
mat <- Seurat::GetAssayData(pbmc, assay = "RNA", slot = "scale.data")
423+
pca <- pbmc[["pca"]]
424+
# Get the total variance:
425+
total_variance <- sum(matrixStats::rowVars(mat))
426+
eigValues = (pca@stdev)^2 ## EigenValues
427+
varExplained = 100*eigValues / total_variance #Percent of variance explainde
428+
print(varExplained)
429+
430+
cat('Creating plots for Variance explained')
431+
cvar<-cumsum(varExplained)
432+
433+
##default#par(mfrow=c(2,1),mar = c(5.1, 4.1, 4.1, 2.1))
434+
# par(mfrow=c(2,1),mar = c(5.1, 5, 4.1, 2.1))
435+
par(mfrow=c(1,1),mar = c(5.1, 4.1, 4.1, 2.1))
436+
plot(1:length(varExplained),varExplained,main='Percent of the variance explained by each PC',ylab='Percent',xlab='Principal Components')
437+
plot(1:length(cvar),cvar,main='Cumulative Variance explained',ylab='Percent',xlab='Principal Components')
438+
par(mfrow=c(1,1))
439+
# plot(1:length(varExplained),varExplained)
440+
# plot(1:length(cvar),cvar)
441+
print('... done')
442+
415443

416444
if (args$keep_scale_data == 'FALSE') {
417445
# Why are we calling Diet Seurat? Read this issue:
@@ -434,6 +462,7 @@ if (args$keep_scale_data == 'FALSE') {
434462
print("*************************************")
435463
print("************ SAVE RDS ***************")
436464
print("*************************************")
465+
dev.off() # Close the PDF file
437466
job_list <- append(job_list, "SAVE RDS")
438467
input_size_list <- append(input_size_list, object.size(pbmc))
439468
start <- proc.time()

0 commit comments

Comments
 (0)