Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Customise filter by nFeature_RNA threshold #54

Closed
wants to merge 52 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
59b2453
update test
Dec 18, 2023
7f08b16
fix conflicts
Jan 17, 2024
63087f1
Merge branch 'add_reports' of github.com:susansjy22/HPCell into add_r…
Jan 17, 2024
332bffc
update functions
Jan 19, 2024
53be3f1
Merge branch 'add_reports' of github.com:susansjy22/HPCell into add_r…
Jan 19, 2024
bff9dd1
update tests
Jan 19, 2024
1bd57ab
Update reports
susansjy22 Feb 17, 2024
543ee98
dummy
Feb 17, 2024
13a4a25
add reports
susansjy22 Feb 18, 2024
b1cc940
update reports
Feb 23, 2024
6e14148
update reports
susansjy22 Feb 23, 2024
e59012c
update reports
susansjy22 Feb 23, 2024
af8d6dd
merging reports
susansjy22 Feb 23, 2024
f83a64c
functional reports
Feb 24, 2024
21dac3d
Commit for merge
susansjy22 Feb 26, 2024
6a5668f
roxygen doc
susansjy22 Feb 26, 2024
1eff61f
update merge conflicts
susansjy22 Feb 26, 2024
d67dbe8
update functions
Feb 29, 2024
084fa06
merge changes stefano upstream branch
Feb 29, 2024
921c233
delete render-rmd.yml
susansjy22 Mar 1, 2024
9fb3d7d
dummy
susansjy22 Mar 1, 2024
d1f8963
merge upstream changes
susansjy22 Mar 1, 2024
368c07e
Seurat::as.seurat
susansjy22 Mar 1, 2024
83ba3c8
add SummarizedExperiment and lme4 to imports
susansjy22 Mar 1, 2024
c0853fa
BiocGenerics::as.data.frame
susansjy22 Mar 3, 2024
7cdcd7b
Update namespace and doc
susansjy22 Mar 3, 2024
e6a4160
update namespace and doc
susansjy22 Mar 3, 2024
05078e5
update github checks
Mar 5, 2024
a89c706
update function
Mar 14, 2024
e0c3c03
pull changes from remote
Mar 14, 2024
66b9efb
pull changes
Mar 14, 2024
de051ad
pull changes
Mar 14, 2024
b9197d1
update add reports branch with master
Mar 14, 2024
8f3aaab
edit github checks
susansjy22 Mar 15, 2024
6e7f2ae
fix pull conflicts
susansjy22 Mar 15, 2024
c35ea3c
edit readme remote
susansjy22 Mar 15, 2024
d663922
update add_reports
susansjy22 Mar 15, 2024
ad1205b
Merge branch 'add_reports' of github.com:susansjy22/HPCell into add_r…
Mar 19, 2024
6e637c0
fix HPCell:: issue
Mar 19, 2024
af792d5
update pipeline reports(fibrosis)
Mar 20, 2024
d0532d8
pass github checks
susansjy22 Mar 26, 2024
ba51ac7
fix github checks
susansjy22 Mar 27, 2024
03dab34
Merge remote
susansjy22 Mar 27, 2024
32f4a59
update package description
Mar 27, 2024
eaf2121
Remove pseudobulk_preprocessing from functions
Mar 28, 2024
ba88292
Merge branch 'master' of github.com:susansjy22/HPCell
Mar 28, 2024
f5ae15f
edit function examples
Mar 28, 2024
352bfab
edit utilities examples
Mar 28, 2024
f8fcdbd
document functions
myushen Apr 4, 2024
2b6318a
remove assays in aggregate_cells in create_pseudobulk_sample function
myushen Apr 5, 2024
2fa7f89
filter empty droplets checkpoint based on nFeature_RNA
myushen Apr 11, 2024
e69d5cf
fix bracket
myushen Apr 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions .github/workflows/render-rmd.yml

This file was deleted.

2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Imports:
tidybulk,
tidySummarizedExperiment,
tidySingleCellExperiment,
SummarizedExperiment,
edgeR,
sctransform (>= 0.3.3),
Seurat (>= 5.0.0),
Matrix (>= 1.6),
Expand Down
15 changes: 9 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(add_RNA_assay)
export(alive_identification)
export(annotation_consensus)
export(annotation_label_transfer)
export(calc_UMAP)
export(cell_cycle_scoring)
export(create_pseudobulk)
export(doublet_identification)
export(get_unique_tissues)
export(map2_test_differential_abundance_hpc)
export(map_add_dispersion_to_se)
export(map_split_sce_by_gene)
Expand All @@ -13,7 +17,8 @@ export(map_split_se_by_number_of_genes)
export(map_test_differential_abundance)
export(non_batch_variation_removal)
export(preprocessing_output)
export(pseudobulk_preprocessing)
export(pseudobulk_merge)
export(reference_label_fine_id)
export(run_targets_pipeline)
export(seurat_to_ligand_receptor_count)
export(test_differential_abundance_hpc)
Expand Down Expand Up @@ -42,6 +47,7 @@ importFrom(CellChat,triMean)
importFrom(DropletUtils,barcodeRanks)
importFrom(DropletUtils,emptyDrops)
importFrom(EnsDb.Hsapiens.v86,EnsDb.Hsapiens.v86)
importFrom(Matrix,Matrix)
importFrom(S4Vectors,cbind)
importFrom(S4Vectors,metadata)
importFrom(Seurat,CellCycleScoring)
Expand All @@ -59,7 +65,7 @@ importFrom(Seurat,ScaleData)
importFrom(Seurat,VariableFeatures)
importFrom(Seurat,as.SingleCellExperiment)
importFrom(SingleR,SingleR)
importFrom(SummarizedExperiment,`rowData<-`)
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,rowData)
importFrom(celldex,BlueprintEncodeData)
Expand All @@ -78,10 +84,8 @@ importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,with_groups)
importFrom(glmGamPoi,glm_gp)
importFrom(edgeR,estimateDisp)
importFrom(glue,glue)
importFrom(lme4,nobars)
importFrom(magrittr,"%$%")
importFrom(magrittr,extract2)
importFrom(purrr,map)
importFrom(purrr,map2)
Expand All @@ -95,7 +99,6 @@ importFrom(rlang,quo_name)
importFrom(scater,isOutlier)
importFrom(scuttle,logNormCounts)
importFrom(scuttle,perCellQCMetrics)
importFrom(stats,as.formula)
importFrom(stringr,str_detect)
importFrom(stringr,str_remove)
importFrom(stringr,str_remove_all)
Expand Down
6 changes: 3 additions & 3 deletions R/CellChat.R
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ get_table_for_cell_vs_axis_bubble_plot = function (object, sources.use = NULL, t
targets.use, sep = " -> ")
source.target.isolate <- setdiff(source.target, unique(df.net$source.target))
if (length(source.target.isolate) > 0) {
df.net.isolate <- as.data.frame(matrix(NA, nrow = length(source.target.isolate),
df.net.isolate <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(source.target.isolate),
ncol = ncol(df.net)))
colnames(df.net.isolate) <- colnames(df.net)
df.net.isolate$source.target <- source.target.isolate
Expand Down Expand Up @@ -751,7 +751,7 @@ get_table_for_cell_vs_axis_bubble_plot = function (object, sources.use = NULL, t
source.target.isolate <- setdiff(source.target,
unique(df.net$source.target))
if (length(source.target.isolate) > 0) {
df.net.isolate <- as.data.frame(matrix(NA, nrow = length(source.target.isolate),
df.net.isolate <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(source.target.isolate),
ncol = ncol(df.net)))
colnames(df.net.isolate) <- colnames(df.net)
df.net.isolate$source.target <- source.target.isolate
Expand Down Expand Up @@ -783,7 +783,7 @@ get_table_for_cell_vs_axis_bubble_plot = function (object, sources.use = NULL, t
df.net$prob <- -1/log(df.net$prob)
}
else {
df.net <- as.data.frame(matrix(NA, nrow = length(group.names),
df.net <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(group.names),
ncol = 5))
colnames(df.net) <- c("interaction_name_2",
"source.target", "prob", "pval", "prob.original")
Expand Down
108 changes: 81 additions & 27 deletions R/execute_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@ run_targets_pipeline <- function(
tissue,
computing_resources = crew_controller_local(workers = 1),
debug_step = NULL,
filter_empty_droplets = TRUE,
filter_empty_droplets = NULL,
RNA_assay_name = "RNA",
sample_column = "sample"
sample_column = "sample",
cell_type_annotation_column = "Cell_type_in_each_tissue"
){

sample_column = enquo(sample_column)
# cell_type_annotation_column = enquo(cell_type_annotation_column)

# Save inputs for passing to targets pipeline
# input_data |> CHANGE_ASSAY |> saveRDS("input_file.rds")
Expand All @@ -42,6 +44,7 @@ run_targets_pipeline <- function(
computing_resources |> saveRDS("temp_computing_resources.rds")
filter_empty_droplets |> saveRDS("filter_empty_droplets.rds")
sample_column |> saveRDS("sample_column.rds")
cell_type_annotation_column |> saveRDS("cell_type_annotation_column.rds")
# Write pipeline to a file
tar_script({

Expand Down Expand Up @@ -106,7 +109,7 @@ run_targets_pipeline <- function(

# library(future)
# library("future.batchtools")
# slurm <-
# slurm <-
# `batchtools_slurm` |>
# future::tweak( template = glue("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab_projects/people/mangiola.s/third_party_sofware/slurm_batchtools.tmpl"),
# resources=list(
Expand All @@ -116,8 +119,8 @@ run_targets_pipeline <- function(
# )
# )
# plan(slurm)
# small_slurm =

# small_slurm =
# tar_resources(
# future = tar_resources_future(
# plan = tweak(
Expand All @@ -131,8 +134,8 @@ run_targets_pipeline <- function(
# )
# )
# )
#
# big_slurm =
#
# big_slurm =
# tar_resources(
# future = tar_resources_future(
# plan = tweak(
Expand All @@ -154,7 +157,8 @@ run_targets_pipeline <- function(
tar_target(reference_file, readRDS("input_reference.rds")),
tar_target(tissue_file, readRDS("tissue.rds")),
tar_target(filtered_file, readRDS("filter_empty_droplets.rds")),
tar_target(sample_column_file, readRDS("sample_column.rds")))
tar_target(sample_column_file, readRDS("sample_column.rds")),
tar_target(cell_type_annotation_column_file, readRDS("cell_type_annotation_column.rds")))

#-----------------------#
# Pipeline
Expand All @@ -168,21 +172,31 @@ run_targets_pipeline <- function(
# tarchetypes::tar_files(name= reference_track,
# read_reference_file,
# deployment = "main"),
tar_target(do_filter_empty_droplets, filtered_file, deployment = "main"),
tar_target(tissue_type, tissue_file, deployment = "main"),
tar_target(sample_column_name, sample_column_file, deployment = "main"),
tar_target(reference_label_coarse, reference_label_coarse_id(tissue_type), deployment = "main"),
tar_target(reference_label_fine, reference_label_fine_id(tissue_type), deployment = "main"),
tar_target(filter_empty_droplets, filtered_file, deployment = "main"),
tar_target(tissue, tissue_file, deployment = "main", ),
tar_target(sample_column, sample_column_file, deployment = "main"),
tar_target(cell_type_annotation_column, cell_type_annotation_column_file, deployment = "main"),
tar_target(reference_label_coarse, reference_label_coarse_id(tissue), deployment = "main"),
tar_target(reference_label_fine, reference_label_fine_id(tissue), deployment = "main"),
# Reading input files
tar_target(input_read, readRDS(read_file),
pattern = map(read_file),
iteration = "list", deployment = "main"),

tar_target(reference_read, switch((!is.null(reference_file)) + 1, NULL, readRDS(reference_file)), deployment = "main"),
tar_target(unique_tissues,
get_unique_tissues(input_read, sample_column |> quo_name()),
pattern = map(input_read),
iteration = "list", deployment = "main"),
# tar_target(
# tissue_subsets,
# input_read, split.by = "Tissue"),
# pattern = map(input_read),
# iteration = "list"
# ),
tar_target(reference_read, reference_file, deployment = "main"),

# Identifying empty droplets
tar_target(empty_droplets_tbl,
empty_droplet_id(input_read, do_filter_empty_droplets),
empty_droplet_id(input_read, filter_empty_droplets),
pattern = map(input_read),
iteration = "list"),

Expand Down Expand Up @@ -235,7 +249,7 @@ run_targets_pipeline <- function(
iteration = "list"),

# Pre-processing output
tar_target(preprocessing_output_S, preprocessing_output(tissue_type,
tar_target(preprocessing_output_S, preprocessing_output(tissue,
non_batch_variation_removal_S,
alive_identification_tbl,
cell_cycle_score_tbl,
Expand All @@ -249,22 +263,62 @@ run_targets_pipeline <- function(
iteration = "list"),

# pseudobulk preprocessing for each sample
tar_target(create_pseudobulk_sample, create_pseudobulk(preprocessing_output_S,
assays = "SCT",
x = c(Tissue, Cell_type_in_each_tissue)),
pattern = map(preprocessing_output_S),
tar_target(create_pseudobulk_sample, create_pseudobulk(preprocessing_output_S,
assays = "SCT",
cell_type_annotation_column,
x = c(tissue_general, cell_type)),
pattern = map(preprocessing_output_S),
iteration = "list"),

tar_target(pseudobulk_merge_all_samples, pseudobulk_merge(create_pseudobulk_sample,
assays = "RNA",
x = c(Tissue)),
iteration = "list"),

tar_target(calc_UMAP_dbl_report, calc_UMAP(input_read),
pattern = map(input_read),
x = c(tissue_general)),
iteration = "list")

# tar_target(calc_UMAP_dbl_report, calc_UMAP(input_read),
# pattern = map(input_read),
# iteration = "list"),
# tar_target(variable_gene_list, HPCell:::find_variable_genes(input_read,
# empty_droplets_tbl),
# pattern = map(input_read, empty_droplets_tbl),
# iteration = "list"),
#
# tar_render(
# name = empty_droplets_report, # The name of the target
# path = paste0(system.file(package = "HPCell"), "/rmd/Empty_droplet_report.Rmd"),
# params = list(x1 = tar_read(input_read, store = store),
# x2 = tar_read(empty_droplets_tbl, store = store),
# x3 = tar_read(annotation_label_transfer_tbl, store = store),
# x4 = tar_read(unique_tissues, store = store),
# x5 = sample_column |> quo_name())
# ),
# tar_render(
# name = doublet_identification_report,
# path = paste0(system.file(package = "HPCell"), "/rmd/Doublet_identification_report.Rmd"),
# params = list(x1 = input_read,
# x2 = calc_UMAP_dbl_report,
# x3 = doublet_identification_tbl,
# x4 = annotation_label_transfer_tbl,
# x5 = sample_column |> quo_name(),
# x6 = cell_type_annotation_column |> quo_name())
# ),
# tar_render(
# name = Technical_variation_report,
# path = paste0(system.file(package = "HPCell"), "/rmd/Technical_variation_report.Rmd"),
# params = list(x1= input_read,
# x2= empty_droplets_tbl,
# x3 = variable_gene_list,
# x4 = calc_UMAP_dbl_report,
# x5 = sample_column |> quo_name())
# ),
# tar_render(
# name = pseudobulk_processing_report,
# path = paste0(system.file(package = "HPCell"), "/rmd/pseudobulk_analysis_report.Rmd"),
# params = list(x1 = pseudobulk_merge_all_samples,
# x2 = sample_column |> quo_name(),
# x3 = cell_type_annotation_column |> quo_name())
# )
))

}, script = glue("{store}.R"), ask = FALSE)

#Running targets
Expand Down
Loading
Loading