MangiolaLaboratory · myushen · Dec 18, 2023 · Jan 17, 2024 · Jan 17, 2024 · Jan 19, 2024
diff --git a/.github/workflows/render-rmd.yml b/.github/workflows/render-rmd.yml
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,6 +22,8 @@ Imports:
   tidybulk,
   tidySummarizedExperiment,
   tidySingleCellExperiment,
+  SummarizedExperiment, 
+  edgeR,
   sctransform (>= 0.3.3),
   Seurat (>= 5.0.0),
   Matrix (>= 1.6),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,10 +1,14 @@
 # Generated by roxygen2: do not edit by hand
 
+export(add_RNA_assay)
 export(alive_identification)
 export(annotation_consensus)
 export(annotation_label_transfer)
+export(calc_UMAP)
 export(cell_cycle_scoring)
+export(create_pseudobulk)
 export(doublet_identification)
+export(get_unique_tissues)
 export(map2_test_differential_abundance_hpc)
 export(map_add_dispersion_to_se)
 export(map_split_sce_by_gene)
@@ -13,7 +17,8 @@ export(map_split_se_by_number_of_genes)
 export(map_test_differential_abundance)
 export(non_batch_variation_removal)
 export(preprocessing_output)
-export(pseudobulk_preprocessing)
+export(pseudobulk_merge)
+export(reference_label_fine_id)
 export(run_targets_pipeline)
 export(seurat_to_ligand_receptor_count)
 export(test_differential_abundance_hpc)
@@ -42,6 +47,7 @@ importFrom(CellChat,triMean)
 importFrom(DropletUtils,barcodeRanks)
 importFrom(DropletUtils,emptyDrops)
 importFrom(EnsDb.Hsapiens.v86,EnsDb.Hsapiens.v86)
+importFrom(Matrix,Matrix)
 importFrom(S4Vectors,cbind)
 importFrom(S4Vectors,metadata)
 importFrom(Seurat,CellCycleScoring)
@@ -59,7 +65,7 @@ importFrom(Seurat,ScaleData)
 importFrom(Seurat,VariableFeatures)
 importFrom(Seurat,as.SingleCellExperiment)
 importFrom(SingleR,SingleR)
-importFrom(SummarizedExperiment,`rowData<-`)
+importFrom(SummarizedExperiment,SummarizedExperiment)
 importFrom(SummarizedExperiment,assays)
 importFrom(SummarizedExperiment,rowData)
 importFrom(celldex,BlueprintEncodeData)
@@ -78,10 +84,8 @@ importFrom(dplyr,pull)
 importFrom(dplyr,rename)
 importFrom(dplyr,select)
 importFrom(dplyr,with_groups)
-importFrom(glmGamPoi,glm_gp)
+importFrom(edgeR,estimateDisp)
 importFrom(glue,glue)
-importFrom(lme4,nobars)
-importFrom(magrittr,"%$%")
 importFrom(magrittr,extract2)
 importFrom(purrr,map)
 importFrom(purrr,map2)
@@ -95,7 +99,6 @@ importFrom(rlang,quo_name)
 importFrom(scater,isOutlier)
 importFrom(scuttle,logNormCounts)
 importFrom(scuttle,perCellQCMetrics)
-importFrom(stats,as.formula)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_remove)
 importFrom(stringr,str_remove_all)

diff --git a/R/CellChat.R b/R/CellChat.R
@@ -684,7 +684,7 @@ get_table_for_cell_vs_axis_bubble_plot = function (object, sources.use = NULL, t
                            targets.use, sep = " -> ")
     source.target.isolate <- setdiff(source.target, unique(df.net$source.target))
     if (length(source.target.isolate) > 0) {
-      df.net.isolate <- as.data.frame(matrix(NA, nrow = length(source.target.isolate),
+      df.net.isolate <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(source.target.isolate),
                                              ncol = ncol(df.net)))
       colnames(df.net.isolate) <- colnames(df.net)
       df.net.isolate$source.target <- source.target.isolate
@@ -751,7 +751,7 @@ get_table_for_cell_vs_axis_bubble_plot = function (object, sources.use = NULL, t
       source.target.isolate <- setdiff(source.target,
                                        unique(df.net$source.target))
       if (length(source.target.isolate) > 0) {
-        df.net.isolate <- as.data.frame(matrix(NA, nrow = length(source.target.isolate),
+        df.net.isolate <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(source.target.isolate),
                                                ncol = ncol(df.net)))
         colnames(df.net.isolate) <- colnames(df.net)
         df.net.isolate$source.target <- source.target.isolate
@@ -783,7 +783,7 @@ get_table_for_cell_vs_axis_bubble_plot = function (object, sources.use = NULL, t
         df.net$prob <- -1/log(df.net$prob)
       }
       else {
-        df.net <- as.data.frame(matrix(NA, nrow = length(group.names),
+        df.net <- BiocGenerics::as.data.frame(matrix(NA, nrow = length(group.names),
                                        ncol = 5))
         colnames(df.net) <- c("interaction_name_2",
                               "source.target", "prob", "pval", "prob.original")

diff --git a/R/execute_pipeline.R b/R/execute_pipeline.R
@@ -27,12 +27,14 @@ run_targets_pipeline <- function(
     tissue,
     computing_resources = crew_controller_local(workers = 1), 
     debug_step = NULL,
-    filter_empty_droplets = TRUE, 
+    filter_empty_droplets = NULL, 
     RNA_assay_name = "RNA", 
-    sample_column = "sample"
+    sample_column = "sample", 
+    cell_type_annotation_column = "Cell_type_in_each_tissue"
 ){
 
   sample_column = enquo(sample_column)
+  # cell_type_annotation_column = enquo(cell_type_annotation_column)
 
   # Save inputs for passing to targets pipeline 
   # input_data |> CHANGE_ASSAY |> saveRDS("input_file.rds")
@@ -42,6 +44,7 @@ run_targets_pipeline <- function(
   computing_resources |> saveRDS("temp_computing_resources.rds")
   filter_empty_droplets |> saveRDS("filter_empty_droplets.rds")
   sample_column |> saveRDS("sample_column.rds")
+  cell_type_annotation_column |> saveRDS("cell_type_annotation_column.rds")
   # Write pipeline to a file
   tar_script({
 
@@ -106,7 +109,7 @@ run_targets_pipeline <- function(
 
     # library(future)
     # library("future.batchtools")
-    # slurm <- 
+    # slurm <-
     #     `batchtools_slurm` |>
     #     future::tweak( template = glue("/stornext/Bioinf/data/bioinf-data/Papenfuss_lab_projects/people/mangiola.s/third_party_sofware/slurm_batchtools.tmpl"),
     #                                  resources=list(
@@ -116,8 +119,8 @@ run_targets_pipeline <- function(
     #                                  )
     #     )
     # plan(slurm)
-    
-    # small_slurm = 
+
+    # small_slurm =
     #   tar_resources(
     #     future = tar_resources_future(
     #       plan = tweak(
@@ -131,8 +134,8 @@ run_targets_pipeline <- function(
     #       )
     #     )
     #   )
-    # 
-    # big_slurm = 
+    #
+    # big_slurm =
     #   tar_resources(
     #     future = tar_resources_future(
     #       plan = tweak(
@@ -154,7 +157,8 @@ run_targets_pipeline <- function(
       tar_target(reference_file, readRDS("input_reference.rds")), 
       tar_target(tissue_file, readRDS("tissue.rds")), 
       tar_target(filtered_file, readRDS("filter_empty_droplets.rds")), 
-      tar_target(sample_column_file, readRDS("sample_column.rds")))
+      tar_target(sample_column_file, readRDS("sample_column.rds")), 
+      tar_target(cell_type_annotation_column_file, readRDS("cell_type_annotation_column.rds")))
 
     #-----------------------#
     # Pipeline
@@ -168,21 +172,31 @@ run_targets_pipeline <- function(
       # tarchetypes::tar_files(name= reference_track,
       #                        read_reference_file, 
       #                        deployment = "main"),
-      tar_target(do_filter_empty_droplets, filtered_file, deployment = "main"),
-      tar_target(tissue_type, tissue_file, deployment = "main"),
-      tar_target(sample_column_name, sample_column_file, deployment = "main"),
-      tar_target(reference_label_coarse, reference_label_coarse_id(tissue_type), deployment = "main"), 
-      tar_target(reference_label_fine, reference_label_fine_id(tissue_type), deployment = "main"), 
+      tar_target(filter_empty_droplets, filtered_file, deployment = "main"),
+      tar_target(tissue, tissue_file, deployment = "main", ),
+      tar_target(sample_column, sample_column_file, deployment = "main"),
+      tar_target(cell_type_annotation_column, cell_type_annotation_column_file, deployment = "main"),
+      tar_target(reference_label_coarse, reference_label_coarse_id(tissue), deployment = "main"), 
+      tar_target(reference_label_fine, reference_label_fine_id(tissue), deployment = "main"), 
       # Reading input files
       tar_target(input_read, readRDS(read_file),
                  pattern = map(read_file),
                  iteration = "list", deployment = "main"),
-
-      tar_target(reference_read, switch((!is.null(reference_file)) + 1, NULL, readRDS(reference_file)), deployment = "main"),
+      tar_target(unique_tissues,
+                 get_unique_tissues(input_read, sample_column |> quo_name()),
+                 pattern = map(input_read),
+                 iteration = "list", deployment = "main"),
+      # tar_target(
+      #   tissue_subsets,
+      #   input_read, split.by = "Tissue"), 
+      #   pattern = map(input_read),
+      #   iteration = "list"
+      # ),
+      tar_target(reference_read, reference_file, deployment = "main"),
 
       # Identifying empty droplets
       tar_target(empty_droplets_tbl,
-                 empty_droplet_id(input_read, do_filter_empty_droplets),
+                 empty_droplet_id(input_read, filter_empty_droplets),
                  pattern = map(input_read),
                  iteration = "list"),
 
@@ -235,7 +249,7 @@ run_targets_pipeline <- function(
                  iteration = "list"),
 
       # Pre-processing output
-      tar_target(preprocessing_output_S, preprocessing_output(tissue_type,
+      tar_target(preprocessing_output_S, preprocessing_output(tissue,
                                                               non_batch_variation_removal_S,
                                                               alive_identification_tbl,
                                                               cell_cycle_score_tbl,
@@ -249,22 +263,62 @@ run_targets_pipeline <- function(
                  iteration = "list"),
 
       # pseudobulk preprocessing for each sample 
-      tar_target(create_pseudobulk_sample, create_pseudobulk(preprocessing_output_S, 
-                                                                   assays = "SCT", 
-                                                                   x = c(Tissue, Cell_type_in_each_tissue)), 
-                 pattern = map(preprocessing_output_S), 
+      tar_target(create_pseudobulk_sample, create_pseudobulk(preprocessing_output_S,
+                                                             assays = "SCT",
+                                                             cell_type_annotation_column,
+                                                             x = c(tissue_general, cell_type)),
+                 pattern = map(preprocessing_output_S),
                  iteration = "list"),
 
       tar_target(pseudobulk_merge_all_samples, pseudobulk_merge(create_pseudobulk_sample, 
                                                                 assays = "RNA", 
-                                                                x = c(Tissue)), 
-                 iteration = "list"),
-
-      tar_target(calc_UMAP_dbl_report, calc_UMAP(input_read), 
-                 pattern = map(input_read), 
+                                                                x = c(tissue_general)), 
                  iteration = "list")
+
+      # tar_target(calc_UMAP_dbl_report, calc_UMAP(input_read), 
+      #            pattern = map(input_read), 
+      #            iteration = "list"), 
+      # tar_target(variable_gene_list, HPCell:::find_variable_genes(input_read, 
+      #                                                    empty_droplets_tbl), 
+      #            pattern = map(input_read, empty_droplets_tbl), 
+      #            iteration = "list"),
+      # 
+      # tar_render(
+      #   name = empty_droplets_report, # The name of the target
+      #   path =  paste0(system.file(package = "HPCell"), "/rmd/Empty_droplet_report.Rmd"),
+      #   params = list(x1 = tar_read(input_read, store = store),
+      #                 x2 = tar_read(empty_droplets_tbl, store = store),
+      #                 x3 = tar_read(annotation_label_transfer_tbl, store = store),
+      #                 x4 = tar_read(unique_tissues, store = store),
+      #                 x5 = sample_column |> quo_name())
+      # ),
+      # tar_render(
+      #   name = doublet_identification_report,
+      #   path = paste0(system.file(package = "HPCell"), "/rmd/Doublet_identification_report.Rmd"),
+      #   params = list(x1 = input_read,
+      #                 x2 = calc_UMAP_dbl_report,
+      #                 x3 = doublet_identification_tbl,
+      #                 x4 = annotation_label_transfer_tbl,
+      #                 x5 = sample_column |> quo_name(),
+      #                 x6 = cell_type_annotation_column |> quo_name())
+      # ),
+      # tar_render(
+      #   name = Technical_variation_report,
+      #   path =  paste0(system.file(package = "HPCell"), "/rmd/Technical_variation_report.Rmd"),
+      #   params = list(x1= input_read,
+      #                 x2= empty_droplets_tbl,
+      #                 x3 = variable_gene_list,
+      #                 x4 = calc_UMAP_dbl_report, 
+      #                 x5 = sample_column |> quo_name())
+      # ),
+      # tar_render(
+      #   name = pseudobulk_processing_report,
+      #   path = paste0(system.file(package = "HPCell"), "/rmd/pseudobulk_analysis_report.Rmd"),
+      #   params = list(x1 = pseudobulk_merge_all_samples, 
+      #                 x2 = sample_column |> quo_name(), 
+      #                 x3 = cell_type_annotation_column |> quo_name())
+      # )
       ))
-
   }, script = glue("{store}.R"), ask = FALSE)
 
   #Running targets