From 410845d5f0a377440c098926bd4796f10d7d6008 Mon Sep 17 00:00:00 2001 From: William Hutchison Date: Wed, 11 Dec 2024 13:42:06 +1100 Subject: [PATCH 1/8] Fix incorrect variables passed to functions --- R/functions.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/functions.R b/R/functions.R index 9518b4e..a8945f2 100644 --- a/R/functions.R +++ b/R/functions.R @@ -978,7 +978,7 @@ non_batch_variation_removal <- function(input_read_RNA_assay, # avoid small number of cells if (!is.null(empty_droplets_tbl)) { - filtered_counts <- input_read_RNA_assay_transform |> + filtered_counts <- input_read_RNA_assay |> left_join(empty_droplets_tbl, by = ".cell") |> dplyr::filter(!empty_droplet) } @@ -1009,9 +1009,8 @@ non_batch_variation_removal <- function(input_read_RNA_assay, # Normalise RNA normalized_rna <- - input_read_RNA_assay |> - Seurat::SCTransform( - counts, + counts |> + Seurat::SCTransform( assay=assay, return.only.var.genes=FALSE, residual.features = NULL, From 993e2b3fbdd0a3d62e5e59da90b56cf2c5fe5ef2 Mon Sep 17 00:00:00 2001 From: William Hutchison Date: Wed, 11 Dec 2024 13:45:55 +1100 Subject: [PATCH 2/8] Create SCT assay before attempting assignment --- R/functions.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/functions.R b/R/functions.R index a8945f2..91a52ad 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1108,9 +1108,11 @@ preprocessing_output <- function(input_read_RNA_assay, # Add normalisation if(!is.null(non_batch_variation_removal_S)){ - if(input_read_RNA_assay |> is("Seurat")) - input_read_RNA_assay[["SCT"]] = non_batch_variation_removal_S - else if(input_read_RNA_assay |> is("SingleCellExperiment")){ + if(input_read_RNA_assay |> is("Seurat")) { + non_batch_variation_removal_S_assay <- CreateAssay5Object(data = non_batch_variation_removal_S) + input_read_RNA_assay[["SCT"]] <- non_batch_variation_removal_S_assay + + } else if(input_read_RNA_assay |> is("SingleCellExperiment")){ message("HPCell says: in order to attach SCT assay to the SingleCellExperiment, SCT was added to external experiments slot") #input_read_RNA_assay = input_read_RNA_assay[rownames(non_batch_variation_removal_S), # altExp(input_read_RNA_assay) = SingleCellExperiment(assay = list(SCT = non_batch_variation_removal_S)) From 9211c512c776542b8faa05e3e523400b93c17da7 Mon Sep 17 00:00:00 2001 From: William Hutchison Date: Wed, 11 Dec 2024 14:09:44 +1100 Subject: [PATCH 3/8] Prevent error when cell annotations are unavailable --- R/functions.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/functions.R b/R/functions.R index 91a52ad..697f031 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1146,10 +1146,12 @@ preprocessing_output <- function(input_read_RNA_assay, ) # Attach annotation - if (inherits(annotation_label_transfer_tbl, "tbl_df")){ - input_read_RNA_assay <- input_read_RNA_assay |> - left_join(annotation_label_transfer_tbl, by = ".cell") - } + try({ + if (inherits(annotation_label_transfer_tbl, "tbl_df")){ + input_read_RNA_assay <- input_read_RNA_assay |> + left_join(annotation_label_transfer_tbl, by = ".cell") + } + }, silent = TRUE) input_read_RNA_assay From 38804221b66a2af3bf100801d3a20624b1b7fc21 Mon Sep 17 00:00:00 2001 From: William Hutchison Date: Wed, 11 Dec 2024 14:16:00 +1100 Subject: [PATCH 4/8] Remove NaN features from SCT assay --- R/functions.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/functions.R b/R/functions.R index 697f031..8481b60 100644 --- a/R/functions.R +++ b/R/functions.R @@ -1021,15 +1021,17 @@ non_batch_variation_removal <- function(input_read_RNA_assay, min_cells=0, ) |> GetAssayData(assay="SCT") - - + + # Remove NaN features from SCT assay + normalized_rna_filtered <- normalized_rna[!apply(normalized_rna, 1, function(row) all(is.nan(row))), ] + if (class_input == "SingleCellExperiment") { - write_HDF5_array_safe(normalized_rna, "SCT", external_path) + write_HDF5_array_safe(normalized_rna_filtered, "SCT", external_path) } else if (class_input == "Seurat") { - normalized_rna + normalized_rna_filtered } From 369d98ba0fe12342760f696743b6e67fa5e2f8ae Mon Sep 17 00:00:00 2001 From: William Hutchison Date: Wed, 11 Dec 2024 14:30:52 +1100 Subject: [PATCH 5/8] Version up --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f5e27f1..1044cda 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: HPCell Title: Massively-parallel R native pipeline for single-cell analysis -Version: 0.3.11 +Version: 0.3.12 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Jiayi", "Si", email = "si.j@wehi.edu.au", From 1cab8530fc5cd9ed53af2b6494dfaded6f0332d2 Mon Sep 17 00:00:00 2001 From: William Hutchison Date: Wed, 11 Dec 2024 14:37:14 +1100 Subject: [PATCH 6/8] Remove undefined function export --- NAMESPACE | 1 - man/run_targets_pipeline.Rd | 57 ------------------------------------- 2 files changed, 58 deletions(-) delete mode 100644 man/run_targets_pipeline.Rd diff --git a/NAMESPACE b/NAMESPACE index e494269..a98c129 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,7 +63,6 @@ export(remove_dead_scuttle) export(remove_doublets_scDblFinder) export(remove_empty_DropletUtils) export(remove_empty_threshold) -export(run_targets_pipeline) export(save_experiment_data) export(score_cell_cycle_seurat) export(se_add_dispersion) diff --git a/man/run_targets_pipeline.Rd b/man/run_targets_pipeline.Rd deleted file mode 100644 index c1ba5b7..0000000 --- a/man/run_targets_pipeline.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/execute_pipeline.R -\name{run_targets_pipeline} -\alias{run_targets_pipeline} -\title{Run Targets Pipeline for HPCell} -\usage{ -run_targets_pipeline( - input_data, - store = "./", - input_reference = NULL, - tissue, - computing_resources = crew_controller_local(workers = 1), - debug_step = NULL, - filter_empty_droplets = NULL, - RNA_assay_name = "RNA", - sample_column = "sample", - cell_type_annotation_column = "Cell_type_in_each_tissue", - data_container_type -) -} -\arguments{ -\item{input_data}{Input data for the pipeline.} - -\item{store}{Directory path for storing the pipeline files.} - -\item{input_reference}{Optional reference data.} - -\item{tissue}{Tissue type for the analysis.} - -\item{computing_resources}{Configuration for computing resources.} - -\item{debug_step}{Optional step for debugging.} - -\item{filter_empty_droplets}{Flag to indicate if input filtering is needed.} - -\item{RNA_assay_name}{Name of the RNA assay.} - -\item{sample_column}{Column name for sample identification.} - -\item{cell_type_annotation_column}{Column name for cell type annotation in input data} - -\item{data_container_type}{A character vector of length one specifies the input data type.} - -\item{profiler}{Optional step for profilling. Default is FALSE -data type can be one of the following: anndata for annotated data mainly used in python. -sce_rds and seurat_rds for \code{SingleCellExperiment} and \code{Seurat} RDS format representively -seurat_rds for \code{Seurat} RDS format. -sce_hdf5 for \code{SingleCellExperiment} HDF5 format -seurat_hdf5 for \code{Seurat} HDF5 format} -} -\value{ -The output of the \code{targets} pipeline, typically a pre-processed data set. -} -\description{ -This function sets up and executes a \code{targets} pipeline for HPCell. It saves input data and configurations, -writes a pipeline script, and runs the pipeline using the 'targets' package. -} From 3772e8423d3c035eb627eed145cb07328b058b12 Mon Sep 17 00:00:00 2001 From: William Hutchison Date: Wed, 11 Dec 2024 14:37:32 +1100 Subject: [PATCH 7/8] Version up --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1044cda..e923ae2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: HPCell Title: Massively-parallel R native pipeline for single-cell analysis -Version: 0.3.12 +Version: 0.3.13 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Jiayi", "Si", email = "si.j@wehi.edu.au", From ccd92d6f92ad46fc3fd89068ca0a7709b47d4c19 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 12 Dec 2024 13:09:45 +1030 Subject: [PATCH 8/8] Stefano's changes made directly on Github --- R/functions.R | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/R/functions.R b/R/functions.R index 8481b60..22bf945 100644 --- a/R/functions.R +++ b/R/functions.R @@ -970,7 +970,7 @@ non_batch_variation_removal <- function(input_read_RNA_assay, # Rename assay assay_name_old = input_read_RNA_assay |> Assays() |> _[[1]] - input_read_RNA_assay_transform = input_read_RNA_assay |> + input_read_RNA_assay = input_read_RNA_assay |> RenameAssays( assay.name = assay_name_old, new.assay.name = assay) @@ -978,21 +978,23 @@ non_batch_variation_removal <- function(input_read_RNA_assay, # avoid small number of cells if (!is.null(empty_droplets_tbl)) { - filtered_counts <- input_read_RNA_assay |> + input_read_RNA_assay <- input_read_RNA_assay |> left_join(empty_droplets_tbl, by = ".cell") |> dplyr::filter(!empty_droplet) } - - counts = - filtered_counts |> + + if (!is.null(alive_identification_tbl)) { + input_read_RNA_assay = + input_read_RNA_assay |> left_join( alive_identification_tbl |> select(.cell, any_of(factors_to_regress)), by=".cell" ) + } if(!is.null(cell_cycle_score_tbl)) - counts = counts |> + input_read_RNA_assay = input_read_RNA_assay |> left_join( cell_cycle_score_tbl |> @@ -1005,11 +1007,11 @@ non_batch_variation_removal <- function(input_read_RNA_assay, # variable_features = readRDS(input_path_merged_variable_genes) # # # Set variable features - # VariableFeatures(counts) = variable_features + # VariableFeatures(input_read_RNA_assay) = variable_features # Normalise RNA - normalized_rna <- - counts |> + input_read_RNA_assay <- + input_read_RNA_assay |> Seurat::SCTransform( assay=assay, return.only.var.genes=FALSE, @@ -1018,20 +1020,23 @@ non_batch_variation_removal <- function(input_read_RNA_assay, vst.flavor = "v2", scale_factor=2186, conserve.memory=T, - min_cells=0, + min_cells=0 ) |> GetAssayData(assay="SCT") - - # Remove NaN features from SCT assay - normalized_rna_filtered <- normalized_rna[!apply(normalized_rna, 1, function(row) all(is.nan(row))), ] if (class_input == "SingleCellExperiment") { - - write_HDF5_array_safe(normalized_rna_filtered, "SCT", external_path) + + if(input_read_RNA_assay[,1,drop=FALSE] |> is.nan() |> any()) + warning("HPCell says: some features might be all 0s, NaN are added by Seurat in the SCT assay, and kept in the assay because SingleCellExperiment requires same feature set for all assays.") + + write_HDF5_array_safe(input_read_RNA_assay, "SCT", external_path) } else if (class_input == "Seurat") { - - normalized_rna_filtered + + # Remove NaN features from SCT assay + input_read_RNA_assay <- input_read_RNA_assay[!apply(input_read_RNA_assay, 1, function(row) all(is.nan(row))), ] + + input_read_RNA_assay }