Skip to content

Commit

Permalink
Merge pull request #148 from myushen/master
Browse files Browse the repository at this point in the history
solve corrupted pseudobulk assays
  • Loading branch information
stemangiola authored Jul 11, 2024
2 parents 55ca4a9 + 86ef5a0 commit f2950d2
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: CuratedAtlasQueryR
Title: Queries the Human Cell Atlas
Version: 1.3.6
Version: 1.3.7
Authors@R: c(
person(
"Stefano",
Expand Down
23 changes: 21 additions & 2 deletions R/counts.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ COUNTS_VERSION <- "0.2.1"
#' @noRd
pseudobulk_url <- single_line_str(
"https://object-store.rc.nectar.org.au/v1/
AUTH_06d6e008e3e642da99d806ba3ea629c5/pseudobulk-0.1.0"
AUTH_06d6e008e3e642da99d806ba3ea629c5/pseudobulk-0.1.1"
)


Expand Down Expand Up @@ -332,7 +332,9 @@ group_to_data_container <- function(i, df, dir_prefix, features, grouping_column
select(-dplyr::all_of(intersect(names(df), cell_level_anno))) |>
distinct() |>
mutate(
sample_identifier = glue("{sample_}___{cell_type_harmonised}"),
sample_identifier = ifelse(file_id %in% file_ids,
glue("{sample_}___{cell_type_harmonised}___{disease}___{is_primary_data_x}"),
glue("{sample_}___{cell_type_harmonised}")),
original_sample_id = .data$sample_identifier
) |>
column_to_rownames("original_sample_id")
Expand All @@ -351,6 +353,23 @@ group_to_data_container <- function(i, df, dir_prefix, features, grouping_column
}
}

#' File IDs whose pseudobulk sample identifiers need extra disambiguation
#'
#' Temporary workaround for duplicated row names in `get_pseudobulk()`: the
#' `disease` and `is_primary_data` columns are not included in `sample_` in
#' the metadata for these (corrupted) files. `group_to_data_container()`
#' checks `file_id` against this vector and, on a match, appends `disease`
#' and `is_primary_data_x` to the sample identifier.
#' @noRd
file_ids <- c(
  "b50b15f1-bf19-4775-ab89-02512ec941a6",
  "bffedc04-5ba1-46d4-885c-989a294bedd4",
  "cc3ff54f-7587-49ea-b197-1515b6d98c4c",
  "0af763e1-0e2f-4de6-9563-5abb0ad2b01e",
  "51f114ae-232a-4550-a910-934e175db814",
  "327927c7-c365-423c-9ebc-07acb09a0c1a",
  "3ae36927-c188-4511-88cc-572ee1edf906",
  "6ed2cdc2-dda8-4908-ad6c-cead9afee85e",
  "56e0359f-ee8d-4ba5-a51d-159a183643e5",
  "5c64f247-5b7c-4842-b290-65c722a65952"
)

#' Synchronises one or more remote assays with a local copy
#' @param url A character vector of length one. The base HTTP URL from which to
#' obtain the files.
Expand Down
24 changes: 24 additions & 0 deletions tests/testthat/test-query.R
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,28 @@ test_that("get_pseudobulk() syncs appropriate files", {
expect_gt(1)
})

test_that("get_pseudobulk() syncs appropriate fixed file", {
  # Same file IDs that R/counts.R lists as corrupted in `file_ids`
  fixed_file_ids <- c(
    "b50b15f1-bf19-4775-ab89-02512ec941a6",
    "bffedc04-5ba1-46d4-885c-989a294bedd4",
    "cc3ff54f-7587-49ea-b197-1515b6d98c4c",
    "0af763e1-0e2f-4de6-9563-5abb0ad2b01e",
    "51f114ae-232a-4550-a910-934e175db814",
    "327927c7-c365-423c-9ebc-07acb09a0c1a",
    "3ae36927-c188-4511-88cc-572ee1edf906",
    "6ed2cdc2-dda8-4908-ad6c-cead9afee85e",
    "56e0359f-ee8d-4ba5-a51d-159a183643e5",
    "5c64f247-5b7c-4842-b290-65c722a65952"
  )

  # Fresh cache location shared by the metadata and pseudobulk downloads
  cache_dir <- tempfile()

  # Restrict the metadata to the files listed above
  all_metadata <- get_metadata(cache_directory = cache_dir)
  fixed_metadata <- dplyr::filter(all_metadata, file_id %in% fixed_file_ids)

  pseudobulk <- get_pseudobulk(fixed_metadata, cache_directory = cache_dir)

  # The remote dataset should have many genes
  expect_gt(length(row.names(pseudobulk)), 1)
})


0 comments on commit f2950d2

Please sign in to comment.