Skip to content

Commit f2950d2

Browse files
authored
Merge pull request #148 from myushen/master
solve corrupted pseudobulk assays
2 parents 55ca4a9 + 86ef5a0 commit f2950d2

File tree

3 files changed

+46
-3
lines changed

3 files changed

+46
-3
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: CuratedAtlasQueryR
33
Title: Queries the Human Cell Atlas
4-
Version: 1.3.6
4+
Version: 1.3.7
55
Authors@R: c(
66
person(
77
"Stefano",

R/counts.R

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ COUNTS_VERSION <- "0.2.1"
2929
#' @noRd
3030
pseudobulk_url <- single_line_str(
3131
"https://object-store.rc.nectar.org.au/v1/
32-
AUTH_06d6e008e3e642da99d806ba3ea629c5/pseudobulk-0.1.0"
32+
AUTH_06d6e008e3e642da99d806ba3ea629c5/pseudobulk-0.1.1"
3333
)
3434

3535

@@ -332,7 +332,9 @@ group_to_data_container <- function(i, df, dir_prefix, features, grouping_column
332332
select(-dplyr::all_of(intersect(names(df), cell_level_anno))) |>
333333
distinct() |>
334334
mutate(
335-
sample_identifier = glue("{sample_}___{cell_type_harmonised}"),
335+
sample_identifier = ifelse(file_id %in% file_ids,
336+
glue("{sample_}___{cell_type_harmonised}___{disease}___{is_primary_data_x}"),
337+
glue("{sample_}___{cell_type_harmonised}")),
336338
original_sample_id = .data$sample_identifier
337339
) |>
338340
column_to_rownames("original_sample_id")
@@ -351,6 +353,23 @@ group_to_data_container <- function(i, df, dir_prefix, features, grouping_column
351353
}
352354
}
353355

356+
#' A temporary workaround for duplicated row names in get_pseudobulk(), which occur because
357+
#' the `disease` and `is_primary_data` columns are not included in `sample_` in the metadata.
358+
#' @noRd
359+
# file_ids whose pseudobulk assays are corrupted
360+
file_ids <- c(
361+
"b50b15f1-bf19-4775-ab89-02512ec941a6",
362+
"bffedc04-5ba1-46d4-885c-989a294bedd4",
363+
"cc3ff54f-7587-49ea-b197-1515b6d98c4c",
364+
"0af763e1-0e2f-4de6-9563-5abb0ad2b01e",
365+
"51f114ae-232a-4550-a910-934e175db814",
366+
"327927c7-c365-423c-9ebc-07acb09a0c1a",
367+
"3ae36927-c188-4511-88cc-572ee1edf906",
368+
"6ed2cdc2-dda8-4908-ad6c-cead9afee85e",
369+
"56e0359f-ee8d-4ba5-a51d-159a183643e5",
370+
"5c64f247-5b7c-4842-b290-65c722a65952"
371+
)
372+
354373
#' Synchronises one or more remote assays with a local copy
355374
#' @param url A character vector of length one. The base HTTP URL from which to
356375
#' obtain the files.

tests/testthat/test-query.R

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,4 +229,28 @@ test_that("get_pseudobulk() syncs appropriate files", {
229229
expect_gt(1)
230230
})
231231

232+
test_that("get_pseudobulk() syncs appropriate fixed file", {
233+
temp <- tempfile()
234+
ids <- c(
235+
"b50b15f1-bf19-4775-ab89-02512ec941a6",
236+
"bffedc04-5ba1-46d4-885c-989a294bedd4",
237+
"cc3ff54f-7587-49ea-b197-1515b6d98c4c",
238+
"0af763e1-0e2f-4de6-9563-5abb0ad2b01e",
239+
"51f114ae-232a-4550-a910-934e175db814",
240+
"327927c7-c365-423c-9ebc-07acb09a0c1a",
241+
"3ae36927-c188-4511-88cc-572ee1edf906",
242+
"6ed2cdc2-dda8-4908-ad6c-cead9afee85e",
243+
"56e0359f-ee8d-4ba5-a51d-159a183643e5",
244+
"5c64f247-5b7c-4842-b290-65c722a65952"
245+
)
246+
meta <- get_metadata(cache_directory = temp) |> dplyr::filter(file_id %in% ids)
247+
248+
# The remote dataset should have many genes
249+
sme <- get_pseudobulk(meta, cache_directory = temp)
250+
sme |>
251+
row.names() |>
252+
length() |>
253+
expect_gt(1)
254+
})
255+
232256

0 commit comments

Comments
 (0)