Skip to content

Commit

Permalink
tests/testthat/test-query-condition.R
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 10, 2024
1 parent f95efe5 commit 4855c0e
Show file tree
Hide file tree
Showing 4 changed files with 234 additions and 31 deletions.
18 changes: 10 additions & 8 deletions apis/r/R/QueryCondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,12 @@ parse_query_condition_new <- function(
somactx
) {

stopifnot("The schema argument must be an Arrow Schema" =
is(schema, "ArrowObject") &&
is(schema, "Schema"))
stopifnot(
"The schema argument must be an Arrow Schema" =
is(schema, "ArrowObject") &&
is(schema, "Schema"),
"The argument must be a somactx object" =
is(somactx, "externalptr"))

# ----------------------------------------------------------------
# Helpers for walking the parse tree
Expand Down Expand Up @@ -124,7 +127,6 @@ parse_query_condition_new <- function(
r_op_name <- tolower(as.character(node[1]))
tdb_op_name <- if (r_op_name == "%in%") "IN" else "NOT_IN"

# XXX EXTRACT HELPER
arrow_field <- schema[[attr_name]]
if (is.null(arrow_field)) {
.error_function("No attribute '", attr_name, "' is present.", call. = FALSE)
Expand Down Expand Up @@ -168,8 +170,8 @@ parse_query_condition_new <- function(
ascii = rhs_text,
utf8 = rhs_text,
bool = as.logical(rhs_text),
## XXX DATETIME_MS = as.POSIXct(rhs_text),
## XXX DATETIME_DAY = as.Date(rhs_text),
date32 = as.POSIXct(rhs_text),
timestamp = as.Date(rhs_text),
as.numeric(rhs_text)),
arrow_type_name = arrow_type_name,
op_name = .map_op_to_character(op_name),
Expand Down Expand Up @@ -201,12 +203,12 @@ setClass(
# ================================================================
#' Creates a 'tiledbsoma_query_condition' object
#'
#' @param ctx (optional) A TileDB Ctx object; if not supplied the default
#' @param somactx (optional) A TileDB Ctx object; if not supplied the default
#' context object is retrieved
#' @return A 'tiledbsoma_query_condition' object
#' @export
tiledbsoma_empty_query_condition <- function(somactx) {
stopifnot("The argument must be a ctx object" = is(ctx, "externalptr"))
stopifnot("The argument must be a somactx object" = is(somactx, "externalptr"))
ptr <- libtiledbsoma_empty_query_condition(somactx)
query_condition <- new("tiledbsoma_query_condition", ptr = ptr, init = FALSE)
invisible(query_condition)
Expand Down
26 changes: 13 additions & 13 deletions apis/r/src/query_condition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ void libtiledbsoma_query_condition_from_triple(
uint64_t cond_val_size = sizeof(float);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else if (arrow_type_name == "ascii" || arrow_type_name == "utf8") {
} else if (arrow_type_name == "utf8" || arrow_type_name == "large_utf8") {
std::string v = Rcpp::as<std::string>(condition_value);
query_cond->init(attr_name, v, op);

Expand All @@ -139,18 +139,18 @@ void libtiledbsoma_query_condition_from_triple(
uint64_t cond_val_size = sizeof(bool);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

// XXX FIXME
// } else if (arrow_type_name == "DATETIME_MS") {
// int64_t v = static_cast<int64_t>(
// Rcpp::as<double>(condition_value) * 1000);
// uint64_t cond_val_size = sizeof(int64_t);
// query_cond->init(attr_name, (void*)&v, cond_val_size, op);

// } else if (arrow_type_name == "DATETIME_DAY") {
// int64_t v =
// static_cast<int64_t>(Rcpp::as<double>(condition_value));
// uint64_t cond_val_size = sizeof(int64_t);
// query_cond->init(attr_name, (void*)&v, cond_val_size, op);
} else if (arrow_type_name == "timestamp") {
// Arrow timestamp TileDB DATETIME_MS
int64_t v = static_cast<int64_t>(
Rcpp::as<double>(condition_value) * 1000);
uint64_t cond_val_size = sizeof(int64_t);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else if (arrow_type_name == "date32") {
// Arrow date32 TileDB DATETIME_DAY
int64_t v = static_cast<int64_t>(Rcpp::as<double>(condition_value));
uint64_t cond_val_size = sizeof(int64_t);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else {
Rcpp::stop(
Expand Down
206 changes: 206 additions & 0 deletions apis/r/tests/testthat/test-query-condition.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
test_that("DataFrame Factory", {
uri <- tempfile()
if (dir.exists(uri)) unlink(uri, recursive=TRUE)

ctx <- soma_context()

sch <- arrow::schema(
arrow::field("soma_joinid", arrow::int64()),
arrow::field("int8", arrow::int8()),
arrow::field("int16", arrow::int16()),
arrow::field("int32", arrow::int32()),
arrow::field("int64", arrow::int64()),
arrow::field("uint8", arrow::uint8()),
arrow::field("uint16", arrow::uint16()),
arrow::field("uint32", arrow::uint32()),
arrow::field("uint64", arrow::uint64()),
arrow::field("string", arrow::string()),
arrow::field("large_utf8", arrow::large_utf8()),
arrow::field("enum",
arrow::dictionary(
index_type = arrow::int8(),
value_type = arrow::utf8(),
ordered = TRUE)),
arrow::field("float32", arrow::float32()),
arrow::field("float64", arrow::float64())
# TODO: for a follow-up PR
# arrow::field("timestamp_s", arrow::timestamp(unit="s")),
# arrow::field("timestamp_ms", arrow::timestamp(unit="ms")),
# arrow::field("timestamp_us", arrow::timestamp(unit="us")),
# arrow::field("timestamp_ns", arrow::timestamp(unit="ns"))
# Not supported in libtiledbsoma
# arrow::field("datetime_day", arrow::date32())
)

sdf <- SOMADataFrameCreate(uri, sch, index_column_names = "soma_joinid")
expect_true(sdf$exists())
expect_true(dir.exists(uri))

tbl <- arrow::arrow_table(
soma_joinid = 1L:10L,
int8 = -11L:-20L,
int16 = -201L:-210L,
int32 = -301L:-310L,
int64 = -401L:-410L,
uint8 = 11L:20L,
uint16 = 201L:210L,
uint32 = 301L:310L,
uint64 = 401L:410L,
string = c("apple", "ball", "cat", "dog", "egg", "fig", "goose", "hay", "ice", "jam"),
large_utf8 = c("APPLE", "BALL", "CAT", "DOG", "EGG", "FIG", "GOOSE", "HAY", "ICE", "JAM"),
enum = factor(
c("red", "yellow", "green", "red", "red", "red", "yellow", "green", "red", "green"),
levels = c("red", "yellow", "green")),
float32 = 1.5:10.5,
float64 = 11.5:20.5,
# TODO: for a follow-up PR
# timestamp_s = as.POSIXct(as.numeric(3600 + 1:10), tz="GMT"),
# timestamp_ms = as.POSIXct(as.numeric(3600*1000 + 1:10), tz="GMT"),
# timestamp_us = as.POSIXct(as.numeric(3600*1000*1000 + 1:10), tz="GMT"),
# timestamp_ns = as.POSIXct(as.numeric(3600*1000*1000*1000 + 1:10), tz="GMT"),
schema = sch)
sdf$write(tbl)
sdf$close()

sdf$reopen("READ")

good_cases <- list(
'soma_joinid > 5' = function(df) {
expect_equal(df$soma_joinid, 6:10)
expect_equal(df$int32, -306:-310)
},
'soma_joinid == 10' = function(df) {
expect_equal(df$soma_joinid, 10)
expect_equal(df$int32, -310)
expect_equal(as.character(df$enum), c("green"))
},
'soma_joinid > 4 && soma_joinid < 8' = function(df) {
expect_equal(df$soma_joinid, 5:7)
expect_equal(df$string, c("egg", "fig", "goose"))
expect_equal(df$large_utf8, c("EGG", "FIG", "GOOSE"))
},
'soma_joinid < 4 || soma_joinid > 8' = function(df) {
expect_equal(df$soma_joinid, c(1:3, 9:10))
},

'int8 == 8' = function(df) {
expect_equal(length(df$soma_joinid), 0)
},
'int8 == -12' = function(df) {
expect_equal(df$soma_joinid, c(2))
},
'int16 > -203' = function(df) {
expect_equal(df$soma_joinid, c(1, 2))
},
'uint16 < 204' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3))
},
'int32 > -303' = function(df) {
expect_equal(df$soma_joinid, c(1, 2))
},
'uint32 < 304' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3))
},
'int64 > -403' = function(df) {
expect_equal(df$soma_joinid, c(1, 2))
},
'uint64 < 404' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3))
},

'float32 < 4.5' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3))
},
'float64 < 14.5' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3))
},

'string == "dog"' = function(df) {
expect_equal(df$soma_joinid, c(4))
},
'string %in% c("fig", "dog")' = function(df) {
expect_equal(df$soma_joinid, c(4, 6))
},
'string %nin% c("fig", "dog")' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 3, 5, 7, 8, 9, 10))
},

'enum == "red"' = function(df) {
expect_equal(df$soma_joinid, c(1, 4, 5, 6, 9))
},
'enum != "red"' = function(df) {
expect_equal(df$soma_joinid, c(2, 3, 7, 8, 10))
},
'enum == "orange"' = function(df) {
expect_equal(length(df$soma_joinid), 0)
},
'enum != "orange"' = function(df) {
expect_equal(df$soma_joinid, 1:10)
},
'enum %in% c("red", "green")' = function(df) {
expect_equal(df$soma_joinid, c(1, 3, 4, 5, 6, 8, 9, 10))
},
'enum %nin% c("red", "green")' = function(df) {
expect_equal(df$soma_joinid, c(2, 7))
},
'enum %in% c("orange", "green")' = function(df) {
expect_equal(df$soma_joinid, c(3, 8, 10))
},
'enum %nin% c("orange", "green")' = function(df) {
expect_equal(df$soma_joinid, c(1, 2, 4, 5, 6, 7, 9))
},
'enum %in% c("orange", "purple")' = function(df) {
expect_equal(length(df$soma_joinid), 0)
},
'enum %nin% c("orange", "purple")' = function(df) {
expect_equal(df$soma_joinid, 1:10)
}

# TODO: for a follow-up PR
# 'timestamp_s < "1969-12-31 20:01:04 EST"' = function(df) {
# expect_equal(df$soma_joinid, 1:3)
# },
# 'timestamp_ms != "1970-02-11 11:00:05 EST"' = function(df) {
# expect_equal(df$soma_joinid, 1:10)
# },
# 'timestamp_us > "1970-01-01 00:00:01 GMT"' = function(df) {
# expect_equal(df$soma_joinid, 1:10)
# },
# 'timestamp_ns > "1970-01-01 00:00:01 GMT"' = function(df) {
# expect_equal(df$soma_joinid, 1:10)
# }
)

for (query_string in names(good_cases)) {
parsed <- do.call(
what = tiledbsoma:::parse_query_condition_new,
args = list(expr=str2lang(query_string), schema=sch, somactx=ctx))
clib_value_filter <- parsed@ptr

sr <- sr_setup(uri = sdf$uri, ctx, qc=clib_value_filter)
iter <- TableReadIter$new(sr)
tbl <- iter$read_next()
expect_true(iter$read_complete())
df <- as.data.frame(tbl)
# Call the validator
good_cases[[query_string]](df)
}

bad_cases <- list(
'',
' ',
'nonesuch < 10',
'soma_joinid << 10',
'soma_joinid',
'soma_joinid < 4 or soma_joinid > 8'
)

for (query_string in names(bad_cases)) {
expect_error(
do.call(
what = tiledbsoma:::parse_query_condition_new,
args = list(expr=str2lang(query_string), schema=sch, somactx=ctx)))
}

sdf$close()
})
15 changes: 5 additions & 10 deletions libtiledbsoma/test/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,17 +119,12 @@ create_arrow_schema_and_index_columns(
// Create index-column info only, no schema involving the attrs
ArrowTable create_column_index_info(const std::vector<DimInfo>& dim_infos) {
for (auto info : dim_infos) {
LOG_DEBUG(fmt::format("create_column_index_info name={}", info.name));

LOG_DEBUG(fmt::format(
"create_column_index_info type={}",
tiledb::impl::to_str(info.tiledb_datatype)));

LOG_DEBUG(
fmt::format("create_column_index_info dim_max={}", info.dim_max));

LOG_DEBUG(fmt::format(
"create_column_index_info ucd={}", info.use_current_domain));
"create_column_index_info name={} type={} dim_max={} ucd={}",
info.name,
tiledb::impl::to_str(info.tiledb_datatype),
info.dim_max,
info.use_current_domain));
}

auto index_cols_info_schema = _create_index_cols_info_schema(dim_infos);
Expand Down

0 comments on commit 4855c0e

Please sign in to comment.