From ce0dc826ea3847720d5855c30606e80262aa21ea Mon Sep 17 00:00:00 2001 From: XanthosXanthopoulos <38084549+XanthosXanthopoulos@users.noreply.github.com> Date: Tue, 4 Feb 2025 18:26:07 +0200 Subject: [PATCH] [c++] Migrate current domain resize to use `SOMAColumn` (#3643) * Migrate shape getter to SOMAColumn * Migrate shape upgrade checker to SOMAColumn * Migrate shape setter to SOMAColumn * Fix ndrectangle initialization * Update test assertions * Update attribute and dimension name getters * Migrate optional soma_joinid shape getters * Migrate dataframe domain setters * Fix the attribute column generation order when metadata are missing * Add missing return statement * Replace const string with string_view * Migrate joinid shape setters to SOMAColumn * ArrowSchema generation improvement --- apis/python/tests/test_shape.py | 14 +- libtiledbsoma/src/soma/soma_array.cc | 658 +++++------------- libtiledbsoma/src/soma/soma_array.h | 23 +- libtiledbsoma/src/soma/soma_column.cc | 11 +- libtiledbsoma/src/soma/soma_dimension.cc | 111 ++- libtiledbsoma/src/utils/arrow_adapter.cc | 15 +- libtiledbsoma/src/utils/arrow_adapter.h | 5 +- libtiledbsoma/src/utils/common.h | 3 + .../test/unit_soma_geometry_dataframe.cc | 6 +- .../test/unit_soma_sparse_ndarray.cc | 4 +- 10 files changed, 306 insertions(+), 544 deletions(-) diff --git a/apis/python/tests/test_shape.py b/apis/python/tests/test_shape.py index 0894003a02..912c8ad651 100644 --- a/apis/python/tests/test_shape.py +++ b/apis/python/tests/test_shape.py @@ -123,7 +123,10 @@ def test_sparse_nd_array_basics( with tiledbsoma.SparseNDArray.open(uri, "w") as snda: (ok, msg) = snda.resize(new_shape, check_only=True) assert not ok - assert msg == "can_resize for soma_dim_0: new 50 < existing shape 100" + assert ( + msg + == "[can_resize] index-column name 'soma_dim_0': new upper 49 < old upper 99 (downsize is unsupported)" + ) # TODO: check draft spec # with pytest.raises(ValueError): with pytest.raises(tiledbsoma.SOMAError): @@ 
-175,7 +178,10 @@ def test_sparse_nd_array_basics( too_small = tuple(e - 1 for e in new_shape) (ok, msg) = snda.resize(too_small, check_only=True) assert not ok - assert msg == "can_resize for soma_dim_0: new 149 < existing shape 150" + assert ( + msg + == "[can_resize] index-column name 'soma_dim_0': new upper 148 < old upper 149 (downsize is unsupported)" + ) with tiledbsoma.SparseNDArray.open(uri, "w") as snda: (ok, msg) = snda.resize(new_shape, check_only=True) @@ -518,7 +524,7 @@ def _check_ndarray(ndarray, has_shapes, expected_shape): assert "dataframe already has a domain" in msg else: assert not ok - assert "new lower > new upper" in msg + assert "new lower 10 > new upper 4" in msg ok, msg = exp.obs.tiledbsoma_upgrade_domain([[0, 1]], check_only=True) if has_shapes: @@ -597,7 +603,7 @@ def _check_ndarray(ndarray, has_shapes, expected_shape): ) else: assert ( - "Not OK: can_resize for soma_dim_1: new 13713 < existing shape 13714" + "Not OK: [can_resize] index-column name 'soma_dim_1': new upper 13712 < old upper 13713 (downsize is unsupported)" in body ) diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 779fb40545..8c70753d3e 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -350,22 +350,27 @@ void SOMAArray::set_column_data( }; uint64_t SOMAArray::ndim() const { - return tiledb_schema()->domain().ndim(); + return std::count_if( + columns_.cbegin(), columns_.cend(), [](const auto& col) { + return col->isIndexColumn(); + }); } std::vector SOMAArray::dimension_names() const { std::vector result; - auto dimensions = tiledb_schema()->domain().dimensions(); - for (const auto& dim : dimensions) { - result.push_back(dim.name()); + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + result.push_back(column->name()); } return result; } -bool SOMAArray::has_dimension_name(const std::string& name) const { - auto 
dimensions = tiledb_schema()->domain().dimensions(); - for (const auto& dim : dimensions) { - if (dim.name() == name) { +bool SOMAArray::has_dimension_name(std::string_view name) const { + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + if (column->name() == name) { return true; } } @@ -374,10 +379,11 @@ bool SOMAArray::has_dimension_name(const std::string& name) const { std::vector SOMAArray::attribute_names() const { std::vector result; - auto schema = tiledb_schema(); - unsigned n = schema->attribute_num(); - for (unsigned i = 0; i < n; i++) { - result.push_back(schema->attribute(i).name()); + for (const auto& column : + columns_ | std::views::filter([](const auto& col) { + return !col->isIndexColumn(); + })) { + result.push_back(column->name()); } return result; } @@ -495,10 +501,7 @@ std::optional SOMAArray::timestamp() { // The domainish enum simply lets us re-use code which is common across // core domain, core current domain, and core non-empty domain. ArrowTable SOMAArray::_get_core_domainish(enum Domainish which_kind) { - int array_ndim = std::count_if( - columns_.begin(), columns_.end(), [](const auto& col) { - return col->isIndexColumn(); - }); + size_t array_ndim = static_cast(ndim()); auto arrow_schema = ArrowAdapter::make_arrow_schema_parent(array_ndim); auto arrow_array = ArrowAdapter::make_arrow_array_parent(array_ndim); @@ -689,8 +692,9 @@ StatusAndReason SOMAArray::_can_set_shape_helper( std::string function_name_for_messages) { // E.g. it's an error to try to upgrade_domain or resize specifying // a 3-D shape on a 2-D array. 
- auto arg_ndim = newshape.size(); - auto array_ndim = schema_->domain().ndim(); + size_t arg_ndim = newshape.size(); + size_t array_ndim = static_cast(ndim()); + if (array_ndim != arg_ndim) { return std::pair( false, @@ -738,22 +742,14 @@ StatusAndReason SOMAArray::_can_set_shape_helper( // // if the requested shape fits in the array's core domain, it's good to go // as a new shape. + // For new-style arrays, we need to additionally that the the requested + // shape (core current domain) isn't a downsize of the current one. auto domain_check = _can_set_shape_domainish_subhelper( - newshape, false, function_name_for_messages); + newshape, function_name_for_messages); if (!domain_check.first) { return domain_check; } - // For new-style arrays, we need to additionally that the the requested - // shape (core current domain) isn't a downsize of the current one. - if (has_shape) { - auto current_domain_check = _can_set_shape_domainish_subhelper( - newshape, true, function_name_for_messages); - if (!current_domain_check.first) { - return current_domain_check; - } - } - return std::pair(true, ""); } @@ -762,61 +758,34 @@ StatusAndReason SOMAArray::_can_set_shape_helper( // domain. StatusAndReason SOMAArray::_can_set_shape_domainish_subhelper( const std::vector& newshape, - bool check_current_domain, std::string function_name_for_messages) { - Domain domain = schema_->domain(); - - for (unsigned i = 0; i < domain.ndim(); i++) { - const auto& dim = domain.dimension(i); + std::optional + ndrect = has_current_domain() ? 
+ std::make_optional( + tiledb::ArraySchemaExperimental::current_domain( + *ctx_->tiledb_ctx(), arr_->schema()) + .ndrectangle()) : + std::nullopt; + + size_t idx = 0; + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + auto status = column->can_set_current_domain_slot( + ndrect, + std::vector({std::make_any>( + std::array({0, newshape[idx] - 1}))})); - const std::string& dim_name = dim.name(); + if (status.first == false) { + status.second = std::format( + "[{}] {}", function_name_for_messages, status.second); - // These methods are only for SOMA NDArrays, and any other arrays for - // which the indices are entirely int64. SOMA DataFrame objects, with - // multi-type dims, need to go through upgrade_domain -- and this is - // library-internal code, it's not the user's fault if we got here. - if (dim.type() != TILEDB_INT64) { - throw TileDBSOMAError(std::format( - "{}: internal error: expected {} dim to be {}; got {}", - function_name_for_messages, - dim_name, - tiledb::impl::type_to_str(TILEDB_INT64), - tiledb::impl::type_to_str(dim.type()))); + return status; } - if (check_current_domain) { - std::pair - cap = _core_current_domain_slot(dim_name); - int64_t old_dim_shape = cap.second + 1; - - if (newshape[i] < old_dim_shape) { - return std::pair( - false, - std::format( - "{} for {}: new {} < existing shape {}", - function_name_for_messages, - dim_name, - newshape[i], - old_dim_shape)); - } - - } else { - std::pair cap = _core_domain_slot( - dim_name); - int64_t old_dim_shape = cap.second + 1; - - if (newshape[i] > old_dim_shape) { - return std::pair( - false, - std::format( - "{} for {}: new {} < maxshape {}", - function_name_for_messages, - dim_name, - newshape[i], - old_dim_shape)); - } - } + ++idx; } + return std::pair(true, ""); } @@ -838,7 +807,6 @@ StatusAndReason SOMAArray::_can_set_soma_joinid_shape_helper( } else { // Resizing an array's existing current domain - if 
(!has_current_domain()) { return std::pair( false, @@ -849,14 +817,14 @@ StatusAndReason SOMAArray::_can_set_soma_joinid_shape_helper( } // OK if soma_joinid isn't a dim. - if (!has_dimension_name("soma_joinid")) { + if (!has_dimension_name(SOMA_JOINID)) { return std::pair(true, ""); } // Fail if the newshape isn't within the array's core current domain. if (must_already_have) { std::pair cur_dom_lo_hi = _core_current_domain_slot( - "soma_joinid"); + SOMA_JOINID); if (newshape < cur_dom_lo_hi.second) { return std::pair( false, @@ -869,7 +837,7 @@ StatusAndReason SOMAArray::_can_set_soma_joinid_shape_helper( } // Fail if the newshape isn't within the array's core (max) domain. - std::pair dom_lo_hi = _core_domain_slot("soma_joinid"); + std::pair dom_lo_hi = _core_domain_slot(SOMA_JOINID); if (newshape > dom_lo_hi.second) { return std::pair( false, @@ -914,25 +882,27 @@ void SOMAArray::_set_shape_helper( _check_dims_are_int64(); auto tctx = ctx_->tiledb_ctx(); - ArraySchema schema = arr_->schema(); - Domain domain = schema.domain(); ArraySchemaEvolution schema_evolution(*tctx); CurrentDomain new_current_domain(*tctx); - NDRectangle ndrect(*tctx, domain); + NDRectangle ndrect(*tctx, arr_->schema().domain()); - unsigned n = domain.ndim(); - if ((unsigned)newshape.size() != n) { + size_t array_ndim = static_cast(ndim()); + if (newshape.size() != array_ndim) { throw TileDBSOMAError(std::format( "[SOMAArray::resize]: newshape has dimension count {}; array has " "{} ", newshape.size(), - n)); + array_ndim)); } - for (unsigned i = 0; i < n; i++) { - ndrect.set_range( - domain.dimension(i).name(), 0, newshape[i] - 1); + size_t idx = 0; + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + column->set_current_domain_slot( + ndrect, std::vector({0, newshape[idx] - 1})); + ++idx; } new_current_domain.set_ndrectangle(ndrect); @@ -966,9 +936,6 @@ void SOMAArray::_set_soma_joinid_shape_helper( } } - ArraySchema 
schema = arr_->schema(); - Domain domain = schema.domain(); - unsigned ndim = domain.ndim(); auto tctx = ctx_->tiledb_ctx(); ArraySchemaEvolution schema_evolution(*tctx); CurrentDomain new_current_domain(*tctx); @@ -977,121 +944,38 @@ void SOMAArray::_set_soma_joinid_shape_helper( // For upgrade: copy from the full/wide/max domain except for the // soma_joinid restriction. - NDRectangle ndrect(*tctx, domain); + NDRectangle ndrect(*tctx, arr_->schema().domain()); + auto soma_domain = get_soma_domain(); - for (unsigned i = 0; i < ndim; i++) { - const Dimension& dim = domain.dimension(i); - const std::string dim_name = dim.name(); - if (dim_name == "soma_joinid") { - if (dim.type() != TILEDB_INT64) { + for (const auto& column : + columns_ | std::views::filter([](const auto& col) { + return col->isIndexColumn(); + })) { + if (column->name() == SOMA_JOINID) { + if (column->domain_type().value() != TILEDB_INT64) { throw TileDBSOMAError(std::format( "{}: expected soma_joinid to be of type {}; got {}", function_name_for_messages, tiledb::impl::type_to_str(TILEDB_INT64), - tiledb::impl::type_to_str(dim.type()))); + tiledb::impl::type_to_str( + column->domain_type().value()))); } - ndrect.set_range(dim_name, 0, newshape - 1); - } else { - switch (dim.type()) { - case TILEDB_STRING_ASCII: - case TILEDB_STRING_UTF8: - case TILEDB_CHAR: - case TILEDB_GEOM_WKB: - case TILEDB_GEOM_WKT: - // See comments in soma_array.h. 
- ndrect.set_range(dim_name, "", "\x7f"); - break; - - case TILEDB_INT8: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_BOOL: - case TILEDB_UINT8: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_INT16: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_UINT16: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_INT32: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_UINT32: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_INT64: - case TILEDB_DATETIME_YEAR: - case TILEDB_DATETIME_MONTH: - case TILEDB_DATETIME_WEEK: - case TILEDB_DATETIME_DAY: - case TILEDB_DATETIME_HR: - case TILEDB_DATETIME_MIN: - case TILEDB_DATETIME_SEC: - case TILEDB_DATETIME_MS: - case TILEDB_DATETIME_US: - case TILEDB_DATETIME_NS: - case TILEDB_DATETIME_PS: - case TILEDB_DATETIME_FS: - case TILEDB_DATETIME_AS: - case TILEDB_TIME_HR: - case TILEDB_TIME_MIN: - case TILEDB_TIME_SEC: - case TILEDB_TIME_MS: - case TILEDB_TIME_US: - case TILEDB_TIME_NS: - case TILEDB_TIME_PS: - case TILEDB_TIME_FS: - case TILEDB_TIME_AS: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_UINT64: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_FLOAT32: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - case TILEDB_FLOAT64: - ndrect.set_range( - dim_name, - dim.domain().first, - dim.domain().second); - break; - default: - throw TileDBSOMAError(std::format( - "{}: internal error: unhandled type {} for {}.", - function_name_for_messages, - tiledb::impl::type_to_str(dim.type()), - dim_name)); + + if (column->type() != + soma_column_datatype_t::SOMA_COLUMN_DIMENSION) { + throw 
TileDBSOMAError(std::format( + "{}: expected soma_joinid type to be of type " + "SOMA_COLUMN_DIMENSION", + function_name_for_messages)); } + + column->set_current_domain_slot( + ndrect, std::vector({0, newshape - 1})); + } else { + column->set_current_domain_slot( + ndrect, + ArrowAdapter::get_table_any_column_by_name<2>( + soma_domain, column->name(), 0)); } } @@ -1102,13 +986,17 @@ void SOMAArray::_set_soma_joinid_shape_helper( // new soma_joinid value. CurrentDomain old_current_domain = ArraySchemaExperimental::current_domain( - *tctx, schema); + *tctx, arr_->schema()); NDRectangle ndrect = old_current_domain.ndrectangle(); - for (unsigned i = 0; i < ndim; i++) { - if (domain.dimension(i).name() == "soma_joinid") { - ndrect.set_range( - domain.dimension(i).name(), 0, newshape - 1); + for (const auto& column : + columns_ | std::views::filter([](const auto& col) { + return col->isIndexColumn(); + })) { + if (column->name() == SOMA_JOINID) { + column->set_current_domain_slot( + ndrect, std::vector({0, newshape - 1})); + break; } } @@ -1156,21 +1044,13 @@ StatusAndReason SOMAArray::_can_set_domain_helper( // domain isn't outside the core domain, which is immutable. For // old-style dataframes, if the requested domain fits in the array's // core domain, it's good to go as a new soma domain. - auto domain_check = _can_set_dataframe_domainish_subhelper( - newdomain, false, function_name_for_messages); - if (!domain_check.first) { - return domain_check; - } - // For new-style dataframes, we need to additionally that the the // requested soma domain (core current domain) isn't a downsize of the // current one. 
- if (has_current_domain()) { - auto current_domain_check = _can_set_dataframe_domainish_subhelper( - newdomain, true, function_name_for_messages); - if (!current_domain_check.first) { - return current_domain_check; - } + auto current_domain_check = _can_set_dataframe_domainish_subhelper( + newdomain, function_name_for_messages); + if (!current_domain_check.first) { + return current_domain_check; } return std::pair(true, ""); @@ -1180,140 +1060,49 @@ StatusAndReason SOMAArray::_can_set_domain_helper( // the user's requested soma domain against the core current domain or core // (max) domain. StatusAndReason SOMAArray::_can_set_dataframe_domainish_subhelper( - const ArrowTable& newdomain, - bool check_current_domain, - std::string function_name_for_messages) { - Domain domain = arr_->schema().domain(); - - ArrowArray* new_domain_array = newdomain.first.get(); - ArrowSchema* new_domain_schema = newdomain.second.get(); - - if (new_domain_schema->n_children != domain.ndim()) { + const ArrowTable& newdomain, std::string function_name_for_messages) { + if (newdomain.second->n_children != static_cast(ndim())) { return std::pair( false, std::format( "{}: requested domain has ndim={} but the dataframe has " "ndim={}", function_name_for_messages, - new_domain_schema->n_children, - domain.ndim())); + newdomain.second->n_children, + ndim())); } - if (new_domain_schema->n_children != new_domain_array->n_children) { + if (newdomain.second->n_children != newdomain.first->n_children) { return std::pair( false, std::format( "{}: internal coding error", function_name_for_messages)); } - for (unsigned i = 0; i < domain.ndim(); i++) { - const auto& dim = domain.dimension(i); + std::optional + ndrect = has_current_domain() ? 
+ std::make_optional( + tiledb::ArraySchemaExperimental::current_domain( + *ctx_->tiledb_ctx(), arr_->schema()) + .ndrectangle()) : + std::nullopt; - StatusAndReason status_and_reason; + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + auto status = column->can_set_current_domain_slot( + ndrect, + ArrowAdapter::get_table_any_column_by_name<2>( + newdomain, column->name(), 0)); - switch (dim.type()) { - case TILEDB_STRING_ASCII: - case TILEDB_STRING_UTF8: - case TILEDB_CHAR: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_string( - check_current_domain, newdomain, dim.name()); - break; - case TILEDB_BOOL: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string( - check_current_domain, newdomain, dim.name()); - break; - case TILEDB_INT8: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - int8_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_UINT8: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - uint8_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_INT16: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - int16_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_UINT16: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - uint16_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_INT32: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - int32_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_UINT32: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - uint32_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_INT64: - case TILEDB_DATETIME_YEAR: - case TILEDB_DATETIME_MONTH: - case TILEDB_DATETIME_WEEK: - case TILEDB_DATETIME_DAY: - case TILEDB_DATETIME_HR: - case 
TILEDB_DATETIME_MIN: - case TILEDB_DATETIME_SEC: - case TILEDB_DATETIME_MS: - case TILEDB_DATETIME_US: - case TILEDB_DATETIME_NS: - case TILEDB_DATETIME_PS: - case TILEDB_DATETIME_FS: - case TILEDB_DATETIME_AS: - case TILEDB_TIME_HR: - case TILEDB_TIME_MIN: - case TILEDB_TIME_SEC: - case TILEDB_TIME_MS: - case TILEDB_TIME_US: - case TILEDB_TIME_NS: - case TILEDB_TIME_PS: - case TILEDB_TIME_FS: - case TILEDB_TIME_AS: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - int64_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_UINT64: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - uint64_t>(check_current_domain, newdomain, dim.name()); - break; - case TILEDB_FLOAT32: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string( - check_current_domain, newdomain, dim.name()); - break; - case TILEDB_FLOAT64: - status_and_reason = - _can_set_dataframe_domainish_slot_checker_non_string< - double>(check_current_domain, newdomain, dim.name()); - break; - default: - throw TileDBSOMAError(std::format( - "{}: saw invalid TileDB type when attempting to cast " - "domain information: {}", - function_name_for_messages, - tiledb::impl::type_to_str(dim.type()))); - } + if (status.first == false) { + status.second = std::format( + "[{}] {}", function_name_for_messages, status.second); - if (status_and_reason.first == false) { - return std::pair( - false, - std::format( - "{} for {}: {}", - function_name_for_messages, - dim.name(), - status_and_reason.second)); + return status; } } + return std::pair(true, ""); } @@ -1341,136 +1130,32 @@ void SOMAArray::_set_domain_helper( } } - Domain domain = arr_->schema().domain(); - - ArrowArray* new_domain_array = newdomain.first.get(); - ArrowSchema* new_domain_schema = newdomain.second.get(); - - if (new_domain_schema->n_children != domain.ndim()) { + if (newdomain.second->n_children != static_cast(ndim())) { throw TileDBSOMAError(std::format( "{}: 
requested domain has ndim={} but the dataframe has " "ndim={}", function_name_for_messages, - new_domain_schema->n_children, - domain.ndim())); + newdomain.second->n_children, + ndim())); } - if (new_domain_schema->n_children != new_domain_array->n_children) { + if (newdomain.second->n_children != newdomain.first->n_children) { throw TileDBSOMAError(std::format( "{}: internal coding error", function_name_for_messages)); } auto tctx = ctx_->tiledb_ctx(); - NDRectangle ndrect(*tctx, domain); + NDRectangle ndrect(*tctx, arr_->schema().domain()); CurrentDomain new_current_domain(*tctx); ArraySchemaEvolution schema_evolution(*tctx); - for (unsigned i = 0; i < domain.ndim(); i++) { - const Dimension& dim = domain.dimension(i); - const std::string dim_name = dim.name(); - - switch (dim.type()) { - case TILEDB_STRING_ASCII: - case TILEDB_STRING_UTF8: - case TILEDB_CHAR: - case TILEDB_GEOM_WKB: - case TILEDB_GEOM_WKT: { - auto lo_hi = ArrowAdapter::get_table_string_column_by_index( - newdomain, i); - if (lo_hi[0] == "" && lo_hi[1] == "") { - // Don't care -> as big as possible. - // See comments in soma_array.h. 
- ndrect.set_range(dim_name, "", "\x7f"); - } else { - throw TileDBSOMAError(std::format( - "domain (\"{}\", \"{}\") cannot be set for " - "string index columns: please use " - "(\"\", \"\")", - lo_hi[0], - lo_hi[1])); - } - } break; - - case TILEDB_INT8: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - int8_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_BOOL: - case TILEDB_UINT8: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - uint8_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_INT16: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - int16_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_UINT16: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - uint16_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_INT32: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - int32_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_UINT32: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - uint32_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_INT64: - case TILEDB_DATETIME_YEAR: - case TILEDB_DATETIME_MONTH: - case TILEDB_DATETIME_WEEK: - case TILEDB_DATETIME_DAY: - case TILEDB_DATETIME_HR: - case TILEDB_DATETIME_MIN: - case TILEDB_DATETIME_SEC: - case TILEDB_DATETIME_MS: - case TILEDB_DATETIME_US: - case TILEDB_DATETIME_NS: - case TILEDB_DATETIME_PS: - case TILEDB_DATETIME_FS: - case TILEDB_DATETIME_AS: - case TILEDB_TIME_HR: - case TILEDB_TIME_MIN: - case TILEDB_TIME_SEC: - case TILEDB_TIME_MS: - case TILEDB_TIME_US: - case TILEDB_TIME_NS: - case TILEDB_TIME_PS: - case TILEDB_TIME_FS: - case TILEDB_TIME_AS: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - 
int64_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_UINT64: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - uint64_t>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_FLOAT32: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - float>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - case TILEDB_FLOAT64: { - auto lo_hi = ArrowAdapter::get_table_non_string_column_by_index< - double>(newdomain, i); - ndrect.set_range(dim_name, lo_hi[0], lo_hi[1]); - } break; - default: - throw TileDBSOMAError(std::format( - "{}: internal error: unhandled type {} for {}.", - function_name_for_messages, - tiledb::impl::type_to_str(dim.type()), - dim_name)); - } + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + column->set_current_domain_slot( + ndrect, + ArrowAdapter::get_table_any_column_by_name<2>( + newdomain, column->name(), 0)); } new_current_domain.set_ndrectangle(ndrect); @@ -1502,10 +1187,13 @@ std::vector SOMAArray::_shape_via_tiledb_current_domain() { NDRectangle ndrect = current_domain.ndrectangle(); - for (auto dimension_name : dimension_names()) { - auto range = ndrect.range(dimension_name); - result.push_back(range[1] - range[0] + 1); + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + auto current_domain = column->core_current_domain_slot(ndrect); + result.push_back(current_domain.second - current_domain.first + 1); } + return result; } @@ -1514,11 +1202,11 @@ std::vector SOMAArray::_shape_via_tiledb_domain() { _check_dims_are_int64(); std::vector result; - auto dimensions = schema_->domain().dimensions(); - - for (const auto& dim : dimensions) { - result.push_back( - dim.domain().second - dim.domain().first + 1); + for (const auto& column : + columns_ | std::views::filter( + 
[](const auto& col) { return col->isIndexColumn(); })) { + auto core_domain = column->core_domain_slot(); + result.push_back(core_domain.second - core_domain.first + 1); } return result; @@ -1536,66 +1224,50 @@ std::optional SOMAArray::_maybe_soma_joinid_maxshape() { std::optional SOMAArray::_maybe_soma_joinid_shape_via_tiledb_current_domain() { - const std::string dim_name = "soma_joinid"; - - auto dom = schema_->domain(); - if (!dom.has_dimension(dim_name)) { + if (!has_dimension_name(SOMA_JOINID)) { return std::nullopt; } - auto current_domain = _get_current_domain(); - if (current_domain.is_empty()) { - throw TileDBSOMAError("internal coding error"); - } - - auto t = current_domain.type(); - if (t != TILEDB_NDRECTANGLE) { - throw TileDBSOMAError("current_domain type is not NDRECTANGLE"); - } - - NDRectangle ndrect = current_domain.ndrectangle(); - - auto dim = dom.dimension(dim_name); - if (dim.type() != TILEDB_INT64) { + auto column = get_column(SOMA_JOINID); + if (column->domain_type().value() != TILEDB_INT64) { throw TileDBSOMAError(std::format( "expected {} dim to be {}; got {}", - dim_name, + SOMA_JOINID, tiledb::impl::type_to_str(TILEDB_INT64), - tiledb::impl::type_to_str(dim.type()))); + tiledb::impl::type_to_str(column->domain_type().value()))); } - auto range = ndrect.range(dim_name); - auto max = range[1] + 1; + auto max = column->core_current_domain_slot(*ctx_, *arr_).second + + 1; + return std::optional(max); } std::optional SOMAArray::_maybe_soma_joinid_shape_via_tiledb_domain() { - const std::string dim_name = "soma_joinid"; - - auto dom = schema_->domain(); - if (!dom.has_dimension(dim_name)) { + if (!has_dimension_name(SOMA_JOINID)) { return std::nullopt; } - auto dim = dom.dimension(dim_name); - if (dim.type() != TILEDB_INT64) { + auto column = get_column(SOMA_JOINID); + if (column->domain_type().value() != TILEDB_INT64) { throw TileDBSOMAError(std::format( "expected {} dim to be {}; got {}", - dim_name, + SOMA_JOINID, 
tiledb::impl::type_to_str(TILEDB_INT64), - tiledb::impl::type_to_str(dim.type()))); + tiledb::impl::type_to_str(column->domain_type().value()))); } - auto max = dim.domain().second + 1; + auto max = column->core_domain_slot().second + 1; return std::optional(max); } bool SOMAArray::_dims_are_int64() { - ArraySchema schema = arr_->schema(); - Domain domain = schema.domain(); - for (auto dimension : domain.dimensions()) { - if (dimension.type() != TILEDB_INT64) { + for (const auto& column : + columns_ | std::views::filter( + [](const auto& col) { return col->isIndexColumn(); })) { + if (column->type() != soma_column_datatype_t::SOMA_COLUMN_DIMENSION || + column->domain_type().value_or(TILEDB_ANY) != TILEDB_INT64) { return false; } } diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 30bba9571b..57a745334b 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -316,7 +316,7 @@ class SOMAArray : public SOMAObject { * * @return bool */ - bool has_dimension_name(const std::string& name) const; + bool has_dimension_name(std::string_view name) const; /** * @brief Get the name of each attribute. @@ -757,7 +757,7 @@ class SOMAArray : public SOMAObject { * domains. */ template - std::pair non_empty_domain_slot(const std::string& name) const { + std::pair non_empty_domain_slot(std::string_view name) const { return get_column(name)->non_empty_domain_slot(*arr_); } @@ -768,7 +768,7 @@ class SOMAArray : public SOMAObject { */ template std::optional> non_empty_domain_slot_opt( - const std::string& name) const { + std::string_view name) const { return get_column(name)->non_empty_domain_slot_opt(*ctx_, *arr_); } @@ -807,7 +807,7 @@ class SOMAArray : public SOMAObject { * @return Pair of [lower, upper] inclusive bounds. 
*/ template - std::pair _core_current_domain_slot(const std::string& name) const { + std::pair _core_current_domain_slot(std::string_view name) const { return get_column(name)->core_current_domain_slot(*ctx_, *arr_); } @@ -826,8 +826,8 @@ class SOMAArray : public SOMAObject { * @return Pair of [lower, upper] inclusive bounds. */ template - std::pair _core_domain_slot(const std::string& name) const { - return schema_->domain().dimension(name).domain(); + std::pair _core_domain_slot(std::string_view name) const { + return get_column(name)->core_domain_slot(); } /** @@ -839,7 +839,7 @@ class SOMAArray : public SOMAObject { * - soma domain is core domain */ template - std::pair soma_domain_slot(const std::string& name) const { + std::pair soma_domain_slot(std::string_view name) const { if (has_current_domain()) { return _core_current_domain_slot(name); } else { @@ -856,7 +856,7 @@ class SOMAArray : public SOMAObject { * - soma maxdomain is core domain */ template - std::pair soma_maxdomain_slot(const std::string& name) const { + std::pair soma_maxdomain_slot(std::string_view name) const { return _core_domain_slot(name); } @@ -922,7 +922,7 @@ class SOMAArray : public SOMAObject { */ template std::pair _core_domainish_slot( - const std::string& name, enum Domainish which_kind) const { + std::string_view name, enum Domainish which_kind) const { return get_column(name)->domain_slot(*ctx_, *arr_, which_kind); } @@ -1224,16 +1224,13 @@ class SOMAArray : public SOMAObject { */ StatusAndReason _can_set_shape_domainish_subhelper( const std::vector& newshape, - bool check_current_domain, std::string function_name_for_messages); /** * This is a code-dedupe helper for can_upgrade_domain. 
*/ StatusAndReason _can_set_dataframe_domainish_subhelper( - const ArrowTable& newdomain, - bool check_current_domain, - std::string function_name_for_messages); + const ArrowTable& newdomain, std::string function_name_for_messages); /** * This is a code-dedupe helper for can_resize_soma_joinid_shape and diff --git a/libtiledbsoma/src/soma/soma_column.cc b/libtiledbsoma/src/soma/soma_column.cc index bfb8cffd68..3062eabf6c 100644 --- a/libtiledbsoma/src/soma/soma_column.cc +++ b/libtiledbsoma/src/soma/soma_column.cc @@ -86,19 +86,18 @@ std::vector> SOMAColumn::deserialize( columns.push_back(std::make_shared(dimension)); } - for (auto& attribute : array.schema().attributes()) { + for (size_t i = 0; i < array.schema().attribute_num(); ++i) { + auto attribute = array.schema().attribute(i); auto enumeration_name = AttributeExperimental::get_enumeration_name( - ctx, attribute.second); + ctx, attribute); auto enumeration = enumeration_name.has_value() ? std::make_optional( ArrayExperimental::get_enumeration( - ctx, - array, - attribute.second.name())) : + ctx, array, attribute.name())) : std::nullopt; columns.push_back( - std::make_shared(attribute.second, enumeration)); + std::make_shared(attribute, enumeration)); } } diff --git a/libtiledbsoma/src/soma/soma_dimension.cc b/libtiledbsoma/src/soma/soma_dimension.cc index 36a34a2154..d87fe9028f 100644 --- a/libtiledbsoma/src/soma/soma_dimension.cc +++ b/libtiledbsoma/src/soma/soma_dimension.cc @@ -106,6 +106,15 @@ void SOMADimension::_set_dim_points( case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: query->select_points( dimension.name(), @@ -193,6 +202,15 @@ void SOMADimension::_set_dim_ranges( case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case 
TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: query->select_ranges( dimension.name(), @@ -276,6 +294,15 @@ void SOMADimension::_set_current_domain_slot( case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: { auto dom = std::any_cast>(domain[0]); rectangle.set_range(dimension.name(), dom[0], dom[1]); @@ -337,7 +364,8 @@ std::pair SOMADimension::_can_set_current_domain_slot( if (new_domain.size() != 1) { throw TileDBSOMAError(std::format( "[SOMADimension][_can_set_current_domain_slot] Expected domain " - "size is 1, found {}", + "size for '{}' is 1, found {}", + name(), new_domain.size())); } @@ -348,8 +376,10 @@ std::pair SOMADimension::_can_set_current_domain_slot( return std::pair( false, std::format( - "index-column name {}: new lower > new upper", - dimension.name())); + "index-column name '{}': new lower {} > new upper {}", + dimension.name(), + new_dom[0], + new_dom[1])); } // If we're checking against the core current domain: the user-provided @@ -359,23 +389,29 @@ std::pair SOMADimension::_can_set_current_domain_slot( // domain must be contained within the core (max) domain. 
if (rectangle.has_value()) { - auto dom = rectangle.value().range(dimension.name()); + auto dom = rectangle->range(dimension.name()); if (new_dom[0] > dom[0]) { return std::pair( false, std::format( - "index-column name {}: new lower > old lower (downsize " + "index-column name '{}': new lower {} > old lower {} " + "(downsize " "is unsupported)", - dimension.name())); + dimension.name(), + new_dom[0], + dom[0])); } if (new_dom[1] < dom[1]) { return std::pair( false, std::format( - "index-column name {}: new upper < old upper (downsize " + "index-column name '{}': new upper {} < old upper {} " + "(downsize " "is unsupported)", - dimension.name())); + dimension.name(), + new_dom[1], + dom[1])); } } else { auto dom = std::any_cast>(_core_domain_slot()); @@ -384,15 +420,19 @@ std::pair SOMADimension::_can_set_current_domain_slot( return std::pair( false, std::format( - "index-column name {}: new lower < limit lower", - dimension.name())); + "index-column name '{}': new lower {} < limit lower {}", + dimension.name(), + new_dom[0], + dom.first)); } if (new_dom[1] > dom.second) { return std::pair( false, std::format( - "index-column name {}: new upper > limit upper", - dimension.name())); + "index-column name '{}': new upper {} > limit upper {}", + dimension.name(), + new_dom[1], + dom.second)); } } @@ -434,6 +474,15 @@ std::pair SOMADimension::_can_set_current_domain_slot( case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: return comparator( std::any_cast>(new_domain[0])); @@ -499,6 +548,15 @@ std::any SOMADimension::_core_domain_slot() const { case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case 
TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: return std::make_any>( dimension.domain()); @@ -551,6 +609,15 @@ std::any SOMADimension::_non_empty_domain_slot(Array& array) const { case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: return std::make_any>( array.non_empty_domain(dimension.name())); @@ -723,6 +790,15 @@ std::any SOMADimension::_non_empty_domain_slot_opt( case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: if (is_empty) { return std::make_any< @@ -832,6 +908,15 @@ std::any SOMADimension::_core_current_domain_slot(NDRectangle& ndrect) const { case TILEDB_DATETIME_PS: case TILEDB_DATETIME_FS: case TILEDB_DATETIME_AS: + case TILEDB_TIME_HR: + case TILEDB_TIME_MIN: + case TILEDB_TIME_SEC: + case TILEDB_TIME_MS: + case TILEDB_TIME_US: + case TILEDB_TIME_NS: + case TILEDB_TIME_PS: + case TILEDB_TIME_FS: + case TILEDB_TIME_AS: case TILEDB_INT64: { std::array domain = ndrect.range( dimension.name()); @@ -867,7 +952,6 @@ std::any SOMADimension::_core_current_domain_slot(NDRectangle& ndrect) const { ArrowArray* SOMADimension::arrow_domain_slot( const SOMAContext& ctx, Array& array, enum Domainish kind) const { switch (domain_type().value()) { - case TILEDB_INT64: case TILEDB_DATETIME_YEAR: case TILEDB_DATETIME_MONTH: case TILEDB_DATETIME_WEEK: @@ -890,6 +974,7 @@ ArrowArray* SOMADimension::arrow_domain_slot( case TILEDB_TIME_PS: case TILEDB_TIME_FS: case TILEDB_TIME_AS: + case TILEDB_INT64: 
return ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); case TILEDB_UINT64: diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc index e3e12a1229..827730fe49 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.cc +++ b/libtiledbsoma/src/utils/arrow_adapter.cc @@ -1611,20 +1611,21 @@ std::unique_ptr ArrowAdapter::make_arrow_schema( } std::unique_ptr ArrowAdapter::make_arrow_schema_parent( - int num_columns) { + size_t num_columns) { auto arrow_schema = std::make_unique(); - arrow_schema->format = "+s"; // structure, i.e. non-leaf node + arrow_schema->format = strdup("+s"); // structure, i.e. non-leaf node arrow_schema->name = strdup("parent"); arrow_schema->metadata = nullptr; arrow_schema->flags = 0; - arrow_schema->n_children = num_columns; // non-leaf node + arrow_schema->n_children = static_cast( + num_columns); // non-leaf node arrow_schema->children = (ArrowSchema**)malloc( arrow_schema->n_children * sizeof(ArrowSchema*)); arrow_schema->dictionary = nullptr; arrow_schema->release = &ArrowAdapter::release_schema; arrow_schema->private_data = nullptr; - for (int i = 0; i < num_columns; i++) { + for (size_t i = 0; i < num_columns; i++) { arrow_schema->children[i] = nullptr; } @@ -1636,7 +1637,7 @@ std::unique_ptr ArrowAdapter::make_arrow_schema_parent( } std::unique_ptr ArrowAdapter::make_arrow_array_parent( - int num_columns) { + size_t num_columns) { auto arrow_array = std::make_unique(); // All zero/null since this is a parent ArrowArray, and each @@ -1645,7 +1646,7 @@ std::unique_ptr ArrowAdapter::make_arrow_array_parent( arrow_array->null_count = 0; arrow_array->offset = 0; arrow_array->n_buffers = 0; - arrow_array->n_children = num_columns; + arrow_array->n_children = static_cast(num_columns); arrow_array->buffers = nullptr; arrow_array->dictionary = nullptr; arrow_array->release = &ArrowAdapter::release_array; @@ -1653,7 +1654,7 @@ std::unique_ptr ArrowAdapter::make_arrow_array_parent( 
arrow_array->children = (ArrowArray**)malloc( num_columns * sizeof(ArrowArray*)); - for (int i = 0; i < num_columns; i++) { + for (size_t i = 0; i < num_columns; i++) { arrow_array->children[i] = nullptr; } diff --git a/libtiledbsoma/src/utils/arrow_adapter.h b/libtiledbsoma/src/utils/arrow_adapter.h index 4004974aa9..29bc0159b2 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.h +++ b/libtiledbsoma/src/utils/arrow_adapter.h @@ -498,7 +498,7 @@ class ArrowAdapter { * ArrowSchema. This constructs the parent and not the children. */ static std::unique_ptr make_arrow_schema_parent( - int num_columns); + size_t num_columns); /** * @brief Creates a nanoarrow ArrowArray which accommodates @@ -507,7 +507,8 @@ class ArrowAdapter { * Note that the parents and children in nanoarrow are both of type * ArrowArray. This constructs the parent and not the children. */ - static std::unique_ptr make_arrow_array_parent(int num_columns); + static std::unique_ptr make_arrow_array_parent( + size_t num_columns); /** * @brief Creates a nanoarrow ArrowArray for a single column. 
diff --git a/libtiledbsoma/src/utils/common.h b/libtiledbsoma/src/utils/common.h index 9cac42e212..aed83717a3 100644 --- a/libtiledbsoma/src/utils/common.h +++ b/libtiledbsoma/src/utils/common.h @@ -16,10 +16,13 @@ #include // for windows: error C2039: 'runtime_error': is not a member of 'std' #include +#include #include namespace tiledbsoma { +constexpr std::string_view SOMA_JOINID = "soma_joinid"; + const std::string SOMA_OBJECT_TYPE_KEY = "soma_object_type"; const std::string ENCODING_VERSION_KEY = "soma_encoding_version"; const std::string ENCODING_VERSION_VAL = "1.1.0"; diff --git a/libtiledbsoma/test/unit_soma_geometry_dataframe.cc b/libtiledbsoma/test/unit_soma_geometry_dataframe.cc index 5e1a802e13..bd2f263337 100644 --- a/libtiledbsoma/test/unit_soma_geometry_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_geometry_dataframe.cc @@ -96,11 +96,7 @@ TEST_CASE("SOMAGeometryDataFrame: basic", "[SOMAGeometryDataFrame]") { REQUIRE(soma_geometry->ctx() == ctx); REQUIRE(soma_geometry->type() == "SOMAGeometryDataFrame"); std::vector expected_index_column_names = { - dim_infos[0].name, - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[0].name + "__min", - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[1].name + "__min", - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[0].name + "__max", - SOMA_GEOMETRY_DIMENSION_PREFIX + spatial_dim_infos[1].name + "__max"}; + dim_infos[0].name, dim_infos[1].name}; std::vector expected_spatial_column_names = { spatial_dim_infos[0].name, spatial_dim_infos[1].name}; diff --git a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc index b3c73ea7be..24a0f4456b 100644 --- a/libtiledbsoma/test/unit_soma_sparse_ndarray.cc +++ b/libtiledbsoma/test/unit_soma_sparse_ndarray.cc @@ -380,7 +380,9 @@ TEST_CASE("SOMASparseNDArray: can_resize", "[SOMASparseNDArray]") { check = snda->can_resize(newshape_too_small, "testing"); REQUIRE(check.first == false); REQUIRE( - check.second == "testing for 
soma_dim_0: new 40 < existing shape 1000"); + check.second == + "[testing] index-column name 'soma_dim_0': new upper 39 < old upper " + "999 (downsize is unsupported)"); check = snda->can_resize(newshape_good, "testing"); REQUIRE(check.first == true);