Skip to content

Commit

Permalink
Merge internal dimension domains under soma_geometry domain (x_mi…
Browse files Browse the repository at this point in the history
…n, x_max, y_min, y_max) (#3299)
  • Loading branch information
XanthosXanthopoulos authored Nov 17, 2024
1 parent 0583de7 commit 3e6d9f2
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 6 deletions.
9 changes: 4 additions & 5 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,8 @@
#ifndef SOMA_ARRAY
#define SOMA_ARRAY

#include <stdexcept> // for windows: error C2039: 'runtime_error': is not a member of 'std'

#include <future>
#include <stdexcept> // for windows: error C2039: 'runtime_error': is not a member of 'std'

#include <tiledb/tiledb>
#include <tiledb/tiledb_experimental>
Expand Down Expand Up @@ -1005,7 +1004,7 @@ class SOMAArray : public SOMAObject {
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
ArrowTable get_soma_domain() {
virtual ArrowTable get_soma_domain() {
if (has_current_domain()) {
return _get_core_current_domain();
} else {
Expand All @@ -1028,15 +1027,15 @@ class SOMAArray : public SOMAObject {
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
ArrowTable get_soma_maxdomain() {
virtual ArrowTable get_soma_maxdomain() {
// Forwards to _get_core_domain(). Declared virtual so that subclasses can
// post-process the table; SOMAGeometryDataFrame overrides it.
return _get_core_domain();
}

/**
* Returns the core non-empty domain in its entirety, as an Arrow
* table for return to Python/R.
*/
ArrowTable get_non_empty_domain() {
virtual ArrowTable get_non_empty_domain() {
// Forwards to _get_core_domainish() with Domainish::kind_non_empty_domain.
// Declared virtual so that subclasses can post-process the table;
// SOMAGeometryDataFrame overrides it.
return _get_core_domainish(Domainish::kind_non_empty_domain);
}

Expand Down
59 changes: 59 additions & 0 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,18 @@ void SOMAGeometryDataFrame::set_array_data(
SOMAArray::set_array_data(std::move(arrow_schema), std::move(arrow_array));
}

/**
 * Returns the domain reported by SOMAArray::get_soma_domain() after passing
 * it through _reconstruct_geometry_domain().
 */
ArrowTable SOMAGeometryDataFrame::get_soma_domain() {
    ArrowTable core_domain = SOMAArray::get_soma_domain();
    return _reconstruct_geometry_domain(core_domain);
}

/**
 * Returns the max domain reported by SOMAArray::get_soma_maxdomain() after
 * passing it through _reconstruct_geometry_domain().
 */
ArrowTable SOMAGeometryDataFrame::get_soma_maxdomain() {
    ArrowTable core_maxdomain = SOMAArray::get_soma_maxdomain();
    return _reconstruct_geometry_domain(core_maxdomain);
}

/**
 * Returns the non-empty domain reported by SOMAArray::get_non_empty_domain()
 * after passing it through _reconstruct_geometry_domain().
 */
ArrowTable SOMAGeometryDataFrame::get_non_empty_domain() {
    ArrowTable core_non_empty = SOMAArray::get_non_empty_domain();
    return _reconstruct_geometry_domain(core_non_empty);
}

//===================================================================
//= private non-static
//===================================================================
Expand Down Expand Up @@ -364,4 +376,51 @@ ArrowTable SOMAGeometryDataFrame::_reconstruct_geometry_data_table(
return ArrowTable(std::move(arrow_array), std::move(arrow_schema));
}

/**
 * @brief Build a domain table in which the internal spatial dimensions are
 * collapsed into a single ``soma_geometry`` column.
 *
 * Children of `domain` are transferred with ArrowSchemaMove/ArrowArrayMove
 * into a newly allocated struct schema/array. The slot of the first dimension
 * whose name starts with SOMA_GEOMETRY_DIMENSION_PREFIX is renamed to
 * SOMA_GEOMETRY_COLUMN_NAME and its array is replaced by the stacked
 * [lower, upper] values read from every second internal dimension. The
 * remaining internal dimensions are dropped from the result.
 *
 * @param domain Domain table with one child per array dimension. Although it
 * is taken by const reference, children are moved out of it.
 * @return ArrowTable with `n_children - internal_axes + 1` columns.
 */
ArrowTable SOMAGeometryDataFrame::_reconstruct_geometry_domain(
    const ArrowTable& domain) {
    std::unique_ptr<ArrowSchema> schema = std::make_unique<ArrowSchema>(
        ArrowSchema{});
    std::unique_ptr<ArrowArray> array = std::make_unique<ArrowArray>(
        ArrowArray{});

    // Two internal dimensions are counted per spatial column.
    const int64_t internal_axes = static_cast<int64_t>(
        2 * spatial_column_names().size());

    // All internal dimensions collapse into one output column, hence "+ 1".
    NANOARROW_THROW_NOT_OK(
        ArrowSchemaInitFromType(schema.get(), NANOARROW_TYPE_STRUCT));
    NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(
        schema.get(), domain.second->n_children - internal_axes + 1));
    NANOARROW_THROW_NOT_OK(
        ArrowArrayInitFromType(array.get(), NANOARROW_TYPE_STRUCT));
    NANOARROW_THROW_NOT_OK(ArrowArrayAllocateChildren(
        array.get(), domain.first->n_children - internal_axes + 1));

    const std::vector<std::string> dim_names = this->dimension_names();
    const auto is_internal = [](const std::string& name) {
        return name.rfind(SOMA_GEOMETRY_DIMENSION_PREFIX, 0) == 0;
    };
    // Index of the first internal dimension; assumes dimension order matches
    // the child order of `domain` — TODO confirm.
    const auto first_dim = static_cast<int64_t>(
        std::find_if(dim_names.begin(), dim_names.end(), is_internal) -
        dim_names.begin());

    // `i` walks the output columns, `orig_i` walks the children of `domain`.
    int64_t orig_i = 0;
    for (int64_t i = 0; i < schema->n_children; ++i) {
        ArrowSchemaMove(domain.second->children[orig_i], schema->children[i]);
        ArrowArrayMove(domain.first->children[orig_i], array->children[i]);

        if (i != first_dim) {
            ++orig_i;
            continue;
        }

        NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(
            schema->children[i], SOMA_GEOMETRY_COLUMN_NAME.c_str()));

        // Stack the [lower, upper] pair of every second internal dimension.
        // NOTE(review): presumably the skipped dimensions share the domain of
        // their sibling on the same axis — confirm against the schema layout.
        // NOTE(review): the first child read here was moved above; this
        // relies on the move leaving the source buffer pointers readable.
        std::vector<double_t> data;
        data.reserve(static_cast<size_t>(internal_axes));
        for (int64_t axis = orig_i; axis < orig_i + internal_axes; axis += 2) {
            const auto* bounds = static_cast<const double_t*>(
                domain.first->children[axis]->buffers[1]);
            data.push_back(bounds[0]);
            data.push_back(bounds[1]);
        }

        // NOTE(review): this overwrites the child pointer allocated by
        // ArrowArrayAllocateChildren (and the array just moved into it)
        // without releasing it — confirm whether it should be freed first.
        array->children[i] = ArrowAdapter::make_arrow_array_child(data);

        // Resume right after the internal dimensions. Advancing by exactly
        // `internal_axes` keeps the next output column aligned with the first
        // non-internal child instead of skipping it.
        orig_i += internal_axes;
    }

    return ArrowTable(std::move(array), std::move(schema));
}

} // namespace tiledbsoma
13 changes: 13 additions & 0 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,12 @@ class SOMAGeometryDataFrame : virtual public SOMAArray {
std::unique_ptr<ArrowSchema> arrow_schema,
std::unique_ptr<ArrowArray> arrow_array) override;

ArrowTable get_soma_domain() override;

ArrowTable get_soma_maxdomain() override;

ArrowTable get_non_empty_domain() override;

private:
//===================================================================
//= private non-static
Expand All @@ -260,6 +266,13 @@ class SOMAGeometryDataFrame : virtual public SOMAArray {
*/
ArrowTable _reconstruct_geometry_data_table(
ArrowTable original_data, const std::vector<ArrowTable>& wkb_data);

/**
* @brief Create a new ArrowTable by merging the internal spatial dimensions
* and setting the ``soma_geometry`` domain as the stacked domain of each
* spatial axis.
*/
ArrowTable _reconstruct_geometry_domain(const ArrowTable& domain);
};
} // namespace tiledbsoma

Expand Down
2 changes: 1 addition & 1 deletion libtiledbsoma/test/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ std::unique_ptr<ArrowSchema> create_index_cols_info_schema(

auto schema = ArrowAdapter::make_arrow_schema(names, tiledb_datatypes);

for (size_t i = 0; i < schema->n_children; ++i) {
for (int64_t i = 0; i < schema->n_children; ++i) {
if (strcmp(schema->children[i]->name, "soma_geometry")) {
nanoarrow::UniqueBuffer buffer;
ArrowMetadataBuilderInit(buffer.get(), nullptr);
Expand Down

0 comments on commit 3e6d9f2

Please sign in to comment.