Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Hide internal dimensions #3266

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
8 changes: 8 additions & 0 deletions libtiledbsoma/src/soma/managed_query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,14 @@ void ManagedQuery::setup_read() {
for (int i = 0; i < attribute_num; i++) {
columns_.push_back(schema.attribute(i).name());
}

auto is_internal = [](std::string name) {
return name.rfind(SOMA_GEOMETRY_DIMENSION_PREFIX, 0) == 0;
};

auto internal_end = std::remove_if(
columns_.begin(), columns_.end(), is_internal);
columns_.erase(internal_end, columns_.end());
}

// Allocate and attach buffers
Expand Down
9 changes: 4 additions & 5 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,8 @@
#ifndef SOMA_ARRAY
#define SOMA_ARRAY

#include <stdexcept> // for windows: error C2039: 'runtime_error': is not a member of 'std'

#include <future>
#include <stdexcept> // for windows: error C2039: 'runtime_error': is not a member of 'std'

#include <tiledb/tiledb>
#include <tiledb/tiledb_experimental>
Expand Down Expand Up @@ -997,7 +996,7 @@
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
ArrowTable get_soma_domain() {
virtual ArrowTable get_soma_domain() {

Check warning on line 999 in libtiledbsoma/src/soma/soma_array.h

View check run for this annotation

Codecov / codecov/patch

libtiledbsoma/src/soma/soma_array.h#L999

Added line #L999 was not covered by tests
if (has_current_domain()) {
return _get_core_current_domain();
} else {
Expand All @@ -1020,15 +1019,15 @@
* @tparam T Domain datatype
* @return Pair of [lower, upper] inclusive bounds.
*/
// Virtual so that derived array types can post-process the core domain;
// SOMAGeometryDataFrame overrides this method.
// NOTE(review): Codecov reports this line as not covered by tests.
virtual ArrowTable get_soma_maxdomain() {
    return _get_core_domain();
}

/**
 * Returns the core non-empty domain in its entirety, as an Arrow
 * table for return to Python/R.
 *
 * Declared virtual so that derived array types can override it;
 * SOMAGeometryDataFrame does so.
 *
 * NOTE(review): Codecov reports this line as not covered by tests.
 */
virtual ArrowTable get_non_empty_domain() {
    return _get_core_domainish(Domainish::kind_non_empty_domain);
}

Expand Down
75 changes: 73 additions & 2 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
std::shared_ptr<SOMAContext> ctx,
PlatformConfig platform_config,
std::optional<TimestampRange> timestamp) {
std::vector<std::string> spatial_axes;
auto tiledb_schema = ArrowAdapter::tiledb_schema_from_arrow_schema(
ctx->tiledb_ctx(),
schema,
Expand Down Expand Up @@ -98,7 +97,20 @@

/**
 * Returns the index column names with the internal dimensions hidden.
 *
 * Every dimension whose name starts with SOMA_GEOMETRY_DIMENSION_PREFIX is
 * dropped and a single SOMA_GEOMETRY_COLUMN_NAME entry is placed where the
 * first such dimension appeared. When no dimension carries the prefix the
 * entry is appended at the end.
 */
const std::vector<std::string> SOMAGeometryDataFrame::index_column_names()
    const {
    std::vector<std::string> dim_names = this->dimension_names();

    // Prefix test; the name is taken by const reference so the predicate does
    // not copy a string for every element it inspects.
    auto is_internal = [](const std::string& name) {
        return name.rfind(SOMA_GEOMETRY_DIMENSION_PREFIX, 0) == 0;
    };

    // Put the public geometry column in front of the first internal dimension.
    auto first_internal = std::find_if(
        dim_names.begin(), dim_names.end(), is_internal);
    dim_names.insert(first_internal, SOMA_GEOMETRY_COLUMN_NAME);

    // insert() invalidated first_internal, so rescan the whole vector when
    // removing the internal names.
    dim_names.erase(
        std::remove_if(dim_names.begin(), dim_names.end(), is_internal),
        dim_names.end());

    return dim_names;
}

const std::vector<std::string> SOMAGeometryDataFrame::spatial_column_names()
Expand Down Expand Up @@ -172,6 +184,18 @@
SOMAArray::set_array_data(std::move(arrow_schema), std::move(arrow_array));
}

// Returns the base-class SOMA domain after passing it through
// _reconstruct_geometry_domain, which merges the internal spatial dimensions
// into a single geometry column.
// NOTE(review): Codecov reports this function as not covered by tests.
ArrowTable SOMAGeometryDataFrame::get_soma_domain() {
    return _reconstruct_geometry_domain(SOMAArray::get_soma_domain());
}

// Returns the base-class SOMA maxdomain after passing it through
// _reconstruct_geometry_domain, which merges the internal spatial dimensions
// into a single geometry column.
// NOTE(review): Codecov reports this function as not covered by tests.
ArrowTable SOMAGeometryDataFrame::get_soma_maxdomain() {
    return _reconstruct_geometry_domain(SOMAArray::get_soma_maxdomain());
}

// Returns the base-class non-empty domain after passing it through
// _reconstruct_geometry_domain, which merges the internal spatial dimensions
// into a single geometry column.
// NOTE(review): Codecov reports this function as not covered by tests.
ArrowTable SOMAGeometryDataFrame::get_non_empty_domain() {
    return _reconstruct_geometry_domain(SOMAArray::get_non_empty_domain());
}

//===================================================================
//= private non-static
//===================================================================
Expand Down Expand Up @@ -352,4 +376,51 @@
return ArrowTable(std::move(arrow_array), std::move(arrow_schema));
}

/**
 * Builds a domain table in which the internal spatial dimensions are replaced
 * by one column named SOMA_GEOMETRY_COLUMN_NAME.
 *
 * Children of `domain` are moved into the result one by one. At the position
 * of the first internal dimension (name starting with
 * SOMA_GEOMETRY_DIMENSION_PREFIX) the schema child is renamed and its array
 * child is replaced by a freshly built array holding two doubles for every
 * second internal child.
 *
 * @param domain Core domain table; its children are moved from, even though
 * the table itself is passed by const reference.
 * @return The reconstructed table with
 * `n_children - 2 * spatial_column_names().size() + 1` columns.
 *
 * NOTE(review): Codecov reports this function as not covered by tests.
 */
ArrowTable SOMAGeometryDataFrame::_reconstruct_geometry_domain(
    const ArrowTable& domain) {
    std::unique_ptr<ArrowSchema> schema = std::make_unique<ArrowSchema>(
        ArrowSchema{});
    std::unique_ptr<ArrowArray> array = std::make_unique<ArrowArray>(
        ArrowArray{});

    // Two internal dimensions are counted per spatial column.
    const int64_t internal_axes = static_cast<int64_t>(
        2 * spatial_column_names().size());

    NANOARROW_THROW_NOT_OK(
        ArrowSchemaInitFromType(schema.get(), NANOARROW_TYPE_STRUCT));
    NANOARROW_THROW_NOT_OK(ArrowSchemaAllocateChildren(
        schema.get(), domain.second->n_children - internal_axes + 1));
    NANOARROW_THROW_NOT_OK(
        ArrowArrayInitFromType(array.get(), NANOARROW_TYPE_STRUCT));
    NANOARROW_THROW_NOT_OK(ArrowArrayAllocateChildren(
        array.get(), domain.first->n_children - internal_axes + 1));

    const std::vector<std::string> dim_names = this->dimension_names();
    auto is_internal = [](const std::string& name) {
        return name.rfind(SOMA_GEOMETRY_DIMENSION_PREFIX, 0) == 0;
    };
    // Index of the first internal dimension; assumes the children of `domain`
    // follow the order of dimension_names() — TODO confirm.
    const auto first_dim = static_cast<int64_t>(
        std::find_if(dim_names.begin(), dim_names.end(), is_internal) -
        dim_names.begin());

    for (int64_t i = 0, orig_i = 0; i < schema->n_children; ++i, ++orig_i) {
        const bool is_geometry_column = (i == first_dim);

        // Collect the stacked bounds BEFORE any child is moved from, so the
        // buffers are read while the source children still own them.
        std::vector<double_t> data;
        if (is_geometry_column) {
            // Every second internal child is sampled; presumably that is one
            // dimension per spatial axis — TODO confirm the dimension order.
            for (int64_t src = orig_i; src < orig_i + internal_axes;
                 src += 2) {
                const auto* bounds = static_cast<const double_t*>(
                    domain.first->children[src]->buffers[1]);
                data.push_back(bounds[0]);
                data.push_back(bounds[1]);
            }
        }

        ArrowSchemaMove(domain.second->children[orig_i], schema->children[i]);
        ArrowArrayMove(domain.first->children[orig_i], array->children[i]);

        if (is_geometry_column) {
            NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(
                schema->children[i], SOMA_GEOMETRY_COLUMN_NAME.c_str()));

            // NOTE(review): this overwrites the child pointer that
            // ArrowArrayAllocateChildren set up (and ArrowArrayMove just
            // filled) without releasing it — looks like a leak; confirm the
            // ownership rules of make_arrow_array_child before changing.
            array->children[i] = ArrowAdapter::make_arrow_array_child(data);

            // Skip the remaining internal children. The loop's own ++orig_i
            // then lands on the first child after the internal block; without
            // the "- 1" one source column would be skipped and the last
            // iteration would index past the end of `domain`.
            orig_i += internal_axes - 1;
        }
    }

    return ArrowTable(std::move(array), std::move(schema));
}

} // namespace tiledbsoma
73 changes: 73 additions & 0 deletions libtiledbsoma/src/soma/soma_geometry_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#ifndef SOMA_GEOMETRY_DATAFRAME
#define SOMA_GEOMETRY_DATAFRAME

#include <algorithm>
#include <filesystem>
#include <vector>

Expand Down Expand Up @@ -175,10 +176,75 @@ class SOMAGeometryDataFrame : virtual public SOMAArray {
*/
uint64_t count();

/**
* @brief Set the spatial axis slice using multiple ranges
*
* @note Partitioning is not supported
*
* @tparam T
* @param axis
* @param ranges
*/
template <typename T>
void set_spatial_dim_ranges(
const std::string& axis, const std::vector<std::pair<T, T>>& ranges) {
std::vector<std::pair<T, T>> min_range;
std::vector<std::pair<T, T>> max_range;

if (ranges.size() != 1) {
throw TileDBSOMAError(
"Multi ranges are not supported for axis dimensions");
}

T min_domain, max_domain;

// Both min and max dimension share the same domain
if (ArraySchemaExperimental::current_domain(
*this->ctx()->tiledb_ctx(), *this->tiledb_schema())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You reference both of these throughout

*this->ctx()->tiledb_ctx(), *this->tiledb_schema())

Please make an

auto ctx = this->ctx()->tiledb_ctx();

and

auto schema = this->tiledb_schema();

.is_empty()) {
std::pair<T, T> domain = this->tiledb_schema()
->domain()
.dimension(
SOMA_GEOMETRY_DIMENSION_PREFIX +
axis + "__min")
.domain<T>();
min_domain = domain.first;
max_domain = domain.second;
} else {
auto current_domain = ArraySchemaExperimental::current_domain(
*this->ctx()->tiledb_ctx(),
*this->tiledb_schema().get())
.ndrectangle()
.range<T>(
SOMA_GEOMETRY_DIMENSION_PREFIX +
axis + "__min");
min_domain = current_domain[0];
max_domain = current_domain[1];
}

for (const std::pair<T, T>& range : ranges) {
min_range.push_back(
std::make_pair(min_domain, std::min(range.second, max_domain)));
max_range.push_back(
std::make_pair(std::max(range.first, min_domain), max_domain));
}

this->set_dim_ranges(
SOMA_GEOMETRY_DIMENSION_PREFIX + axis + "__min", min_range);
this->set_dim_ranges(
SOMA_GEOMETRY_DIMENSION_PREFIX + axis + "__max", max_range);
}

void set_array_data(
std::unique_ptr<ArrowSchema> arrow_schema,
std::unique_ptr<ArrowArray> arrow_array) override;

ArrowTable get_soma_domain() override;

ArrowTable get_soma_maxdomain() override;

ArrowTable get_non_empty_domain() override;

private:
//===================================================================
//= private non-static
Expand All @@ -200,6 +266,13 @@ class SOMAGeometryDataFrame : virtual public SOMAArray {
*/
ArrowTable _reconstruct_geometry_data_table(
ArrowTable original_data, const std::vector<ArrowTable>& wkb_data);

/**
* @brief Create a new ArrowTable by merging the internal spatial dimensions
* and setting the ``soma_geometry`` domain as the stacked domain of each
* spatial axis.
*/
ArrowTable _reconstruct_geometry_domain(const ArrowTable& domain);
};
} // namespace tiledbsoma

Expand Down
Loading
Loading