Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Push down PointCloudDataFrame spatial metadata from Python to C++ #3630

Merged
merged 3 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions apis/python/src/tiledbsoma/_point_cloud_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
from ._constants import (
SOMA_COORDINATE_SPACE_METADATA_KEY,
SOMA_JOINID,
SOMA_SPATIAL_ENCODING_VERSION,
SOMA_SPATIAL_VERSION_METADATA_KEY,
SPATIAL_DISCLAIMER,
)
from ._dataframe import (
Expand Down Expand Up @@ -123,9 +121,16 @@ def create(
warnings.warn(SPATIAL_DISCLAIMER)

axis_dtype: pa.DataType | None = None
if not isinstance(coordinate_space, CoordinateSpace):
coordinate_space = CoordinateSpace.from_axis_names(coordinate_space)
for column_name in coordinate_space.axis_names:

# Get coordinate space axis data.
if isinstance(coordinate_space, CoordinateSpace):
axis_names = tuple(axis.name for axis in coordinate_space)
axis_units = tuple(axis.unit for axis in coordinate_space)
else:
axis_names = tuple(coordinate_space)
axis_units = tuple(len(axis_names) * [None])

for column_name in axis_names:
# Check axis column type is valid and all axis columns have the same type.
if axis_dtype is None:
try:
Expand All @@ -152,7 +157,7 @@ def create(
) from ke
if column_dtype != axis_dtype:
raise ValueError("All spatial axes must have the same datatype.")
index_column_names = coordinate_space.axis_names + (SOMA_JOINID,)
index_column_names = axis_names + (SOMA_JOINID,)

context = _validate_soma_tiledb_context(context)
schema = _canonicalize_schema(schema, index_column_names)
Expand Down Expand Up @@ -251,22 +256,17 @@ def create(
uri,
schema=schema,
index_column_info=index_column_info,
axis_names=axis_names,
axis_units=axis_units,
ctx=context.native_context,
platform_config=plt_cfg,
timestamp=(0, timestamp_ms),
)
except SOMAError as e:
raise map_exception_for_create(e, uri) from None

handle = cls._wrapper_type.open(uri, "w", context, tiledb_timestamp)
handle.metadata[SOMA_SPATIAL_VERSION_METADATA_KEY] = (
SOMA_SPATIAL_ENCODING_VERSION
)
handle.meta[SOMA_COORDINATE_SPACE_METADATA_KEY] = coordinate_space_to_json(
coordinate_space
)
return cls(
handle,
cls._wrapper_type.open(uri, "w", context, tiledb_timestamp),
_dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code",
)

Expand Down
7 changes: 7 additions & 0 deletions apis/python/src/tiledbsoma/soma_point_cloud_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ void load_soma_point_cloud_dataframe(py::module& m) {
[](std::string_view uri,
py::object py_schema,
py::object index_column_info,
std::vector<std::string> axis_names,
std::vector<std::optional<std::string>> axis_units,
std::shared_ptr<SOMAContext> context,
PlatformConfig platform_config,
std::optional<std::pair<uint64_t, uint64_t>> timestamp) {
Expand Down Expand Up @@ -80,13 +82,16 @@ void load_soma_point_cloud_dataframe(py::module& m) {
index_column_info.attr("_export_to_c")(
index_column_array_ptr, index_column_schema_ptr);

SOMACoordinateSpace coord_space{axis_names, axis_units};

try {
SOMAPointCloudDataFrame::create(
uri,
std::make_unique<ArrowSchema>(schema),
ArrowTable(
std::make_unique<ArrowArray>(index_column_array),
std::make_unique<ArrowSchema>(index_column_schema)),
coord_space,
context,
platform_config,
timestamp);
Expand All @@ -101,6 +106,8 @@ void load_soma_point_cloud_dataframe(py::module& m) {
py::kw_only(),
"schema"_a,
"index_column_info"_a,
"axis_names"_a,
"axis_units"_a,
"ctx"_a,
"platform_config"_a,
"timestamp"_a = py::none())
Expand Down
44 changes: 26 additions & 18 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,40 +29,48 @@ void SOMAArray::create(
std::string_view soma_type,
std::optional<std::string_view> soma_schema,
std::optional<TimestampRange> timestamp) {
Array::create(std::string(uri), schema);
_create(ctx, uri, schema, soma_type, soma_schema, timestamp);
}

std::shared_ptr<Array> array;
if (timestamp) {
array = std::make_shared<Array>(
*ctx->tiledb_ctx(),
std::string(uri),
TILEDB_WRITE,
TemporalPolicy(
TimestampStartEnd, timestamp->first, timestamp->second));
} else {
array = std::make_shared<Array>(
*ctx->tiledb_ctx(), std::string(uri), TILEDB_WRITE);
}
Array SOMAArray::_create(
std::shared_ptr<SOMAContext> ctx,
std::string_view uri,
ArraySchema schema,
std::string_view soma_type,
std::optional<std::string_view> soma_schema,
std::optional<TimestampRange> timestamp) {
// Create TileDB array.
Array::create(std::string(uri), schema);

array->put_metadata(
// Open TileDB array at requested time.
auto temporal_policy = timestamp.has_value() ? TemporalPolicy(
TimestampStartEnd,
timestamp->first,
timestamp->second) :
TemporalPolicy();
Array array{
*ctx->tiledb_ctx(), std::string(uri), TILEDB_WRITE, temporal_policy};

// Set SOMA metadata.
array.put_metadata(
SOMA_OBJECT_TYPE_KEY,
TILEDB_STRING_UTF8,
static_cast<uint32_t>(soma_type.length()),
soma_type.data());

array->put_metadata(
array.put_metadata(
ENCODING_VERSION_KEY,
TILEDB_STRING_UTF8,
static_cast<uint32_t>(ENCODING_VERSION_VAL.length()),
ENCODING_VERSION_VAL.c_str());

if (soma_schema.has_value()) {
array->put_metadata(
array.put_metadata(
TILEDB_SOMA_SCHEMA_KEY,
TILEDB_STRING_UTF8,
static_cast<uint32_t>(soma_schema->length()),
soma_schema->data());
}
// Return internal TileDB array.
return array;
}

std::unique_ptr<SOMAArray> SOMAArray::open(
Expand Down
8 changes: 8 additions & 0 deletions libtiledbsoma/src/soma/soma_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -1298,6 +1298,14 @@ class SOMAArray : public SOMAObject {
std::optional<int64_t> _maybe_soma_joinid_shape();
std::optional<int64_t> _maybe_soma_joinid_maxshape();

/**
 * Create a TileDB array, open it for writing, and write the base SOMA
 * metadata to it.
 *
 * The array is returned so that a caller can add further metadata to the
 * same open handle before it goes out of scope.
 *
 * @param ctx SOMAContext that supplies the TileDB context used to open the
 * array
 * @param uri URI at which the array is created
 * @param schema TileDB array schema used to create the array
 * @param soma_type Value stored under the SOMA object type metadata key
 * @param soma_schema Optional value stored under the TileDB-SOMA schema
 * metadata key; the key is not written when this is empty
 * @param timestamp Optional timestamp range the array is opened at for the
 * metadata write; a default temporal policy is used when this is empty
 * @return The TileDB array, opened in write mode
 */
static Array _create(
std::shared_ptr<SOMAContext> ctx,
std::string_view uri,
ArraySchema schema,
std::string_view soma_type,
std::optional<std::string_view> soma_schema,
std::optional<TimestampRange> timestamp);

private:
//===================================================================
//= private non-static
Expand Down
19 changes: 18 additions & 1 deletion libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
*/

#include "soma_point_cloud_dataframe.h"
#include <tiledb/tiledb>
#include "utils/common.h"

namespace tiledbsoma {
using namespace tiledb;
Expand All @@ -24,9 +26,11 @@ void SOMAPointCloudDataFrame::create(
std::string_view uri,
const std::unique_ptr<ArrowSchema>& schema,
const ArrowTable& index_columns,
const SOMACoordinateSpace& coordinate_space,
std::shared_ptr<SOMAContext> ctx,
PlatformConfig platform_config,
std::optional<TimestampRange> timestamp) {
// Create TileDB array that is open for writing.
auto [tiledb_schema, soma_schema_extension] =
ArrowAdapter::tiledb_schema_from_arrow_schema(
ctx->tiledb_ctx(),
Expand All @@ -35,13 +39,26 @@ void SOMAPointCloudDataFrame::create(
"SOMAPointCloudDataFrame",
true,
platform_config);
SOMAArray::create(
auto array = SOMAArray::_create(
ctx,
uri,
tiledb_schema,
"SOMAPointCloudDataFrame",
std::nullopt,
timestamp);

// Add additional point cloud dataframe metadata.
array.put_metadata(
SPATIAL_ENCODING_VERSION_KEY,
TILEDB_STRING_UTF8,
static_cast<uint32_t>(SPATIAL_ENCODING_VERSION_VAL.size()),
SPATIAL_ENCODING_VERSION_VAL.c_str());
const auto coord_space_metadata = coordinate_space.to_string();
array.put_metadata(
SOMA_COORDINATE_SPACE_KEY,
TILEDB_STRING_UTF8,
static_cast<uint32_t>(coord_space_metadata.size()),
coord_space_metadata.c_str());
}

std::unique_ptr<SOMAPointCloudDataFrame> SOMAPointCloudDataFrame::open(
Expand Down
11 changes: 11 additions & 0 deletions libtiledbsoma/src/soma/soma_point_cloud_dataframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <filesystem>

#include "soma_array.h"
#include "soma_coordinates.h"

namespace tiledbsoma {

Expand All @@ -37,6 +38,8 @@ class SOMAPointCloudDataFrame : public SOMAArray {
* @param schema Arrow schema
* @param index_columns The index column names with associated domains
* and tile extents per dimension
* @param coordinate_space The coordinate space the PointCloudDataFrame
* spatial axes are defined on.
* @param ctx SOMAContext
* @param platform_config Optional config parameter dictionary
* @param timestamp Optional timestamp range to write SOMA metadata info
Expand All @@ -45,6 +48,7 @@ class SOMAPointCloudDataFrame : public SOMAArray {
std::string_view uri,
const std::unique_ptr<ArrowSchema>& schema,
const ArrowTable& index_columns,
const SOMACoordinateSpace& coordinate_space,
std::shared_ptr<SOMAContext> ctx,
PlatformConfig platform_config = PlatformConfig(),
std::optional<TimestampRange> timestamp = std::nullopt);
Expand Down Expand Up @@ -124,6 +128,10 @@ class SOMAPointCloudDataFrame : public SOMAArray {

using SOMAArray::open;

/**
 * Return the coordinate space held by this point cloud dataframe.
 *
 * NOTE(review): the code visible here does not show where `coord_space_`
 * is assigned -- confirm it is populated from the array metadata on open.
 *
 * @return Const reference to the stored SOMACoordinateSpace; the reference
 * refers to a member of this object.
 */
const SOMACoordinateSpace& coordinate_space() const {
return coord_space_;
}

/**
* Return the data schema, in the form of a ArrowSchema.
*
Expand All @@ -144,6 +152,9 @@ class SOMAPointCloudDataFrame : public SOMAArray {
* @return uint64_t
*/
uint64_t count();

private:
SOMACoordinateSpace coord_space_;
};
} // namespace tiledbsoma

Expand Down
9 changes: 8 additions & 1 deletion libtiledbsoma/test/unit_soma_point_cloud_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

const int64_t SOMA_JOINID_DIM_MAX = 99;

TEST_CASE("SOMAPointCloudDataFrame: basic", "[SOMAPointCloudDataFrame]") {
TEST_CASE(
"SOMAPointCloudDataFrame: basic", "[point_cloud_dataframe][spatial]") {
auto ctx = std::make_shared<SOMAContext>();
std::string uri{"mem://unit-test-point-cloud-basic"};
PlatformConfig platform_config{};
Expand Down Expand Up @@ -51,11 +52,13 @@ TEST_CASE("SOMAPointCloudDataFrame: basic", "[SOMAPointCloudDataFrame]") {
// Create the point cloud.
auto [schema, index_columns] =
helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos);
SOMACoordinateSpace coord_space{};
SOMAPointCloudDataFrame::create(
uri,
std::move(schema),
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)),
coord_space,
ctx,
platform_config,
std::nullopt);
Expand Down Expand Up @@ -127,6 +130,10 @@ TEST_CASE("SOMAPointCloudDataFrame: basic", "[SOMAPointCloudDataFrame]") {
CHECK(d2 == std::vector<uint32_t>(d2span.begin(), d2span.end()));
CHECK(a0 == std::vector<double>(a0span.begin(), a0span.end()));
}
CHECK(soma_point_cloud->has_metadata("soma_encoding_version"));
CHECK(soma_point_cloud->has_metadata("soma_spatial_encoding_version"));
auto point_cloud_coord_space = soma_point_cloud->coordinate_space();
CHECK(point_cloud_coord_space == coord_space);
soma_point_cloud->close();

auto soma_object = SOMAObject::open(uri, OpenMode::read, ctx);
Expand Down