Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add external retriever to usearch so vector nodes can be externally stored and managed #171

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 67 additions & 6 deletions c/lib.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <cassert>
#include <functional>

#include <usearch/index_punned_dense.hpp>

Expand Down Expand Up @@ -48,9 +49,10 @@ scalar_kind_t to_native_scalar(usearch_scalar_kind_t kind) {
}
}

add_result_t add_(index_t* index, usearch_label_t label, void const* vector, scalar_kind_t kind) {
add_result_t add_(index_t* index, usearch_label_t label, void const* vector, scalar_kind_t kind, int32_t level,
void* tape) {
switch (kind) {
case scalar_kind_t::f32_k: return index->add(label, (f32_t const*)vector);
case scalar_kind_t::f32_k: return index->add(label, (f32_t const*)vector, level, (byte_t*)tape);
case scalar_kind_t::f64_k: return index->add(label, (f64_t const*)vector);
case scalar_kind_t::f16_k: return index->add(label, (f16_t const*)vector);
case scalar_kind_t::f8_k: return index->add(label, (f8_bits_t const*)vector);
Expand All @@ -59,6 +61,7 @@ add_result_t add_(index_t* index, usearch_label_t label, void const* vector, sca
}
}

#if USEARCH_LOOKUP_LABEL
bool get_(index_t* index, label_t label, void* vector, scalar_kind_t kind) {
switch (kind) {
case scalar_kind_t::f32_k: return index->get(label, (f32_t*)vector);
Expand All @@ -69,6 +72,7 @@ bool get_(index_t* index, label_t label, void* vector, scalar_kind_t kind) {
default: return index->empty_search_result().failed("Unknown scalar kind!");
}
}
#endif

search_result_t search_(index_t* index, void const* vector, scalar_kind_t kind, size_t n) {
switch (kind) {
Expand Down Expand Up @@ -98,6 +102,7 @@ USEARCH_EXPORT usearch_index_t usearch_init(usearch_init_options_t* options, use

index_config_t config;
config.connectivity = options->connectivity;
config.vector_alignment = sizeof(float);
index_t index = //
options->metric ? //
index_t::make( //
Expand Down Expand Up @@ -132,6 +137,41 @@ USEARCH_EXPORT void usearch_view(usearch_index_t index, char const* path, usearc
*error = result.error.what();
}

// Forwards `data` to `index_t::view_mem` on the index behind the opaque handle.
//
// index: opaque handle, reinterpreted as `index_t*`.
// data:  buffer passed through to `view_mem` unchanged.
// error: on failure receives the message from `result.error.what()`;
//        left untouched on success.
void usearch_view_mem(usearch_index_t index, char* data, usearch_error_t* error) {
    serialization_result_t result = reinterpret_cast<index_t*>(index)->view_mem(data);
    if (!result) {
        *error = result.error.what();
        // The message has been handed to the caller, so clear the error before
        // `result` goes out of scope; otherwise the error_t destructor will raise.
        result.error = nullptr;
    }
}

// Forwards `data` to `index_t::view_mem_lazy` on the index behind the opaque handle.
//
// index: opaque handle, reinterpreted as `index_t*`.
// data:  buffer passed through to `view_mem_lazy` unchanged.
// error: on failure receives the message from the result's error;
//        left untouched on success.
void usearch_view_mem_lazy(usearch_index_t index, char* data, usearch_error_t* error) {
    index_t* typed_index = reinterpret_cast<index_t*>(index);
    serialization_result_t outcome = typed_index->view_mem_lazy(data);
    if (!outcome) {
        char const* message = outcome.error.what();
        *error = message;
        // The error must be cleared once reported, or the error_t destructor will raise.
        // todo:: apply the same reset to the rest of the interface
        outcome.error = nullptr;
    }
}

// Forwards `headerp` to `index_t::update_header` on the index behind the opaque handle.
//
// index:   opaque handle, reinterpreted as `index_t*`.
// headerp: pointer passed through to `update_header` unchanged.
// error:   on failure receives the message from the result's error;
//          left untouched on success.
void usearch_update_header(usearch_index_t index, char* headerp, usearch_error_t* error) {
    index_t* typed_index = reinterpret_cast<index_t*>(index);
    serialization_result_t outcome = typed_index->update_header(headerp);
    if (!outcome) {
        char const* message = outcome.error.what();
        *error = message;
        // Clear the error after reporting it so it is no longer set when `outcome` is destroyed.
        outcome.error = nullptr;
    }
}

// Copies four fields of the index's `metadata()` result into the C-visible
// `usearch_metadata_t` struct and returns it by value.
//
// index: opaque handle, reinterpreted as `index_t*`.
// The error out-parameter is unnamed and never written.
usearch_metadata_t usearch_metadata(usearch_index_t index, usearch_error_t*) {
    index_t* typed_index = reinterpret_cast<index_t*>(index);
    precomputed_constants_t constants = typed_index->metadata();

    // Field-by-field copy: each destination member takes the same-named source member.
    usearch_metadata_t summary;
    summary.inverse_log_connectivity = constants.inverse_log_connectivity;
    summary.connectivity_max_base = constants.connectivity_max_base;
    summary.neighbors_bytes = constants.neighbors_bytes;
    summary.neighbors_base_bytes = constants.neighbors_base_bytes;
    return summary;
}

// Returns `size()` of the index behind the opaque handle.
// The error out-parameter is unnamed and never written.
USEARCH_EXPORT size_t usearch_size(usearch_index_t index, usearch_error_t*) {
    index_t* typed_index = reinterpret_cast<index_t*>(index);
    return typed_index->size();
}
Expand All @@ -153,19 +193,38 @@ USEARCH_EXPORT void usearch_reserve(usearch_index_t index, size_t capacity, usea
reinterpret_cast<index_t*>(index)->reserve(capacity);
}

USEARCH_EXPORT void usearch_add( //
USEARCH_EXPORT void usearch_add( //
usearch_index_t index, usearch_label_t label, void const* vector, usearch_scalar_kind_t kind, //
usearch_error_t* error) {
add_result_t result = add_(reinterpret_cast<index_t*>(index), label, vector, to_native_scalar(kind));
add_result_t result = add_(reinterpret_cast<index_t*>(index), label, vector, to_native_scalar(kind), -1, nullptr);
if (!result)
*error = result.error.what();
}

// Returns the result of `newnode_level()` on the index behind the opaque handle.
// The error out-parameter is unnamed and never written.
int32_t usearch_newnode_level(usearch_index_t index, usearch_error_t*) {
    index_t* typed_index = reinterpret_cast<index_t*>(index);
    return typed_index->newnode_level();
}

// Adds a vector through `add_`, passing a caller-chosen `level` and `tape`.
//
// index:  opaque handle, reinterpreted as `index_t*`.
// label:  label forwarded to `add_`.
// vector: vector data, interpreted according to `kind`.
// tape:   pointer forwarded to `add_` unchanged.
//         NOTE(review): presumably the externally managed node storage - confirm.
// kind:   C-level scalar kind, converted with `to_native_scalar`.
// level:  level forwarded to `add_` unchanged.
// error:  on failure receives the message from `result.error.what()`;
//         left untouched on success.
void usearch_add_external( //
    usearch_index_t index, usearch_label_t label, void const* vector, void* tape, usearch_scalar_kind_t kind, //
    int32_t level, usearch_error_t* error) {
    add_result_t result = add_(reinterpret_cast<index_t*>(index), label, vector, to_native_scalar(kind), level, tape);
    if (!result) {
        *error = result.error.what();
        // The message has been handed to the caller, so clear the error before
        // `result` goes out of scope; otherwise the error_t destructor will raise.
        result.error = nullptr;
    }
}

// Passes both retriever callbacks to `set_node_retriever` on the index behind
// the opaque handle. The error out-parameter is unnamed and never written.
void usearch_set_node_retriever(usearch_index_t index, usearch_node_retriever_t retriever,
                                usearch_node_retriever_t retriever_mut, usearch_error_t*) {
    index_t* typed_index = reinterpret_cast<index_t*>(index);
    typed_index->set_node_retriever(retriever, retriever_mut);
}

#if USEARCH_LOOKUP_LABEL
// Returns the result of `contains(label)` on the index behind the opaque handle.
// The error out-parameter is unnamed and never written.
USEARCH_EXPORT bool usearch_contains(usearch_index_t index, usearch_label_t label, usearch_error_t*) {
    index_t* typed_index = reinterpret_cast<index_t*>(index);
    return typed_index->contains(label);
}
#endif

USEARCH_EXPORT size_t usearch_search( //
USEARCH_EXPORT size_t usearch_search( //
usearch_index_t index, void const* vector, usearch_scalar_kind_t kind, size_t results_limit, //
usearch_label_t* found_labels, usearch_distance_t* found_distances, usearch_error_t* error) {
search_result_t result = search_(reinterpret_cast<index_t*>(index), vector, to_native_scalar(kind), results_limit);
Expand All @@ -177,11 +236,13 @@ USEARCH_EXPORT size_t usearch_search(
return result.dump_to(found_labels, found_distances);
}

USEARCH_EXPORT bool usearch_get( //
#if USEARCH_LOOKUP_LABEL
USEARCH_EXPORT bool usearch_get( //
usearch_index_t index, usearch_label_t label, //
void* vector, usearch_scalar_kind_t kind, usearch_error_t*) {
return get_(reinterpret_cast<index_t*>(index), label, vector, to_native_scalar(kind));
}
#endif

USEARCH_EXPORT void usearch_remove(usearch_index_t, usearch_label_t, usearch_error_t* error) {
if (error != nullptr)
Expand Down
25 changes: 24 additions & 1 deletion c/usearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@ extern "C" {
#include <stdint.h> // `size_t`

USEARCH_EXPORT typedef void* usearch_index_t;
USEARCH_EXPORT typedef uint32_t usearch_label_t;
// Labels are widened to 64 bits for now: LanternDB assumes a label holds at least 48 bits.
// TODO: make the label type configurable.
USEARCH_EXPORT typedef uint64_t usearch_label_t;
USEARCH_EXPORT typedef float usearch_distance_t;
USEARCH_EXPORT typedef char const* usearch_error_t;

USEARCH_EXPORT typedef usearch_distance_t (*usearch_metric_t)(void const*, void const*);
USEARCH_EXPORT typedef void* (*usearch_node_retriever_t)(int index);

USEARCH_EXPORT typedef enum usearch_metric_kind_t {
usearch_metric_ip_k = 0,
Expand Down Expand Up @@ -51,13 +55,24 @@ USEARCH_EXPORT typedef struct usearch_init_options_t {
size_t expansion_search;
} usearch_init_options_t;

// Plain-data struct returned by value from `usearch_metadata()`.
// In c/lib.cpp each field is copied from the same-named member of the
// `precomputed_constants_t` returned by the index's `metadata()`.
// NOTE(review): the `*_bytes` fields are presumably sizes in bytes - confirm against the index.
USEARCH_EXPORT typedef struct {
double inverse_log_connectivity;
size_t connectivity_max_base;
size_t neighbors_bytes;
size_t neighbors_base_bytes;
} usearch_metadata_t;

USEARCH_EXPORT usearch_index_t usearch_init(usearch_init_options_t*, usearch_error_t*);
USEARCH_EXPORT void usearch_free(usearch_index_t, usearch_error_t*);

USEARCH_EXPORT void usearch_save(usearch_index_t, char const* path, usearch_error_t*);
USEARCH_EXPORT void usearch_load(usearch_index_t, char const* path, usearch_error_t*);
USEARCH_EXPORT void usearch_view(usearch_index_t, char const* path, usearch_error_t*);
USEARCH_EXPORT void usearch_view_mem(usearch_index_t index, char* data, usearch_error_t* error);
USEARCH_EXPORT void usearch_view_mem_lazy(usearch_index_t index, char* data, usearch_error_t* error);
USEARCH_EXPORT void usearch_update_header(usearch_index_t index, char* headerp, usearch_error_t* error);

USEARCH_EXPORT usearch_metadata_t usearch_metadata(usearch_index_t, usearch_error_t*);
USEARCH_EXPORT size_t usearch_size(usearch_index_t, usearch_error_t*);
USEARCH_EXPORT size_t usearch_capacity(usearch_index_t, usearch_error_t*);
USEARCH_EXPORT size_t usearch_dimensions(usearch_index_t, usearch_error_t*);
Expand Down Expand Up @@ -85,6 +100,14 @@ USEARCH_EXPORT bool usearch_get( //

USEARCH_EXPORT void usearch_remove(usearch_index_t, usearch_label_t, usearch_error_t*);

// Returns the index's `newnode_level()` value; the implementation never writes `error`.
// NOTE(review): presumably the level to pass to `usearch_add_external` - confirm.
USEARCH_EXPORT int32_t usearch_newnode_level(usearch_index_t index, usearch_error_t* error);

// Passes the two node-retriever callbacks to the index's `set_node_retriever`;
// the implementation never writes `error`.
USEARCH_EXPORT void usearch_set_node_retriever(usearch_index_t index, usearch_node_retriever_t retriever,
usearch_node_retriever_t retriever_mut, usearch_error_t* error);
// Like `usearch_add`, but forwards a caller-supplied `level` and `tape`
// (`usearch_add` passes -1 and a null tape). On failure `*error` receives the message.
USEARCH_EXPORT void usearch_add_external( //
usearch_index_t index, usearch_label_t label, void const* vector, void* tape, usearch_scalar_kind_t kind, //
int32_t level, usearch_error_t* error);

#ifdef __cplusplus
}
#endif
Expand Down
4 changes: 4 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.1)
option(USEARCH_USE_OPENMP "Use OpenMP for a thread pool" OFF)
option(USEARCH_USE_SIMSIMD "Use SimSIMD hardware-accelerated metrics" OFF)
option(USEARCH_USE_JEMALLOC "Use JeMalloc for faster memory allocations" OFF)
option(USEARCH_LOOKUP_LABEL "Compile with label lookup and removal tests" OFF)

# Make "Release" by default
if(NOT CMAKE_BUILD_TYPE)
Expand Down Expand Up @@ -83,6 +84,9 @@ if(${USEARCH_BUILD_TEST})
target_link_libraries(test PRIVATE Threads::Threads)
target_include_directories(test PRIVATE ${USEARCH_PUNNED_INCLUDE_DIRS})
set_target_properties(test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
if (${USEARCH_LOOKUP_LABEL})
target_compile_definitions(test PRIVATE USEARCH_LOOKUP_LABEL=1)
endif()

if(${CMAKE_VERSION} VERSION_EQUAL 3.13 OR ${CMAKE_VERSION} VERSION_GREATER 3.13)
include(CTest)
Expand Down
4 changes: 4 additions & 0 deletions cpp/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,20 @@ template <typename scalar_at, typename index_at> void test3d_punned(index_at&& i
index.add(42, view_t{&vec42[0], 3ul});

// Reconstruct
#if USEARCH_LOOKUP_LABEL
scalar_t vec42_reconstructed[3] = {0, 0, 0};
index.get(42, span_t{&vec42_reconstructed[0], 3ul});
expect(vec42_reconstructed[0] == vec42[0]);
expect(vec42_reconstructed[1] == vec42[1]);
expect(vec42_reconstructed[2] == vec42[2]);
#endif

index.add(43, view_t{&vec43[0], 3ul});
expect(index.size() == 2);
#if USEARCH_LOOKUP_LABEL
index.remove(43);
expect(index.size() == 1);
#endif
}

template <typename index_at> void test_sets(index_at&& index) {
Expand Down
Loading