Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchs/indexes/hgraph-95.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ HGRAPH/GIST/95:
datapath: "/tmp/data/gist-960-euclidean.hdf5"
type: "build,search" # build, search
index_name: "hgraph"
create_params: '{"dim":960,"dtype":"float32","metric_type":"l2","index_param":{"base_quantization_type":"sq8_uniform","max_degree":96,"ef_construction":400, "precise_quantization_type":"fp32", "use_reorder":true}}'
search_params: '{"hgraph":{"ef_search":120}}'
create_params: '{"dim":960,"dtype":"float32","metric_type":"l2","index_param":{"base_quantization_type":"sq8_uniform","max_degree":96,"ef_construction":400,"tau":0.02, "precise_quantization_type":"fp32", "use_reorder":true}}'
search_params: '{"hgraph":{"ef_search":88}}'
index_path: "/tmp/gist-960-euclidean/index/hgraph_index"
topk: 10
search_mode: "knn" # ["knn", "range", "knn_filter", "range_filter"]
Expand Down
1 change: 1 addition & 0 deletions include/vsag/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ extern const char* const HGRAPH_BASE_QUANTIZATION_TYPE;
extern const char* const HGRAPH_GRAPH_MAX_DEGREE;
extern const char* const HGRAPH_BUILD_EF_CONSTRUCTION;
extern const char* const HGRAPH_BUILD_ALPHA;
extern const char* const HGRAPH_BUILD_TAU;
extern const char* const HGRAPH_INIT_CAPACITY;
extern const char* const HGRAPH_GRAPH_TYPE;
extern const char* const HGRAPH_GRAPH_STORAGE_TYPE;
Expand Down
51 changes: 37 additions & 14 deletions src/algorithm/hgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ HGraph::HGraph(const HGraphParameterPtr& hgraph_param, const vsag::IndexCommonPa
build_by_base_(hgraph_param->build_by_base),
ef_construct_(hgraph_param->ef_construction),
alpha_(hgraph_param->alpha),
tau_(hgraph_param->tau),
select_edge_param(hgraph_param->selectedgeparam),
odescent_param_(hgraph_param->odescent_param),
graph_type_(hgraph_param->graph_type),
hierarchical_datacell_param_(hgraph_param->hierarchical_graph_param),
Expand Down Expand Up @@ -1537,13 +1539,24 @@ HGraph::graph_add_one(const void* data, int level, InnerIdType inner_id) {
label_table_->SetDuplicateId(static_cast<InnerIdType>(param.duplicate_id), inner_id);
return false;
}
mutually_connect_new_element(inner_id,
result,
this->bottom_graph_,
flatten_codes,
neighbors_mutex_,
allocator_,
alpha_);
if (select_edge_param == "alpha") {
mutually_connect_new_element<EdgeSelectionParam::ALPHA>(inner_id,
result,
this->bottom_graph_,
flatten_codes,
neighbors_mutex_,
allocator_,
alpha_);
} else if (select_edge_param == "tau") {
mutually_connect_new_element<EdgeSelectionParam::TAU>(inner_id,
result,
this->bottom_graph_,
flatten_codes,
neighbors_mutex_,
allocator_,
tau_);
}

} else {
bottom_graph_->InsertNeighborsById(inner_id, Vector<InnerIdType>(allocator_));
}
Expand All @@ -1557,13 +1570,23 @@ HGraph::graph_add_one(const void* data, int level, InnerIdType inner_id) {
// to specify which overloaded function to call
(VisitedListPtr) nullptr,
discard_stats);
mutually_connect_new_element(inner_id,
result,
route_graphs_[j],
flatten_codes,
neighbors_mutex_,
allocator_,
alpha_);
if (select_edge_param == "alpha") {
mutually_connect_new_element<EdgeSelectionParam::ALPHA>(inner_id,
result,
this->bottom_graph_,
flatten_codes,
neighbors_mutex_,
allocator_,
alpha_);
} else if (select_edge_param == "tau") {
mutually_connect_new_element<EdgeSelectionParam::TAU>(inner_id,
result,
this->bottom_graph_,
flatten_codes,
neighbors_mutex_,
allocator_,
tau_);
}
Comment on lines +1573 to +1589

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

There appears to be a copy-paste error in this block. The mutually_connect_new_element function is called with this->bottom_graph_ for both alpha and tau strategies inside the loop over route_graphs_. It should be using route_graphs_[j] to connect the new element to the correct hierarchical level, as was done in the original code. This will cause incorrect graph construction for all but the bottom layer.

Additionally, this block of code is almost identical to the one for bottom_graph_ above (lines 1034-1050). You could consider refactoring this logic into a helper function to avoid duplication and prevent such errors in the future.

            if (select_edge_param == "alpha") {
                mutually_connect_new_element<EdgeSelectionParam::ALPHA>(inner_id,
                                                                        result,
                                                                        route_graphs_[j],
                                                                        flatten_codes,
                                                                        neighbors_mutex_,
                                                                        allocator_,
                                                                        alpha_);
            } else if (select_edge_param == "tau") {
                mutually_connect_new_element<EdgeSelectionParam::TAU>(inner_id,
                                                                      result,
                                                                      route_graphs_[j],
                                                                      flatten_codes,
                                                                      neighbors_mutex_,
                                                                      allocator_,
                                                                      tau_);
            }

} else {
route_graphs_[j]->InsertNeighborsById(inner_id, Vector<InnerIdType>(allocator_));
}
Expand Down
2 changes: 2 additions & 0 deletions src/algorithm/hgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,8 @@ class HGraph : public InnerIndexInterface {

uint64_t ef_construct_{400};
float alpha_{1.0};
float tau_{0.0F};
std::string select_edge_param{"alpha"};

std::atomic<uint64_t> total_count_{0};

Expand Down
5 changes: 5 additions & 0 deletions src/algorithm/hgraph_parameter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ HGraphParameter::FromJson(const JsonType& json) {

if (json.Contains(ALPHA_KEY)) {
this->alpha = json[ALPHA_KEY].GetFloat();
this->selectedgeparam = "alpha";
} else if (json.Contains(TAU_KEY)) {
this->tau = json[TAU_KEY].GetFloat();
this->selectedgeparam = "tau";
}
Comment on lines 105 to 111

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The current logic gives precedence to alpha if both alpha and tau are present in the configuration. While this is a valid approach, it might be confusing for users who might provide both parameters by mistake. To improve clarity and prevent misconfiguration, consider adding a check to ensure that alpha and tau are mutually exclusive, and throw an exception if both are defined.

Suggested change
if (json.Contains(ALPHA_KEY)) {
this->alpha = json[ALPHA_KEY].GetFloat();
this->selectedgeparam = "alpha";
} else if (json.Contains(TAU_KEY)) {
this->tau = json[TAU_KEY].GetFloat();
this->selectedgeparam = "tau";
}
if (json.Contains(ALPHA_KEY) && json.Contains(TAU_KEY)) {
throw VsagException(ErrorType::INVALID_ARGUMENT, "alpha and tau are mutually exclusive, please provide only one.");
}
if (json.Contains(ALPHA_KEY)) {
this->alpha = json[ALPHA_KEY].GetFloat();
this->selectedgeparam = "alpha";
} else if (json.Contains(TAU_KEY)) {
this->tau = json[TAU_KEY].GetFloat();
this->selectedgeparam = "tau";
}


if (json.Contains(BUILD_THREAD_COUNT_KEY)) {
Expand Down Expand Up @@ -136,6 +140,7 @@ HGraphParameter::ToJson() const {
json[GRAPH_KEY].SetJson(this->bottom_graph_param->ToJson());
json[EF_CONSTRUCTION_KEY].SetInt(this->ef_construction);
json[ALPHA_KEY].SetFloat(this->alpha);
json[TAU_KEY].SetFloat(this->tau);
json[SUPPORT_DUPLICATE].SetBool(this->support_duplicate);
return json;
}
Expand Down
2 changes: 2 additions & 0 deletions src/algorithm/hgraph_parameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class HGraphParameter : public InnerIndexParameter {

uint64_t ef_construction{400};
float alpha{1.0F};
float tau{0.0F};
std::string selectedgeparam{"alpha"};

bool support_duplicate{false};
bool support_tombstone{false};
Expand Down
9 changes: 7 additions & 2 deletions src/algorithm/pyramid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,8 +669,13 @@ Pyramid::add_one_point(const std::shared_ptr<IndexNode>& node,
auto results = searcher_->Search(
node->graph_, codes, vl, vector, search_param, (LabelTablePtr) nullptr, discard_stats);
pool_->ReturnOne(vl);
mutually_connect_new_element(
inner_id, results, node->graph_, codes, points_mutex_, allocator_, alpha_);
if (select_edge_param == "alpha") {
mutually_connect_new_element<EdgeSelectionParam::ALPHA>(
inner_id, results, node->graph_, codes, points_mutex_, allocator_, alpha_);
} else {
mutually_connect_new_element<EdgeSelectionParam::TAU>(
inner_id, results, node->graph_, codes, points_mutex_, allocator_, tau_);
}
if (update_entry_point) {
node->entry_point_ = inner_id;
}
Expand Down
4 changes: 4 additions & 0 deletions src/algorithm/pyramid.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class Pyramid : public InnerIndexInterface {
Pyramid(const PyramidParamPtr& pyramid_param, const IndexCommonParam& common_param)
: InnerIndexInterface(pyramid_param, common_param),
alpha_(pyramid_param->alpha),
tau_(pyramid_param->tau),
select_edge_param(pyramid_param->selectedgeparam),
no_build_levels_(common_param.allocator_.get()),
odescent_param_(pyramid_param->odescent_param),
ef_construction_(pyramid_param->ef_construction),
Expand Down Expand Up @@ -223,6 +225,8 @@ class Pyramid : public InnerIndexInterface {
int64_t max_capacity_{0};
int64_t cur_element_count_{0};
float alpha_{1.0F};
float tau_{0.0F};
std::string select_edge_param{"alpha"};

std::shared_mutex resize_mutex_;
std::mutex cur_element_count_mutex_;
Expand Down
8 changes: 8 additions & 0 deletions src/algorithm/pyramid_zparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ PyramidParameters::FromJson(const JsonType& json) {

this->base_codes_param = CreateFlattenParam(json[BASE_CODES_KEY]);

if (json.Contains(ALPHA_KEY)) {
this->alpha = json[ALPHA_KEY].GetFloat();
this->selectedgeparam = "alpha";
} else if (json.Contains(TAU_KEY)) {
this->tau = json[TAU_KEY].GetFloat();
this->selectedgeparam = "tau";
}
Comment on lines +50 to +56

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Similar to hgraph_parameter.cpp, the current logic gives precedence to alpha if both alpha and tau are present. This could lead to user confusion. It would be more robust to enforce mutual exclusivity by throwing an error if both parameters are provided in the configuration.

    if (json.Contains(ALPHA_KEY) && json.Contains(TAU_KEY)) {
        throw VsagException(ErrorType::INVALID_ARGUMENT, "alpha and tau are mutually exclusive, please provide only one.");
    }

    if (json.Contains(ALPHA_KEY)) {
        this->alpha = json[ALPHA_KEY].GetFloat();
        this->selectedgeparam = "alpha";
    } else if (json.Contains(TAU_KEY)) {
        this->tau = json[TAU_KEY].GetFloat();
        this->selectedgeparam = "tau";
    }


if (json.Contains(NO_BUILD_LEVELS)) {
const auto& no_build_levels_json = json[NO_BUILD_LEVELS];
CHECK_ARGUMENT(no_build_levels_json.IsArray(),
Expand Down
3 changes: 3 additions & 0 deletions src/algorithm/pyramid_zparameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@ struct PyramidParameters : public InnerIndexParameter {
ODescentParameterPtr odescent_param{nullptr};

std::vector<int32_t> no_build_levels;

uint64_t ef_construction{400};
int64_t max_degree{64};
std::string graph_type{GRAPH_TYPE_VALUE_NSW};
float alpha{1.2F};
float tau{0.0F};
std::string selectedgeparam{"alpha"};
uint32_t index_min_size{0};
};

Expand Down
1 change: 1 addition & 0 deletions src/constants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ const char* const HGRAPH_BASE_QUANTIZATION_TYPE = "base_quantization_type";
const char* const HGRAPH_GRAPH_MAX_DEGREE = "max_degree";
const char* const HGRAPH_BUILD_EF_CONSTRUCTION = "ef_construction";
const char* const HGRAPH_BUILD_ALPHA = "alpha";
const char* const HGRAPH_BUILD_TAU = "tau";
const char* const HGRAPH_INIT_CAPACITY = "hgraph_init_capacity";
const char* const HGRAPH_GRAPH_TYPE = "graph_type";
const char* const HGRAPH_GRAPH_STORAGE_TYPE = "graph_storage_type";
Expand Down
56 changes: 49 additions & 7 deletions src/impl/pruning_strategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
#include "utils/lock_strategy.h"
namespace vsag {

template <EdgeSelectionParam param>
void
select_edges_by_heuristic(const DistHeapPtr& edges,
uint64_t max_size,
const FlattenInterfacePtr& flatten,
Allocator* allocator,
float alpha) {
float param_value) {
if (edges->Size() < max_size) {
return;
}
Expand All @@ -49,9 +50,21 @@ select_edges_by_heuristic(const DistHeapPtr& edges,

for (const auto& second_pair : return_list) {
float curdist = flatten->ComputePairVectors(second_pair.second, current_pair.second);
if (alpha * curdist < float_query) {
good = false;
break;

if constexpr (param == EdgeSelectionParam::ALPHA) {
if (param_value * curdist < float_query) {
good = false;
break;
}
} else {
if (curdist < (float_query - 3 * param_value)) {
good = false;
break;
}
if (float_query <= 3 * param_value) {
good = true;
break;
}
}
}
if (good) {
Expand All @@ -64,16 +77,19 @@ select_edges_by_heuristic(const DistHeapPtr& edges,
}
}

template <EdgeSelectionParam param>
InnerIdType
mutually_connect_new_element(InnerIdType cur_c,
const DistHeapPtr& top_candidates,
const GraphInterfacePtr& graph,
const FlattenInterfacePtr& flatten,
const MutexArrayPtr& neighbors_mutexes,
Allocator* allocator,
float alpha) {
float param_value) {
const size_t max_size = graph->MaximumDegree();
select_edges_by_heuristic(top_candidates, max_size, flatten, allocator, alpha);

select_edges_by_heuristic<param>(top_candidates, max_size, flatten, allocator, param_value);

if (top_candidates->Size() > max_size) {
throw VsagException(
ErrorType::INTERNAL_ERROR,
Expand Down Expand Up @@ -123,7 +139,7 @@ mutually_connect_new_element(InnerIdType cur_c,
neighbors[j]);
}

select_edges_by_heuristic(candidates, max_size, flatten, allocator, alpha);
select_edges_by_heuristic<param>(candidates, max_size, flatten, allocator, param_value);

Vector<InnerIdType> cand_neighbors(allocator);
while (not candidates->Empty()) {
Expand All @@ -136,4 +152,30 @@ mutually_connect_new_element(InnerIdType cur_c,
return next_closest_entry_point;
}

template void
select_edges_by_heuristic<EdgeSelectionParam::ALPHA>(
const DistHeapPtr&, uint64_t, const FlattenInterfacePtr&, Allocator*, float);

template void
select_edges_by_heuristic<EdgeSelectionParam::TAU>(
const DistHeapPtr&, uint64_t, const FlattenInterfacePtr&, Allocator*, float);

template InnerIdType
mutually_connect_new_element<EdgeSelectionParam::ALPHA>(InnerIdType,
const DistHeapPtr&,
const GraphInterfacePtr&,
const FlattenInterfacePtr&,
const MutexArrayPtr&,
Allocator*,
float);

template InnerIdType
mutually_connect_new_element<EdgeSelectionParam::TAU>(InnerIdType,
const DistHeapPtr&,
const GraphInterfacePtr&,
const FlattenInterfacePtr&,
const MutexArrayPtr&,
Allocator*,
float);

} // namespace vsag
36 changes: 34 additions & 2 deletions src/impl/pruning_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#pragma once

#include "inner_search_param.h"
#include "typing.h"
#include "utils/pointer_define.h"

Expand All @@ -24,20 +25,51 @@ DEFINE_POINTER(FlattenInterface);
DEFINE_POINTER(GraphInterface);
DEFINE_POINTER(MutexArray);

enum class EdgeSelectionParam { ALPHA, TAU };

template <EdgeSelectionParam Param>
void
select_edges_by_heuristic(const DistHeapPtr& edges,
uint64_t max_size,
const FlattenInterfacePtr& flatten,
Allocator* allocator,
float alpha = 1.0F);
float param_value = (Param == EdgeSelectionParam::ALPHA) ? 1.0F : 0.0F);

template <EdgeSelectionParam Param>
InnerIdType
mutually_connect_new_element(InnerIdType cur_c,
const DistHeapPtr& top_candidates,
const GraphInterfacePtr& graph,
const FlattenInterfacePtr& flatten,
const MutexArrayPtr& neighbors_mutexes,
Allocator* allocator,
float alpha = 1.0F);
float param_value = (Param == EdgeSelectionParam::ALPHA) ? 1.0F
: 0.0F);

extern template void
select_edges_by_heuristic<EdgeSelectionParam::ALPHA>(
const DistHeapPtr&, uint64_t, const FlattenInterfacePtr&, Allocator*, float);

extern template void
select_edges_by_heuristic<EdgeSelectionParam::TAU>(
const DistHeapPtr&, uint64_t, const FlattenInterfacePtr&, Allocator*, float);

extern template InnerIdType
mutually_connect_new_element<EdgeSelectionParam::ALPHA>(InnerIdType,
const DistHeapPtr&,
const GraphInterfacePtr&,
const FlattenInterfacePtr&,
const MutexArrayPtr&,
Allocator*,
float);

extern template InnerIdType
mutually_connect_new_element<EdgeSelectionParam::TAU>(InnerIdType,
const DistHeapPtr&,
const GraphInterfacePtr&,
const FlattenInterfacePtr&,
const MutexArrayPtr&,
Allocator*,
float);

} // namespace vsag
Loading
Loading