Skip to content

Commit

Permalink
add estimate feature & test
Browse files Browse the repository at this point in the history
Signed-off-by: LHT129 <[email protected]>
  • Loading branch information
LHT129 committed Jan 2, 2025
1 parent 3534971 commit 82b2041
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 6 deletions.
2 changes: 2 additions & 0 deletions include/vsag/index_feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ enum IndexFeature {
SUPPORT_SEARCH_DELETE_CONCURRENT, /**< Supports concurrent searching and deletion */
SUPPORT_ADD_SEARCH_DELETE_CONCURRENT, /**< Supports concurrent addition, searching, and deletion */

SUPPORT_ESTIMATE_MEMORY, /**< Supports estimating memory usage from the data count */

INDEX_FEATURE_COUNT /** must be last one */
};
} // namespace vsag
16 changes: 11 additions & 5 deletions src/algorithm/hgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -886,19 +886,25 @@ HGraph::resize(uint64_t new_size) {
void
HGraph::init_features() {
// Common Init
// Build & Add
feature_list_.SetFeatures({IndexFeature::SUPPORT_BUILD,
IndexFeature::SUPPORT_BUILD_WITH_MULTI_THREAD,
IndexFeature::SUPPORT_ADD_AFTER_BUILD,
IndexFeature::SUPPORT_KNN_SEARCH,
IndexFeature::SUPPORT_ADD_AFTER_BUILD});
// search
feature_list_.SetFeatures({IndexFeature::SUPPORT_KNN_SEARCH,
IndexFeature::SUPPORT_RANGE_SEARCH,
IndexFeature::SUPPORT_KNN_SEARCH_WITH_ID_FILTER,
IndexFeature::SUPPORT_RANGE_SEARCH_WITH_ID_FILTER,
IndexFeature::SUPPORT_SEARCH_CONCURRENT,
IndexFeature::SUPPORT_DESERIALIZE_BINARY_SET,
IndexFeature::SUPPORT_RANGE_SEARCH_WITH_ID_FILTER});
// concurrency
feature_list_.SetFeature(IndexFeature::SUPPORT_SEARCH_CONCURRENT);
// serialize
feature_list_.SetFeatures({IndexFeature::SUPPORT_DESERIALIZE_BINARY_SET,
IndexFeature::SUPPORT_DESERIALIZE_FILE,
IndexFeature::SUPPORT_DESERIALIZE_READER_SET,
IndexFeature::SUPPORT_SERIALIZE_BINARY_SET,
IndexFeature::SUPPORT_SERIALIZE_FILE});
// other
feature_list_.SetFeatures({IndexFeature::SUPPORT_ESTIMATE_MEMORY});

// About Train
auto name = this->basic_flatten_codes_->GetQuantizerName();
Expand Down
4 changes: 3 additions & 1 deletion src/index/hgraph_zparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ static const std::string HGRAPH_PARAMS_TEMPLATE =
},
"{HGRAPH_PRECISE_CODES_KEY}": {
"{IO_TYPE_KEY}": "{IO_TYPE_VALUE_BLOCK_MEMORY_IO}",
"{IO_PARAMS_KEY}": {},
"{IO_PARAMS_KEY}": {
"{BLOCK_IO_BLOCK_SIZE_KEY}": {DEFAULT_BLOCK_SIZE}
},
"codes_type": "flatten_codes",
"codes_param": {},
"{QUANTIZATION_TYPE_KEY}": "{QUANTIZATION_TYPE_VALUE_FP32}",
Expand Down
13 changes: 13 additions & 0 deletions tests/fixtures/fixtures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

#include "fixtures.h"

#include <malloc.h>
#include <unistd.h>

#include <cstdint>
#include <random>
#include <string>
Expand Down Expand Up @@ -301,5 +304,15 @@ SplitString(const std::string& s, char delimiter) {

return tokens;
}
/**
 * @brief Report the memory usage of the current process, in bytes.
 *
 * Reads the second field of /proc/self/statm (the resident page count per
 * proc(5)) and multiplies it by the system page size.
 *
 * @note This call blocks for about one second because of the sleep below.
 *
 * @return memory usage in bytes, or 0 when /proc/self/statm cannot be opened
 *         or parsed.
 */
uint64_t
GetMemoryUsageByte() {
    // Ask the allocator to hand free heap memory back to the OS before sampling.
    malloc_trim(0);
    // NOTE(review): presumably lets the process accounting settle after the
    // trim; confirm whether the full second is really required.
    sleep(1);

    std::ifstream statm("/proc/self/statm");
    uint64_t total_pages = 0;
    uint64_t resident_pages = 0;
    // Only the second field is needed; bail out instead of using garbage when
    // the file is missing or malformed.
    if (!(statm >> total_pages >> resident_pages)) {
        return 0;
    }

    // Query the real page size rather than assuming 4 KiB, so the result is
    // also correct on systems configured with larger pages.
    constexpr uint64_t kFallbackPageSize = 4096;
    const long reported_page_size = sysconf(_SC_PAGESIZE);
    const uint64_t page_size = reported_page_size > 0
                                   ? static_cast<uint64_t>(reported_page_size)
                                   : kFallbackPageSize;
    return resident_pages * page_size;
}

} // namespace fixtures
3 changes: 3 additions & 0 deletions tests/fixtures/fixtures.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,7 @@ GetFileSize(const std::string& filename);

std::vector<std::string>
SplitString(const std::string& s, char delimiter);

/**
 * @brief Get the memory usage of the current process, in bytes.
 *
 * The value is derived from the second field of /proc/self/statm (resident
 * pages per proc(5)). The implementation calls malloc_trim(0) and sleeps for
 * one second before sampling, so every call is slow.
 *
 * @return memory usage of the current process in bytes.
 */
uint64_t
GetMemoryUsageByte();
} // Namespace fixtures
21 changes: 21 additions & 0 deletions tests/test_hgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,24 @@ TEST_CASE_PERSISTENT_FIXTURE(fixtures::HgraphTestIndex, "HGraph Duplicate Build"
}
}
}

// Runs TestEstimateMemory for every metric type, dimension and quantization
// listed in the fixture, with the global block size limit temporarily lowered.
TEST_CASE_PERSISTENT_FIXTURE(fixtures::HgraphTestIndex, "HGraph Estimate Memory", "[ft][hgraph]") {
    auto origin_size = vsag::Options::Instance().block_size_limit();
    auto size = GENERATE(1024 * 1024 * 2);
    auto metric_type = GENERATE("l2", "ip", "cosine");

    // Puts the global block size limit back when it goes out of scope, so a
    // failing REQUIRE inside the loop cannot leak the modified limit into
    // test cases that run afterwards.
    using BlockSizeLimit = decltype(origin_size);
    struct BlockSizeLimitRestorer {
        BlockSizeLimit saved;
        ~BlockSizeLimitRestorer() {
            vsag::Options::Instance().set_block_size_limit(saved);
        }
    };

    const std::string name = "hgraph";
    uint64_t estimate_count = 3000;
    for (auto& dim : dims) {
        for (auto& [base_quantization_str, recall] : test_cases) {
            BlockSizeLimitRestorer restore_limit{origin_size};
            vsag::Options::Instance().set_block_size_limit(size);
            auto param =
                GenerateHGraphBuildParametersString(metric_type, dim, base_quantization_str);
            auto dataset = pool.GetDatasetAndCreate(dim, estimate_count, metric_type);

            TestEstimateMemory(name, param, dataset);
        }
    }
}
17 changes: 17 additions & 0 deletions tests/test_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,5 +537,22 @@ TestIndex::TestDuplicateAdd(const TestIndex::IndexPtr& index, const TestDatasetP
REQUIRE(add_index_2.has_value());
check_func(add_index_2.value());
}
/**
 * @brief Check that EstimateMemory() brackets the memory actually used by Build().
 *
 * Creates a fresh index, and if it reports SUPPORT_ESTIMATE_MEMORY, compares
 * the estimate for the dataset's element count with the growth in process
 * memory measured around index creation and Build(). The estimate must not be
 * below the measured growth and must not exceed it by more than 5 MiB.
 *
 * @param index_name  name passed to TestFactory to create the index
 * @param build_param build parameter string passed to TestFactory
 * @param dataset     dataset whose base_ is used for the build
 */
void
TestIndex::TestEstimateMemory(const std::string& index_name,
                              const std::string& build_param,
                              const TestDatasetPtr& dataset) {
    // Upper bound on how far the estimate may exceed the measured usage.
    constexpr uint64_t kEstimateSlackBytes = 5ULL * 1024 * 1024;

    auto init_memory = fixtures::GetMemoryUsageByte();
    auto index = TestFactory(index_name, build_param, /*expect_success= */ true);
    REQUIRE(index->GetNumElements() == 0);
    if (not index->CheckFeature(vsag::SUPPORT_ESTIMATE_MEMORY)) {
        return;
    }

    auto data_size = dataset->base_->GetNumElements();
    auto estimate_memory = index->EstimateMemory(data_size);
    auto build_result = index->Build(dataset->base_);
    // Sample right after the build so later bookkeeping does not skew the reading.
    auto end_memory = fixtures::GetMemoryUsageByte();
    // A failed build would make the memory comparison meaningless.
    REQUIRE(build_result.has_value());
    // Both readings are unsigned; guard the subtraction against wrapping around.
    REQUIRE(end_memory >= init_memory);
    const uint64_t real_memory = end_memory - init_memory;
    REQUIRE(estimate_memory >= real_memory);
    REQUIRE(estimate_memory <= real_memory + kEstimateSlackBytes);
}

} // namespace fixtures
5 changes: 5 additions & 0 deletions tests/test_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ class TestIndex {
bool expected_success = true);
static void
TestDuplicateAdd(const IndexPtr& index, const TestDatasetPtr& dataset);

/**
 * @brief Compare the index's EstimateMemory() result with measured memory growth.
 *
 * Creates the index through TestFactory and, only when it reports
 * SUPPORT_ESTIMATE_MEMORY, builds it from dataset->base_ and requires the
 * estimate to be at least the measured growth and at most 5 MiB above it.
 *
 * @param index_name  index name passed to TestFactory
 * @param build_param build parameter string passed to TestFactory
 * @param dataset     dataset used for the build
 */
static void
TestEstimateMemory(const std::string& index_name,
const std::string& build_param,
const TestDatasetPtr& dataset);
};

} // namespace fixtures

0 comments on commit 82b2041

Please sign in to comment.