From e70e50ce936029a7bfa91341df5d8a34c96c8556 Mon Sep 17 00:00:00 2001
From: LHT129
Date: Thu, 2 Jan 2025 05:05:46 +0000
Subject: [PATCH] add estimate feature & test

Signed-off-by: LHT129
---
Reviewer notes (text between the three-dash line and the diffstat is not part of the
commit message):

What this patch does
- Adds IndexFeature::SUPPORT_ESTIMATE_MEMORY and sets it for HGraph in init_features(),
  which is also regrouped into build/add, search, concurrency, serialize and other.
- Gives the precise-codes IO params in HGRAPH_PARAMS_TEMPLATE an explicit block size.
- Adds fixtures::GetMemoryUsageByte(), which returns the second field of /proc/self/statm
  multiplied by the page size, after calling malloc_trim(0) and sleeping for one second.
- Adds TestIndex::TestEstimateMemory() and an "HGraph Estimate Memory" test case that
  requires EstimateMemory() to be no smaller than the measured growth and at most 5 MiB
  above it.

Changes made during review
- GetMemoryUsageByte(): the page size is read with sysconf(_SC_PAGESIZE) instead of being
  hard-coded to 4096, and the statm buffer is zero-initialised so that a failed open or
  parse cannot feed indeterminate values into the result.
- TestEstimateMemory(): REQUIRE(end_memory >= init_memory) now precedes the unsigned
  subtraction, so a shrinking measurement is reported directly instead of wrapping.
- "HGraph Estimate Memory": removed the unused search_param local.
- Hunk headers and the diffstat were adjusted for the two line-count changes above; the
  "index" blob ids of the affected files are stale as a result.

Reconstruction caveats (TODO confirm before applying)
- This patch was recovered from a copy that had lost its line breaks, indentation and
  everything written inside angle brackets. Whitespace and comment wrapping of context
  lines are best-effort.
- The two added includes in tests/fixtures/fixtures.cpp are restored as <malloc.h> and
  <unistd.h> because the new function calls malloc_trim() and sleep(). The three context
  "#include" lines that follow them could not be recovered and are left bare.
- The static_cast target in TestEstimateMemory() is restored as uint64_t, the type that
  GetMemoryUsageByte() returns; check it against the return type of EstimateMemory().
- The return type of SplitString() in the fixtures.h context is restored as
  std::vector<std::string>.
- Author e-mail addresses in From:/Signed-off-by: were lost and are not restored.

 include/vsag/index_feature.h     |  2 ++
 src/algorithm/hgraph.cpp         | 16 +++++++++++-----
 src/index/hgraph_zparameters.cpp |  4 +++-
 tests/fixtures/fixtures.cpp      | 13 +++++++++++++
 tests/fixtures/fixtures.h        |  3 +++
 tests/test_hgraph.cpp            | 20 ++++++++++++++++++++
 tests/test_index.cpp             | 18 ++++++++++++++++++
 tests/test_index.h               |  5 +++++
 8 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/include/vsag/index_feature.h b/include/vsag/index_feature.h
index fa9493c3..32be1571 100644
--- a/include/vsag/index_feature.h
+++ b/include/vsag/index_feature.h
@@ -57,6 +57,8 @@ enum IndexFeature {
     SUPPORT_SEARCH_DELETE_CONCURRENT, /**< Supports concurrent searching and deletion */
     SUPPORT_ADD_SEARCH_DELETE_CONCURRENT, /**< Supports concurrent addition, searching, and
                                              deletion */
+    SUPPORT_ESTIMATE_MEMORY, /**< Supports estimate memory usage by data count */
+
     INDEX_FEATURE_COUNT /** must be last one */
 };
 }  // namespace vsag
diff --git a/src/algorithm/hgraph.cpp b/src/algorithm/hgraph.cpp
index 5ac04632..a3ebeef8 100644
--- a/src/algorithm/hgraph.cpp
+++ b/src/algorithm/hgraph.cpp
@@ -886,19 +886,25 @@ HGraph::resize(uint64_t new_size) {
 void
 HGraph::init_features() {
     // Common Init
+    // Build & Add
     feature_list_.SetFeatures({IndexFeature::SUPPORT_BUILD,
                                IndexFeature::SUPPORT_BUILD_WITH_MULTI_THREAD,
-                               IndexFeature::SUPPORT_ADD_AFTER_BUILD,
-                               IndexFeature::SUPPORT_KNN_SEARCH,
+                               IndexFeature::SUPPORT_ADD_AFTER_BUILD});
+    // search
+    feature_list_.SetFeatures({IndexFeature::SUPPORT_KNN_SEARCH,
                                IndexFeature::SUPPORT_RANGE_SEARCH,
                                IndexFeature::SUPPORT_KNN_SEARCH_WITH_ID_FILTER,
-                               IndexFeature::SUPPORT_RANGE_SEARCH_WITH_ID_FILTER,
-                               IndexFeature::SUPPORT_SEARCH_CONCURRENT,
-                               IndexFeature::SUPPORT_DESERIALIZE_BINARY_SET,
+                               IndexFeature::SUPPORT_RANGE_SEARCH_WITH_ID_FILTER});
+    // concurrency
+    feature_list_.SetFeature(IndexFeature::SUPPORT_SEARCH_CONCURRENT);
+    // serialize
+    feature_list_.SetFeatures({IndexFeature::SUPPORT_DESERIALIZE_BINARY_SET,
                                IndexFeature::SUPPORT_DESERIALIZE_FILE,
                                IndexFeature::SUPPORT_DESERIALIZE_READER_SET,
                                IndexFeature::SUPPORT_SERIALIZE_BINARY_SET,
                                IndexFeature::SUPPORT_SERIALIZE_FILE});
+    // other
+    feature_list_.SetFeatures({IndexFeature::SUPPORT_ESTIMATE_MEMORY});
 
     // About Train
     auto name = this->basic_flatten_codes_->GetQuantizerName();
diff --git a/src/index/hgraph_zparameters.cpp b/src/index/hgraph_zparameters.cpp
index 6f823e17..fa1c5fef 100644
--- a/src/index/hgraph_zparameters.cpp
+++ b/src/index/hgraph_zparameters.cpp
@@ -62,7 +62,9 @@ static const std::string HGRAPH_PARAMS_TEMPLATE =
         },
         "{HGRAPH_PRECISE_CODES_KEY}": {
             "{IO_TYPE_KEY}": "{IO_TYPE_VALUE_BLOCK_MEMORY_IO}",
-            "{IO_PARAMS_KEY}": {},
+            "{IO_PARAMS_KEY}": {
+                "{BLOCK_IO_BLOCK_SIZE_KEY}": {DEFAULT_BLOCK_SIZE}
+            },
             "codes_type": "flatten_codes",
             "codes_param": {},
             "{QUANTIZATION_TYPE_KEY}": "{QUANTIZATION_TYPE_VALUE_FP32}",
diff --git a/tests/fixtures/fixtures.cpp b/tests/fixtures/fixtures.cpp
index c7e71f9e..e2f6bdc8 100644
--- a/tests/fixtures/fixtures.cpp
+++ b/tests/fixtures/fixtures.cpp
@@ -15,6 +15,9 @@
 
 #include "fixtures.h"
 
+#include <malloc.h>
+#include <unistd.h>
+
 #include
 #include
 #include
@@ -301,5 +304,15 @@ SplitString(const std::string& s, char delimiter) {
 
     return tokens;
 }
+uint64_t
+GetMemoryUsageByte() {
+    malloc_trim(0);
+    sleep(1);
+    std::ifstream statm("/proc/self/statm");
+    uint64_t sizes[6] = {};  // zeroed so a failed read never yields indeterminate values
+    statm >> sizes[0] >> sizes[1] >> sizes[2] >> sizes[3] >> sizes[4] >> sizes[5];
+    const auto page_size = static_cast<uint64_t>(sysconf(_SC_PAGESIZE));  // not always 4 KiB
+    return sizes[1] * page_size;
+}
 
 }  // namespace fixtures
diff --git a/tests/fixtures/fixtures.h b/tests/fixtures/fixtures.h
index dc1e388c..bf704893 100644
--- a/tests/fixtures/fixtures.h
+++ b/tests/fixtures/fixtures.h
@@ -228,4 +228,7 @@ GetFileSize(const std::string& filename);
 
 std::vector<std::string>
 SplitString(const std::string& s, char delimiter);
+
+uint64_t
+GetMemoryUsageByte();
 }  // Namespace fixtures
diff --git a/tests/test_hgraph.cpp b/tests/test_hgraph.cpp
index e6967fc3..eea93ac6 100644
--- a/tests/test_hgraph.cpp
+++ b/tests/test_hgraph.cpp
@@ -444,3 +444,23 @@ TEST_CASE_PERSISTENT_FIXTURE(fixtures::HgraphTestIndex, "HGraph Duplicate Build"
         }
     }
 }
+
+TEST_CASE_PERSISTENT_FIXTURE(fixtures::HgraphTestIndex, "HGraph Estimate Memory", "[ft][hgraph]") {
+    auto origin_size = vsag::Options::Instance().block_size_limit();
+    auto size = GENERATE(1024 * 1024 * 2);
+    auto metric_type = GENERATE("l2", "ip", "cosine");
+
+    const std::string name = "hgraph";
+    uint64_t estimate_count = 3000;
+    for (auto& dim : dims) {
+        for (auto& [base_quantization_str, recall] : test_cases) {
+            vsag::Options::Instance().set_block_size_limit(size);
+            auto param =
+                GenerateHGraphBuildParametersString(metric_type, dim, base_quantization_str);
+            auto dataset = pool.GetDatasetAndCreate(dim, estimate_count, metric_type);
+
+            TestEstimateMemory(name, param, dataset);
+            vsag::Options::Instance().set_block_size_limit(origin_size);
+        }
+    }
+}
diff --git a/tests/test_index.cpp b/tests/test_index.cpp
index 378291a7..a036f5f0 100644
--- a/tests/test_index.cpp
+++ b/tests/test_index.cpp
@@ -537,5 +537,23 @@ TestIndex::TestDuplicateAdd(const TestIndex::IndexPtr& index, const TestDatasetP
     REQUIRE(add_index_2.has_value());
     check_func(add_index_2.value());
 }
+void
+TestIndex::TestEstimateMemory(const std::string& index_name,
+                              const std::string& build_param,
+                              const TestDatasetPtr& dataset) {
+    auto init_memory = fixtures::GetMemoryUsageByte();
+    auto index = TestFactory(index_name, build_param, /*expect_success= */ true);
+    REQUIRE(index->GetNumElements() == 0);
+    if (index->CheckFeature(vsag::SUPPORT_ESTIMATE_MEMORY)) {
+        auto data_size = dataset->base_->GetNumElements();
+        auto estimate_memory = index->EstimateMemory(data_size);
+        index->Build(dataset->base_);
+        auto end_memory = fixtures::GetMemoryUsageByte();
+        REQUIRE(end_memory >= init_memory);  // otherwise the unsigned difference below wraps
+        auto real_memory = end_memory - init_memory;
+        REQUIRE(estimate_memory >= static_cast<uint64_t>(real_memory));
+        REQUIRE(estimate_memory <= static_cast<uint64_t>(real_memory + 5 * 1024 * 1024));
+    }
+}
 
 }  // namespace fixtures
\ No newline at end of file
diff --git a/tests/test_index.h b/tests/test_index.h
index ba6c017e..20e943ac 100644
--- a/tests/test_index.h
+++ b/tests/test_index.h
@@ -136,6 +136,11 @@ class TestIndex {
                             bool expected_success = true);
     static void
     TestDuplicateAdd(const IndexPtr& index, const TestDatasetPtr& dataset);
+
+    static void
+    TestEstimateMemory(const std::string& index_name,
+                       const std::string& build_param,
+                       const TestDatasetPtr& dataset);
 };
 
 }  // namespace fixtures