-
Notifications
You must be signed in to change notification settings - Fork 80
test: add search unrelated parameter tests #1455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 0.16
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,14 +27,28 @@ | |
| #include "vsag/vsag.h" | ||
|
|
||
| const std::string tmp_dir = "/tmp/"; | ||
|
|
||
| namespace fixtures { | ||
|
|
||
| class DiskANNTestResource { | ||
| public: | ||
| std::vector<int> dims; | ||
| std::vector<std::pair<std::string, float>> test_cases; | ||
| std::vector<std::string> metric_types; | ||
| uint64_t base_count; | ||
| }; | ||
|
|
||
| using DiskANNResourcePtr = std::shared_ptr<DiskANNTestResource>; | ||
| class DiskANNTestIndex : public fixtures::TestIndex { | ||
| public: | ||
| static TestDatasetPool pool; | ||
| static std::string | ||
| GenerateDiskANNBuildParametersString(const std::string& metric_type, | ||
| int64_t dim, | ||
| bool use_bsa = false); | ||
|
|
||
| static DiskANNResourcePtr | ||
| GetResource(bool sample = true); | ||
|
|
||
| static constexpr auto search_param_template = R"( | ||
| {{ | ||
| "diskann": {{ | ||
|
|
@@ -47,9 +61,38 @@ class DiskANNTestIndex : public fixtures::TestIndex { | |
| }} | ||
| )"; | ||
|
|
||
| constexpr static uint64_t base_count = 1000; | ||
| static TestDatasetPool pool; | ||
| static std::vector<int> dims; | ||
| static uint64_t base_count; | ||
| static const std::string name; | ||
| static const std::vector<std::pair<std::string, float>> all_test_cases; | ||
| }; | ||
| using DiskANNTestIndexPtr = std::shared_ptr<DiskANNTestIndex>; | ||
|
|
||
| TestDatasetPool DiskANNTestIndex::pool{}; | ||
| std::vector<int> DiskANNTestIndex::dims = fixtures::get_common_used_dims(2, RandomValue(0, 999)); | ||
| uint64_t DiskANNTestIndex::base_count = 1200; | ||
| const std::string DiskANNTestIndex::name = "diskann"; | ||
| const std::vector<std::pair<std::string, float>> DiskANNTestIndex::all_test_cases = { | ||
| {"fp32", 0.99}, | ||
| }; | ||
|
|
||
| DiskANNResourcePtr | ||
| DiskANNTestIndex::GetResource(bool sample) { | ||
| auto resource = std::make_shared<DiskANNTestResource>(); | ||
| if (sample) { | ||
| resource->dims = fixtures::get_common_used_dims(1, RandomValue(0, 999)); | ||
| resource->test_cases = fixtures::RandomSelect(DiskANNTestIndex::all_test_cases, 3); | ||
| resource->metric_types = fixtures::RandomSelect<std::string>({"ip", "l2", "cosine"}, 1); | ||
| resource->base_count = DiskANNTestIndex::base_count; | ||
| } else { | ||
| resource->dims = fixtures::get_common_used_dims(); | ||
| resource->test_cases = DiskANNTestIndex::all_test_cases; | ||
| resource->metric_types = {"ip", "l2", "cosine"}; | ||
| resource->base_count = DiskANNTestIndex::base_count * 10; | ||
| } | ||
| return resource; | ||
| } | ||
|
|
||
| std::string | ||
| DiskANNTestIndex::GenerateDiskANNBuildParametersString(const std::string& metric_type, | ||
|
|
@@ -89,6 +132,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann build test", "[ft][index][ | |
|
|
||
| TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index][diskann]") { | ||
| const std::vector<int> dims = {736, 1536, 2048, 2560, 3072}; | ||
| const std::vector<int> max_degrees = {16, 16, 32, 32, 64}; | ||
| auto metric_type = GENERATE("l2", "ip"); | ||
| const std::string name = "diskann"; | ||
| constexpr auto build_parameter_json = R"( | ||
|
|
@@ -97,7 +141,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index] | |
| "metric_type": "{}", | ||
| "dim": {}, | ||
| "diskann": {{ | ||
| "max_degree": 16, | ||
| "max_degree": {}, | ||
| "ef_construction": 200, | ||
| "pq_dims": {}, | ||
| "pq_sample_rate": 0.5, | ||
|
|
@@ -115,8 +159,12 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index] | |
| }} | ||
| }} | ||
| )"; | ||
| for (auto dim : dims) { | ||
| auto build_parameters_str = fmt::format(build_parameter_json, metric_type, dim, dim / 4); | ||
|
|
||
| for (uint64_t i = 0; i < dims.size(); ++i) { | ||
| auto dim = dims[i]; | ||
| auto max_degree = max_degrees[i]; | ||
| auto build_parameters_str = | ||
| fmt::format(build_parameter_json, metric_type, dim, max_degree, dim / 4); | ||
| auto search_param = fmt::format(search_param_template, dim / 4); | ||
| auto param = GenerateDiskANNBuildParametersString(metric_type, dim); | ||
| auto index = TestFactory(name, param, true); | ||
|
|
@@ -125,7 +173,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index] | |
| TestKnnSearch(index, dataset, search_param, 0.90, true); | ||
| TestRangeSearch(index, dataset, search_param, 0.90, 10, true); | ||
| TestRangeSearch(index, dataset, search_param, 0.45, 5, true); | ||
| TestFilterSearch(index, dataset, search_param, 0.90, true); | ||
| TestFilterSearch(index, dataset, search_param, 0.80, true); | ||
| REQUIRE(index->GetIndexType() == vsag::IndexType::DISKANN); | ||
| } | ||
| } | ||
|
|
@@ -745,3 +793,56 @@ TEST_CASE("split building process", "[ft][diskann]") { | |
| std::cout << "Recall: " << recall_full << std::endl; | ||
| REQUIRE(recall_full == recall_partial); | ||
| } | ||
|
|
||
| static void | ||
| TestDiskANNSearchUnrelatedParameter(const fixtures::DiskANNTestIndexPtr& test_index, | ||
| const fixtures::DiskANNResourcePtr& resource) { | ||
| using namespace fixtures; | ||
| auto origin_size = vsag::Options::Instance().block_size_limit(); | ||
| auto size = GENERATE(1024 * 1024 * 2); | ||
| constexpr const char* search_param = R"({ | ||
| "diskann": { | ||
| "ef_search": 200, | ||
| "io_limit": 200, | ||
| "beam_search": 4, | ||
| "-------unrelated parameters below-------": true, | ||
| "scan_buckets_count": 10 | ||
| } | ||
| })"; | ||
|
|
||
| for (auto metric_type : resource->metric_types) { | ||
| for (auto dim : resource->dims) { | ||
| for (auto& [base_quantization_str, recall] : resource->test_cases) { | ||
| INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}", | ||
| metric_type, | ||
| dim, | ||
| base_quantization_str, | ||
| recall)); | ||
| vsag::Options::Instance().set_block_size_limit(size); | ||
| auto param = | ||
| DiskANNTestIndex::GenerateDiskANNBuildParametersString(metric_type, dim); | ||
| auto index = TestIndex::TestFactory(test_index->name, param, true); | ||
| auto dataset = DiskANNTestIndex::pool.GetDatasetAndCreate( | ||
| dim, resource->base_count, metric_type); | ||
| TestIndex::TestBuildIndex(index, dataset, true); | ||
| TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| TEST_CASE_PERSISTENT_FIXTURE(fixtures::DiskANNTestIndex, | ||
| "(PR) DiskANN SearchUnrelatedParameter", | ||
| "[ft][diskann][pr]") { | ||
| auto test_index = std::make_shared<DiskANNTestIndex>(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. need pr tag
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
| auto resource = test_index->GetResource(true); | ||
| TestDiskANNSearchUnrelatedParameter(test_index, resource); | ||
| } | ||
|
|
||
| TEST_CASE_PERSISTENT_FIXTURE(fixtures::DiskANNTestIndex, | ||
| "(Daily) DiskANN SearchUnrelatedParameter", | ||
| "[ft][diskann][daily]") { | ||
| auto test_index = std::make_shared<DiskANNTestIndex>(); | ||
| auto resource = test_index->GetResource(false); | ||
| TestDiskANNSearchUnrelatedParameter(test_index, resource); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. need daily tag
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,20 +24,25 @@ | |
| #include "vsag/vsag.h" | ||
|
|
||
| namespace fixtures { | ||
|
|
||
| class HNSWTestResource { | ||
| public: | ||
| std::vector<int> dims; | ||
| std::vector<std::pair<std::string, float>> test_cases; | ||
| std::vector<std::string> metric_types; | ||
| uint64_t base_count; | ||
| }; | ||
|
|
||
| using HNSWResourcePtr = std::shared_ptr<HNSWTestResource>; | ||
| class HNSWTestIndex : public fixtures::TestIndex { | ||
| public: | ||
| static std::string | ||
| GenerateHNSWBuildParametersString(const std::string& metric_type, | ||
| int64_t dim, | ||
| bool use_static = false); | ||
|
|
||
| static TestDatasetPool pool; | ||
|
|
||
| static std::vector<int> dims; | ||
|
|
||
| static std::vector<float> valid_ratios; | ||
|
|
||
| constexpr static uint64_t base_count = 1000; | ||
| static HNSWResourcePtr | ||
| GetResource(bool sample = true); | ||
|
|
||
| constexpr static const char* search_param_tmp = R"( | ||
| {{ | ||
|
|
@@ -46,11 +51,41 @@ class HNSWTestIndex : public fixtures::TestIndex { | |
| "skip_ratio": 0.3 | ||
| }} | ||
| }})"; | ||
|
|
||
| static TestDatasetPool pool; | ||
| static std::vector<int> dims; | ||
| static std::vector<float> valid_ratios; | ||
| static uint64_t base_count; | ||
| static const std::string name; | ||
| static const std::vector<std::pair<std::string, float>> all_test_cases; | ||
| }; | ||
| using HNSWTestIndexPtr = std::shared_ptr<HNSWTestIndex>; | ||
|
|
||
| TestDatasetPool HNSWTestIndex::pool{}; | ||
| std::vector<int> HNSWTestIndex::dims = fixtures::get_common_used_dims(2, RandomValue(0, 999)); | ||
| std::vector<float> HNSWTestIndex::valid_ratios{0.01, 0.05, 0.99}; | ||
| uint64_t HNSWTestIndex::base_count = 1200; | ||
| const std::string HNSWTestIndex::name = "hnsw"; | ||
| const std::vector<std::pair<std::string, float>> HNSWTestIndex::all_test_cases = { | ||
| {"fp32", 0.99}, | ||
| }; | ||
|
|
||
| HNSWResourcePtr | ||
| HNSWTestIndex::GetResource(bool sample) { | ||
| auto resource = std::make_shared<HNSWTestResource>(); | ||
| if (sample) { | ||
| resource->dims = fixtures::get_common_used_dims(1, RandomValue(0, 999)); | ||
| resource->test_cases = fixtures::RandomSelect(HNSWTestIndex::all_test_cases, 3); | ||
| resource->metric_types = fixtures::RandomSelect<std::string>({"ip", "l2", "cosine"}, 1); | ||
| resource->base_count = HNSWTestIndex::base_count; | ||
| } else { | ||
| resource->dims = fixtures::get_common_used_dims(); | ||
| resource->test_cases = HNSWTestIndex::all_test_cases; | ||
| resource->metric_types = {"ip", "l2", "cosine"}; | ||
| resource->base_count = HNSWTestIndex::base_count * 10; | ||
| } | ||
| return resource; | ||
| } | ||
|
|
||
| std::string | ||
| HNSWTestIndex::GenerateHNSWBuildParametersString(const std::string& metric_type, | ||
|
|
@@ -682,3 +717,57 @@ TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex, | |
| auto result_immutable = index->SetImmutable(); | ||
| REQUIRE_FALSE(result_immutable.has_value()); | ||
| } | ||
|
|
||
| static void | ||
| TestHNSWSearchUnrelatedParameter(const fixtures::HNSWTestIndexPtr& test_index, | ||
| const fixtures::HNSWResourcePtr& resource) { | ||
| using namespace fixtures; | ||
| auto origin_size = vsag::Options::Instance().block_size_limit(); | ||
| auto size = GENERATE(1024 * 1024 * 2); | ||
| constexpr const char* search_param = R"({ | ||
| "hnsw": { | ||
| "ef_search": 200, | ||
| "-------unrelated parameters below-------": true, | ||
| "use_reorder": true, | ||
| "scan_buckets_count": 10 | ||
| }, | ||
| "diskann": { | ||
| "parameters used in other index": "hnsw" | ||
| } | ||
| })"; | ||
|
|
||
| for (auto metric_type : resource->metric_types) { | ||
| for (auto dim : resource->dims) { | ||
| for (auto& [base_quantization_str, recall] : resource->test_cases) { | ||
| INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}", | ||
| metric_type, | ||
| dim, | ||
| base_quantization_str, | ||
| recall)); | ||
| vsag::Options::Instance().set_block_size_limit(size); | ||
| auto param = HNSWTestIndex::GenerateHNSWBuildParametersString(metric_type, dim); | ||
| auto index = TestIndex::TestFactory(test_index->name, param, true); | ||
| auto dataset = | ||
| HNSWTestIndex::pool.GetDatasetAndCreate(dim, resource->base_count, metric_type); | ||
| TestIndex::TestBuildIndex(index, dataset, true); | ||
| TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param); | ||
| } | ||
|
Comment on lines +741 to +754
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The loop over |
||
| } | ||
| } | ||
| } | ||
|
|
||
| TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex, | ||
| "(PR) HNSW SearchUnrelatedParameter", | ||
| "[ft][hnsw][pr]") { | ||
| auto test_index = std::make_shared<HNSWTestIndex>(); | ||
| auto resource = test_index->GetResource(true); | ||
| TestHNSWSearchUnrelatedParameter(test_index, resource); | ||
| } | ||
|
|
||
| TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex, | ||
| "(Daily) HNSW SearchUnrelatedParameter", | ||
| "[ft][hnsw][daily]") { | ||
| auto test_index = std::make_shared<HNSWTestIndex>(); | ||
| auto resource = test_index->GetResource(false); | ||
| TestHNSWSearchUnrelatedParameter(test_index, resource); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The loop over `resource->test_cases` is redundant because the variables `base_quantization_str` and `recall` are only used for logging. The `GenerateDiskANNBuildParametersString` function doesn't use `base_quantization_str`, so the test logic inside the loop is identical for each iteration. To avoid redundant test executions, especially if more test cases are added later, consider moving the test logic outside of this loop.