diff --git a/.github/workflows/asan_build_and_test.yml b/.github/workflows/asan_build_and_test.yml index c0230619b..9ebfaf6b2 100644 --- a/.github/workflows/asan_build_and_test.yml +++ b/.github/workflows/asan_build_and_test.yml @@ -29,7 +29,7 @@ jobs: save: ${{ github.event_name != 'pull_request' }} key: build-${{ hashFiles('./CMakeLists.txt') }}-${{ hashFiles('./.circleci/fresh_ci_cache.commit') }} - name: Make Asan - run: export CMAKE_GENERATOR="Ninja"; make asan + run: export CMAKE_GENERATOR="Ninja"; export VSAG_ENABLE_TOOLS=OFF; make asan - name: Clean run: | find ./build -type f -name "*.o" -exec rm -f {} + diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index aa45cc360..8fa4e31b1 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -16,7 +16,11 @@ jobs: cancel-in-progress: ${{ github.event_name == 'pull_request' }} container: image: vsaglib/vsag:ci-x86 + volumes: + - /opt:/useless steps: + - name: Free Disk Space (Ubuntu) + run: rm -rf /useless/* - uses: actions/checkout@v4 with: fetch-depth: '0' diff --git a/tests/test_diskann.cpp b/tests/test_diskann.cpp index 7e552efc5..81b6fb664 100644 --- a/tests/test_diskann.cpp +++ b/tests/test_diskann.cpp @@ -27,14 +27,28 @@ #include "vsag/vsag.h" const std::string tmp_dir = "/tmp/"; + namespace fixtures { + +class DiskANNTestResource { +public: + std::vector dims; + std::vector> test_cases; + std::vector metric_types; + uint64_t base_count; +}; + +using DiskANNResourcePtr = std::shared_ptr; class DiskANNTestIndex : public fixtures::TestIndex { public: - static TestDatasetPool pool; static std::string GenerateDiskANNBuildParametersString(const std::string& metric_type, int64_t dim, bool use_bsa = false); + + static DiskANNResourcePtr + GetResource(bool sample = true); + static constexpr auto search_param_template = R"( {{ "diskann": {{ @@ -47,9 +61,38 @@ class DiskANNTestIndex : public fixtures::TestIndex { }} )"; - constexpr static uint64_t base_count = 
1000; + static TestDatasetPool pool; + static std::vector dims; + static uint64_t base_count; + static const std::string name; + static const std::vector> all_test_cases; }; +using DiskANNTestIndexPtr = std::shared_ptr; + TestDatasetPool DiskANNTestIndex::pool{}; +std::vector DiskANNTestIndex::dims = fixtures::get_common_used_dims(2, RandomValue(0, 999)); +uint64_t DiskANNTestIndex::base_count = 1200; +const std::string DiskANNTestIndex::name = "diskann"; +const std::vector> DiskANNTestIndex::all_test_cases = { + {"fp32", 0.99}, +}; + +DiskANNResourcePtr +DiskANNTestIndex::GetResource(bool sample) { + auto resource = std::make_shared(); + if (sample) { + resource->dims = fixtures::get_common_used_dims(1, RandomValue(0, 999)); + resource->test_cases = fixtures::RandomSelect(DiskANNTestIndex::all_test_cases, 3); + resource->metric_types = fixtures::RandomSelect({"ip", "l2", "cosine"}, 1); + resource->base_count = DiskANNTestIndex::base_count; + } else { + resource->dims = fixtures::get_common_used_dims(); + resource->test_cases = DiskANNTestIndex::all_test_cases; + resource->metric_types = {"ip", "l2", "cosine"}; + resource->base_count = DiskANNTestIndex::base_count * 10; + } + return resource; +} std::string DiskANNTestIndex::GenerateDiskANNBuildParametersString(const std::string& metric_type, @@ -89,6 +132,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann build test", "[ft][index][ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index][diskann]") { const std::vector dims = {736, 1536, 2048, 2560, 3072}; + const std::vector max_degrees = {16, 16, 32, 32, 64}; auto metric_type = GENERATE("l2", "ip"); const std::string name = "diskann"; constexpr auto build_parameter_json = R"( @@ -97,7 +141,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index] "metric_type": "{}", "dim": {}, "diskann": {{ - "max_degree": 16, + "max_degree": {}, "ef_construction": 200, "pq_dims": {}, "pq_sample_rate": 0.5, @@ -115,8 
+159,12 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index] }} }} )"; - for (auto dim : dims) { - auto build_parameters_str = fmt::format(build_parameter_json, metric_type, dim, dim / 4); + + for (uint64_t i = 0; i < dims.size(); ++i) { + auto dim = dims[i]; + auto max_degree = max_degrees[i]; + auto build_parameters_str = + fmt::format(build_parameter_json, metric_type, dim, max_degree, dim / 4); auto search_param = fmt::format(search_param_template, dim / 4); auto param = GenerateDiskANNBuildParametersString(metric_type, dim); auto index = TestFactory(name, param, true); @@ -125,7 +173,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index] TestKnnSearch(index, dataset, search_param, 0.90, true); TestRangeSearch(index, dataset, search_param, 0.90, 10, true); TestRangeSearch(index, dataset, search_param, 0.45, 5, true); - TestFilterSearch(index, dataset, search_param, 0.90, true); + TestFilterSearch(index, dataset, search_param, 0.80, true); REQUIRE(index->GetIndexType() == vsag::IndexType::DISKANN); } } @@ -745,3 +793,60 @@ TEST_CASE("split building process", "[ft][diskann]") { std::cout << "Recall: " << recall_full << std::endl; REQUIRE(recall_full == recall_partial); } + +// For every (metric type, dim, test case) in `resource`, builds a DiskANN index and checks that +// KnnSearch still succeeds when the search parameters carry keys unrelated to the index. +static void +TestDiskANNSearchUnrelatedParameter(const fixtures::DiskANNTestIndexPtr& test_index, + const fixtures::DiskANNResourcePtr& resource) { + using namespace fixtures; + auto origin_size = vsag::Options::Instance().block_size_limit(); + auto size = GENERATE(1024 * 1024 * 2); + constexpr const char* search_param = R"({ + "diskann": { + "ef_search": 200, + "io_limit": 200, + "beam_search": 4, + "-------unrelated parameters below-------": true, + "scan_buckets_count": 10 + } + })"; + + for (auto metric_type : resource->metric_types) { + for (auto dim : resource->dims) { + for (auto& [base_quantization_str, recall] : resource->test_cases) { + INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}", + 
metric_type, + dim, + base_quantization_str, + recall)); + vsag::Options::Instance().set_block_size_limit(size); + auto param = + DiskANNTestIndex::GenerateDiskANNBuildParametersString(metric_type, dim); + auto index = TestIndex::TestFactory(test_index->name, param, true); + auto dataset = DiskANNTestIndex::pool.GetDatasetAndCreate( + dim, resource->base_count, metric_type); + TestIndex::TestBuildIndex(index, dataset, true); + TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param); + } + } + } + // Put the global block size limit back so this test does not affect later ones. + vsag::Options::Instance().set_block_size_limit(origin_size); +} + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::DiskANNTestIndex, + "(PR) DiskANN SearchUnrelatedParameter", + "[ft][diskann][pr]") { + auto test_index = std::make_shared(); + auto resource = test_index->GetResource(true); + TestDiskANNSearchUnrelatedParameter(test_index, resource); +} + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::DiskANNTestIndex, + "(Daily) DiskANN SearchUnrelatedParameter", + "[ft][diskann][daily]") { + auto test_index = std::make_shared(); + auto resource = test_index->GetResource(false); + TestDiskANNSearchUnrelatedParameter(test_index, resource); +} diff --git a/tests/test_hgraph.cpp b/tests/test_hgraph.cpp index 3220d7999..8167321e6 100644 --- a/tests/test_hgraph.cpp +++ b/tests/test_hgraph.cpp @@ -1948,3 +1948,61 @@ TEST_CASE("[Daily]HGraph Disk IO Type Index", "[ft][hgraph][serialization][daily auto resource = test_index->GetResource(false); TestHGraphDiskIOType(test_index, resource); } + +// For every (metric type, dim, quantization) in `resource`, builds an HGraph index and checks that +// KnnSearch still succeeds when the search parameters carry keys unrelated to the index. +static void +TestHGraphSearchUnrelatedParameter(const fixtures::HGraphTestIndexPtr& test_index, + const fixtures::HGraphResourcePtr& resource) { + using namespace fixtures; + auto origin_size = vsag::Options::Instance().block_size_limit(); + auto size = GENERATE(1024 * 1024 * 2); + constexpr const char* search_param = R"({ + "hgraph": { + "ef_search": 200, + "-------unrelated parameters below-------": true, + "io_limit": 200, + "beam_search": 4, + "scan_buckets_count": 10 + } + })"; + + for (auto metric_type : resource->metric_types) { + for (auto dim : 
resource->dims) { + for (auto& [base_quantization_str, recall] : resource->test_cases) { + INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}", + metric_type, + dim, + base_quantization_str, + recall)); + vsag::Options::Instance().set_block_size_limit(size); + HGraphTestIndex::HGraphBuildParam build_param( + metric_type, dim, base_quantization_str); + auto param = HGraphTestIndex::GenerateHGraphBuildParametersString(build_param); + auto index = TestIndex::TestFactory(test_index->name, param, true); + auto dataset = HGraphTestIndex::pool.GetDatasetAndCreate( + dim, resource->base_count, metric_type); + TestIndex::TestBuildIndex(index, dataset, true); + TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param); + } + } + } + // Put the global block size limit back so this test does not affect later ones. + vsag::Options::Instance().set_block_size_limit(origin_size); +} + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::HGraphTestIndex, + "(PR) HGraph SearchUnrelatedParameter", + "[ft][hgraph][pr]") { + auto test_index = std::make_shared(); + auto resource = test_index->GetResource(true); + TestHGraphSearchUnrelatedParameter(test_index, resource); +} + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::HGraphTestIndex, + "(Daily) HGraph SearchUnrelatedParameter", + "[ft][hgraph][daily]") { + auto test_index = std::make_shared(); + auto resource = test_index->GetResource(false); + TestHGraphSearchUnrelatedParameter(test_index, resource); +} diff --git a/tests/test_hnsw.cpp b/tests/test_hnsw.cpp index 4f2d51482..6ab89477a 100644 --- a/tests/test_hnsw.cpp +++ b/tests/test_hnsw.cpp @@ -24,6 +24,16 @@ #include "vsag/vsag.h" namespace fixtures { + +class HNSWTestResource { +public: + std::vector dims; + std::vector> test_cases; + std::vector metric_types; + uint64_t base_count; +}; + +using HNSWResourcePtr = std::shared_ptr; class HNSWTestIndex : public fixtures::TestIndex { public: static std::string @@ -31,13 +41,8 @@ class HNSWTestIndex : public fixtures::TestIndex { int64_t dim, bool use_static = false); - static TestDatasetPool pool; - - static std::vector dims; - - static std::vector 
valid_ratios; - - constexpr static uint64_t base_count = 1000; + static HNSWResourcePtr + GetResource(bool sample = true); constexpr static const char* search_param_tmp = R"( {{ @@ -46,11 +51,41 @@ class HNSWTestIndex : public fixtures::TestIndex { "skip_ratio": 0.3 }} }})"; + + static TestDatasetPool pool; + static std::vector dims; + static std::vector valid_ratios; + static uint64_t base_count; + static const std::string name; + static const std::vector> all_test_cases; }; +using HNSWTestIndexPtr = std::shared_ptr; TestDatasetPool HNSWTestIndex::pool{}; std::vector HNSWTestIndex::dims = fixtures::get_common_used_dims(2, RandomValue(0, 999)); std::vector HNSWTestIndex::valid_ratios{0.01, 0.05, 0.99}; +uint64_t HNSWTestIndex::base_count = 1200; +const std::string HNSWTestIndex::name = "hnsw"; +const std::vector> HNSWTestIndex::all_test_cases = { + {"fp32", 0.99}, +}; + +HNSWResourcePtr +HNSWTestIndex::GetResource(bool sample) { + auto resource = std::make_shared(); + if (sample) { + resource->dims = fixtures::get_common_used_dims(1, RandomValue(0, 999)); + resource->test_cases = fixtures::RandomSelect(HNSWTestIndex::all_test_cases, 3); + resource->metric_types = fixtures::RandomSelect({"ip", "l2", "cosine"}, 1); + resource->base_count = HNSWTestIndex::base_count; + } else { + resource->dims = fixtures::get_common_used_dims(); + resource->test_cases = HNSWTestIndex::all_test_cases; + resource->metric_types = {"ip", "l2", "cosine"}; + resource->base_count = HNSWTestIndex::base_count * 10; + } + return resource; +} std::string HNSWTestIndex::GenerateHNSWBuildParametersString(const std::string& metric_type, @@ -682,3 +717,61 @@ TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex, auto result_immutable = index->SetImmutable(); REQUIRE_FALSE(result_immutable.has_value()); } + +// For every (metric type, dim, test case) in `resource`, builds an HNSW index and checks that +// KnnSearch still succeeds when the search parameters carry keys unrelated to the index. +static void +TestHNSWSearchUnrelatedParameter(const fixtures::HNSWTestIndexPtr& test_index, + const fixtures::HNSWResourcePtr& resource) { + using namespace fixtures; + auto origin_size = 
vsag::Options::Instance().block_size_limit(); + auto size = GENERATE(1024 * 1024 * 2); + constexpr const char* search_param = R"({ + "hnsw": { + "ef_search": 200, + "-------unrelated parameters below-------": true, + "use_reorder": true, + "scan_buckets_count": 10 + }, + "diskann": { + "parameters used in other index": "hnsw" + } + })"; + + for (auto metric_type : resource->metric_types) { + for (auto dim : resource->dims) { + for (auto& [base_quantization_str, recall] : resource->test_cases) { + INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}", + metric_type, + dim, + base_quantization_str, + recall)); + vsag::Options::Instance().set_block_size_limit(size); + auto param = HNSWTestIndex::GenerateHNSWBuildParametersString(metric_type, dim); + auto index = TestIndex::TestFactory(test_index->name, param, true); + auto dataset = + HNSWTestIndex::pool.GetDatasetAndCreate(dim, resource->base_count, metric_type); + TestIndex::TestBuildIndex(index, dataset, true); + TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param); + } + } + } + // Put the global block size limit back so this test does not affect later ones. + vsag::Options::Instance().set_block_size_limit(origin_size); +} + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex, + "(PR) HNSW SearchUnrelatedParameter", + "[ft][hnsw][pr]") { + auto test_index = std::make_shared(); + auto resource = test_index->GetResource(true); + TestHNSWSearchUnrelatedParameter(test_index, resource); +} + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex, + "(Daily) HNSW SearchUnrelatedParameter", + "[ft][hnsw][daily]") { + auto test_index = std::make_shared(); + auto resource = test_index->GetResource(false); + TestHNSWSearchUnrelatedParameter(test_index, resource); +} diff --git a/tests/test_index.cpp b/tests/test_index.cpp index 6ec8de409..4ae48d3ca 100644 --- a/tests/test_index.cpp +++ b/tests/test_index.cpp @@ -2226,4 +2226,24 @@ TestIndex::TestConcurrentAddSearchRemove(const TestIndex::IndexPtr& index, } } +void +TestIndex::TestSearchUnrelatedParameter(const IndexPtr& index, + const TestDatasetPtr& dataset, + const 
std::string& search_param) { + auto queries = dataset->query_; + auto query_count = queries->GetNumElements(); + auto dim = queries->GetDim(); + for (auto i = 0; i < query_count; ++i) { + auto query = vsag::Dataset::Make(); + query->NumElements(1) + ->Dim(dim) + ->Float32Vectors(queries->GetFloat32Vectors() + i * dim) + ->SparseVectors(queries->GetSparseVectors() + i) + ->Paths(queries->GetPaths() + i) + ->Owner(false); + auto res = index->KnnSearch(query, 10, search_param); + REQUIRE(res.has_value()); + } +} + } // namespace fixtures diff --git a/tests/test_index.h b/tests/test_index.h index f4500eeeb..94b7c7653 100644 --- a/tests/test_index.h +++ b/tests/test_index.h @@ -289,7 +289,12 @@ class TestIndex { const TestDatasetPtr& dataset, const std::string& search_param); - constexpr static float RECALL_THRESHOLD = 0.95; + static void + TestSearchUnrelatedParameter(const IndexPtr& index, + const TestDatasetPtr& dataset, + const std::string& search_param); + + constexpr static float RECALL_THRESHOLD = 0.85F; }; } // namespace fixtures diff --git a/tests/test_sindi.cpp b/tests/test_sindi.cpp index 9732f783c..07b588bac 100644 --- a/tests/test_sindi.cpp +++ b/tests/test_sindi.cpp @@ -213,3 +213,27 @@ TEST_CASE_PERSISTENT_FIXTURE(fixtures::SINDITestIndex, "SINDI Serialize File", " } vsag::Options::Instance().set_block_size_limit(origin_size); } + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::SINDITestIndex, + "SINDI Search Unrelated Param", + "[ft][sindi]") { + fixtures::SINDIParam param; + param.use_reorder = GENERATE(true, false); + auto build_param = fixtures::SINDITestIndex::GenerateBuildParameter(param); + auto index = TestFactory("sindi", build_param, true); + auto dataset = pool.GetSparseDatasetAndCreate(base_count, 128, 0.8); + constexpr const char* search_param = R"( + { + "sindi": + { + "n_candidate": 20, + "query_prune_ratio": 0.0, + "term_prune_ratio": 0.0, + "-------unrelated parameters below-------": true, + "io_limit": 200, + "beam_search": 4, + 
"scan_buckets_count": 10 + } + })"; + TestSearchUnrelatedParameter(index, dataset, search_param); +}