Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/asan_build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
save: ${{ github.event_name != 'pull_request' }}
key: build-${{ hashFiles('./CMakeLists.txt') }}-${{ hashFiles('./.circleci/fresh_ci_cache.commit') }}
- name: Make Asan
run: export CMAKE_GENERATOR="Ninja"; make asan
run: export CMAKE_GENERATOR="Ninja"; export VSAG_ENABLE_TOOLS=OFF; make asan
- name: Clean
run: |
find ./build -type f -name "*.o" -exec rm -f {} +
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ jobs:
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
container:
image: vsaglib/vsag:ci-x86
volumes:
- /opt:/useless
steps:
- name: Free Disk Space (Ubuntu)
run: rm -rf /useless/*
- uses: actions/checkout@v4
with:
fetch-depth: '0'
Expand Down
113 changes: 107 additions & 6 deletions tests/test_diskann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,28 @@
#include "vsag/vsag.h"

const std::string tmp_dir = "/tmp/";

namespace fixtures {

/// Parameter bundle consumed by the DiskANN test drivers in this file.
///
/// All members are public and filled in by DiskANNTestIndex::GetResource().
class DiskANNTestResource {
public:
    /// Vector dimensions the tests build and search indexes with.
    std::vector<int> dims;
    /// Pairs unpacked by the test loops as (base quantization name, recall).
    std::vector<std::pair<std::string, float>> test_cases;
    /// Metric type names the tests iterate over.
    std::vector<std::string> metric_types;
    /// Base count handed to the dataset pool. Defaults to zero so that a
    /// default-initialized resource never carries an indeterminate value.
    uint64_t base_count{0};
};

using DiskANNResourcePtr = std::shared_ptr<DiskANNTestResource>;
class DiskANNTestIndex : public fixtures::TestIndex {
public:
static TestDatasetPool pool;
static std::string
GenerateDiskANNBuildParametersString(const std::string& metric_type,
int64_t dim,
bool use_bsa = false);

static DiskANNResourcePtr
GetResource(bool sample = true);

static constexpr auto search_param_template = R"(
{{
"diskann": {{
Expand All @@ -47,9 +61,38 @@ class DiskANNTestIndex : public fixtures::TestIndex {
}}
)";

constexpr static uint64_t base_count = 1000;
static TestDatasetPool pool;
static std::vector<int> dims;
static uint64_t base_count;
static const std::string name;
static const std::vector<std::pair<std::string, float>> all_test_cases;
};
using DiskANNTestIndexPtr = std::shared_ptr<DiskANNTestIndex>;

// Out-of-class definitions for the static data members declared in DiskANNTestIndex.

// Dataset pool; the tests in this file fetch datasets from it via GetDatasetAndCreate().
TestDatasetPool DiskANNTestIndex::pool{};
// Dimensions computed once, when this static member is initialized.
// NOTE(review): presumably two dims picked using a random value in [0, 999] -- confirm
// against fixtures::get_common_used_dims / RandomValue.
std::vector<int> DiskANNTestIndex::dims = fixtures::get_common_used_dims(2, RandomValue(0, 999));
// Base count copied into sampled resources by GetResource(); the non-sampled path uses 10x this.
uint64_t DiskANNTestIndex::base_count = 1200;
// Index name handed to TestFactory when constructing the index under test.
const std::string DiskANNTestIndex::name = "diskann";
// Pairs unpacked by the test loops as (base_quantization_str, recall).
const std::vector<std::pair<std::string, float>> DiskANNTestIndex::all_test_cases = {
{"fp32", 0.99},
};

/// Creates the parameter bundle used by the DiskANN test drivers.
///
/// @param sample when true, a randomly selected subset (one dim, one metric type, selected
///               test cases) with the plain base count; when false, the full set of dims,
///               test cases and metric types with ten times the base count.
/// @return a freshly allocated, fully populated resource.
DiskANNResourcePtr
DiskANNTestIndex::GetResource(bool sample) {
    auto bundle = std::make_shared<DiskANNTestResource>();
    auto& out = *bundle;

    if (!sample) {
        // Exhaustive configuration: nothing is drawn at random on this path.
        out.dims = fixtures::get_common_used_dims();
        out.test_cases = DiskANNTestIndex::all_test_cases;
        out.metric_types = {"ip", "l2", "cosine"};
        out.base_count = DiskANNTestIndex::base_count * 10;
        return bundle;
    }

    // Sampled configuration: the three random draws stay in their original order
    // (dims, test cases, metric type).
    out.dims = fixtures::get_common_used_dims(1, RandomValue(0, 999));
    out.test_cases = fixtures::RandomSelect(DiskANNTestIndex::all_test_cases, 3);
    out.metric_types = fixtures::RandomSelect<std::string>({"ip", "l2", "cosine"}, 1);
    out.base_count = DiskANNTestIndex::base_count;
    return bundle;
}

std::string
DiskANNTestIndex::GenerateDiskANNBuildParametersString(const std::string& metric_type,
Expand Down Expand Up @@ -89,6 +132,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann build test", "[ft][index][

TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index][diskann]") {
const std::vector<int> dims = {736, 1536, 2048, 2560, 3072};
const std::vector<int> max_degrees = {16, 16, 32, 32, 64};
auto metric_type = GENERATE("l2", "ip");
const std::string name = "diskann";
constexpr auto build_parameter_json = R"(
Expand All @@ -97,7 +141,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index]
"metric_type": "{}",
"dim": {},
"diskann": {{
"max_degree": 16,
"max_degree": {},
"ef_construction": 200,
"pq_dims": {},
"pq_sample_rate": 0.5,
Expand All @@ -115,8 +159,12 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index]
}}
}}
)";
for (auto dim : dims) {
auto build_parameters_str = fmt::format(build_parameter_json, metric_type, dim, dim / 4);

for (uint64_t i = 0; i < dims.size(); ++i) {
auto dim = dims[i];
auto max_degree = max_degrees[i];
auto build_parameters_str =
fmt::format(build_parameter_json, metric_type, dim, max_degree, dim / 4);
auto search_param = fmt::format(search_param_template, dim / 4);
auto param = GenerateDiskANNBuildParametersString(metric_type, dim);
auto index = TestFactory(name, param, true);
Expand All @@ -125,7 +173,7 @@ TEST_CASE_METHOD(fixtures::DiskANNTestIndex, "diskann pq_dim test", "[ft][index]
TestKnnSearch(index, dataset, search_param, 0.90, true);
TestRangeSearch(index, dataset, search_param, 0.90, 10, true);
TestRangeSearch(index, dataset, search_param, 0.45, 5, true);
TestFilterSearch(index, dataset, search_param, 0.90, true);
TestFilterSearch(index, dataset, search_param, 0.80, true);
REQUIRE(index->GetIndexType() == vsag::IndexType::DISKANN);
}
}
Expand Down Expand Up @@ -745,3 +793,56 @@ TEST_CASE("split building process", "[ft][diskann]") {
std::cout << "Recall: " << recall_full << std::endl;
REQUIRE(recall_full == recall_partial);
}

static void
TestDiskANNSearchUnrelatedParameter(const fixtures::DiskANNTestIndexPtr& test_index,
const fixtures::DiskANNResourcePtr& resource) {
using namespace fixtures;
auto origin_size = vsag::Options::Instance().block_size_limit();
auto size = GENERATE(1024 * 1024 * 2);
constexpr const char* search_param = R"({
"diskann": {
"ef_search": 200,
"io_limit": 200,
"beam_search": 4,
"-------unrelated parameters below-------": true,
"scan_buckets_count": 10
}
})";

for (auto metric_type : resource->metric_types) {
for (auto dim : resource->dims) {
for (auto& [base_quantization_str, recall] : resource->test_cases) {
INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}",
metric_type,
dim,
base_quantization_str,
recall));
vsag::Options::Instance().set_block_size_limit(size);
auto param =
DiskANNTestIndex::GenerateDiskANNBuildParametersString(metric_type, dim);
auto index = TestIndex::TestFactory(test_index->name, param, true);
auto dataset = DiskANNTestIndex::pool.GetDatasetAndCreate(
dim, resource->base_count, metric_type);
TestIndex::TestBuildIndex(index, dataset, true);
TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param);
}
Comment on lines +815 to +829

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The loop over resource->test_cases is redundant because the variables base_quantization_str and recall are only used for logging. The GenerateDiskANNBuildParametersString function doesn't use base_quantization_str, so the test logic inside the loop is identical for each iteration. To avoid redundant test executions, especially if more test cases are added later, consider moving the test logic outside of this loop.

}
}
}

TEST_CASE_PERSISTENT_FIXTURE(fixtures::DiskANNTestIndex,
"(PR) DiskANN SearchUnrelatedParameter",
"[ft][diskann][pr]") {
auto test_index = std::make_shared<DiskANNTestIndex>();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need pr tag

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

auto resource = test_index->GetResource(true);
TestDiskANNSearchUnrelatedParameter(test_index, resource);
}

TEST_CASE_PERSISTENT_FIXTURE(fixtures::DiskANNTestIndex,
"(Daily) DiskANN SearchUnrelatedParameter",
"[ft][diskann][daily]") {
auto test_index = std::make_shared<DiskANNTestIndex>();
auto resource = test_index->GetResource(false);
TestDiskANNSearchUnrelatedParameter(test_index, resource);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need daily tag

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
54 changes: 54 additions & 0 deletions tests/test_hgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1948,3 +1948,57 @@ TEST_CASE("[Daily]HGraph Disk IO Type Index", "[ft][hgraph][serialization][daily
auto resource = test_index->GetResource(false);
TestHGraphDiskIOType(test_index, resource);
}

/// Builds an HGraph index for every (metric type, dim, test case) combination in `resource`
/// and runs TestSearchUnrelatedParameter against it with a search-parameter JSON that also
/// carries the keys listed after the "unrelated parameters below" marker.
///
/// The process-wide block size limit is overridden while each combination runs and is put
/// back to its previous value afterwards, so the override does not leak into later tests.
///
/// @param test_index fixture whose `name` selects the index type handed to TestFactory.
/// @param resource   metric types, dims, test cases and base count to iterate over.
static void
TestHGraphSearchUnrelatedParameter(const fixtures::HGraphTestIndexPtr& test_index,
                                   const fixtures::HGraphResourcePtr& resource) {
    using namespace fixtures;
    // Remember the global setting so it can be restored after each combination.
    const auto origin_size = vsag::Options::Instance().block_size_limit();
    auto size = GENERATE(1024 * 1024 * 2);
    constexpr const char* search_param = R"({
"hgraph": {
"ef_search": 200,
"-------unrelated parameters below-------": true,
"io_limit": 200,
"beam_search": 4,
"scan_buckets_count": 10
}
})";

    for (const auto& metric_type : resource->metric_types) {
        for (auto dim : resource->dims) {
            for (auto& [base_quantization_str, recall] : resource->test_cases) {
                INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}",
                                 metric_type,
                                 dim,
                                 base_quantization_str,
                                 recall));
                vsag::Options::Instance().set_block_size_limit(size);
                HGraphTestIndex::HGraphBuildParam build_param(
                    metric_type, dim, base_quantization_str);
                auto param = HGraphTestIndex::GenerateHGraphBuildParametersString(build_param);
                auto index = TestIndex::TestFactory(test_index->name, param, true);
                auto dataset = HGraphTestIndex::pool.GetDatasetAndCreate(
                    dim, resource->base_count, metric_type);
                TestIndex::TestBuildIndex(index, dataset, true);
                TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param);
                // Undo the override; previously origin_size was captured but never used.
                vsag::Options::Instance().set_block_size_limit(origin_size);
            }
        }
    }
}

// PR-tagged run: exercises the unrelated-parameter search check on the resource returned by
// GetResource(true).
TEST_CASE_PERSISTENT_FIXTURE(fixtures::HGraphTestIndex,
                             "(PR) HGraph SearchUnrelatedParameter",
                             "[ft][hgraph][pr]") {
    const auto hgraph_fixture = std::make_shared<HGraphTestIndex>();
    const auto sampled_resource = hgraph_fixture->GetResource(true);
    TestHGraphSearchUnrelatedParameter(hgraph_fixture, sampled_resource);
}

// Daily-tagged run: exercises the unrelated-parameter search check on the resource returned
// by GetResource(false).
TEST_CASE_PERSISTENT_FIXTURE(fixtures::HGraphTestIndex,
                             "(Daily) HGraph SearchUnrelatedParameter",
                             "[ft][hgraph][daily]") {
    const auto hgraph_fixture = std::make_shared<HGraphTestIndex>();
    const auto full_resource = hgraph_fixture->GetResource(false);
    TestHGraphSearchUnrelatedParameter(hgraph_fixture, full_resource);
}
103 changes: 96 additions & 7 deletions tests/test_hnsw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,25 @@
#include "vsag/vsag.h"

namespace fixtures {

/// Parameter bundle consumed by the HNSW test drivers in this file.
///
/// All members are public and filled in by HNSWTestIndex::GetResource().
class HNSWTestResource {
public:
    /// Vector dimensions the tests build and search indexes with.
    std::vector<int> dims;
    /// Pairs unpacked by the test loops as (base quantization name, recall).
    std::vector<std::pair<std::string, float>> test_cases;
    /// Metric type names the tests iterate over.
    std::vector<std::string> metric_types;
    /// Base count handed to the dataset pool. Defaults to zero so that a
    /// default-initialized resource never carries an indeterminate value.
    uint64_t base_count{0};
};

using HNSWResourcePtr = std::shared_ptr<HNSWTestResource>;
class HNSWTestIndex : public fixtures::TestIndex {
public:
static std::string
GenerateHNSWBuildParametersString(const std::string& metric_type,
int64_t dim,
bool use_static = false);

static TestDatasetPool pool;

static std::vector<int> dims;

static std::vector<float> valid_ratios;

constexpr static uint64_t base_count = 1000;
static HNSWResourcePtr
GetResource(bool sample = true);

constexpr static const char* search_param_tmp = R"(
{{
Expand All @@ -46,11 +51,41 @@ class HNSWTestIndex : public fixtures::TestIndex {
"skip_ratio": 0.3
}}
}})";

static TestDatasetPool pool;
static std::vector<int> dims;
static std::vector<float> valid_ratios;
static uint64_t base_count;
static const std::string name;
static const std::vector<std::pair<std::string, float>> all_test_cases;
};
using HNSWTestIndexPtr = std::shared_ptr<HNSWTestIndex>;

// Out-of-class definitions for the static data members declared in HNSWTestIndex.

// Dataset pool; the tests in this file fetch datasets from it via GetDatasetAndCreate().
TestDatasetPool HNSWTestIndex::pool{};
// Dimensions computed once, when this static member is initialized.
// NOTE(review): presumably two dims picked using a random value in [0, 999] -- confirm
// against fixtures::get_common_used_dims / RandomValue.
std::vector<int> HNSWTestIndex::dims = fixtures::get_common_used_dims(2, RandomValue(0, 999));
// NOTE(review): consumers of valid_ratios are not visible in this excerpt -- confirm usage.
std::vector<float> HNSWTestIndex::valid_ratios{0.01, 0.05, 0.99};
// Base count copied into sampled resources by GetResource(); the non-sampled path uses 10x this.
uint64_t HNSWTestIndex::base_count = 1200;
// Index name handed to TestFactory when constructing the index under test.
const std::string HNSWTestIndex::name = "hnsw";
// Pairs unpacked by the test loops as (base_quantization_str, recall).
const std::vector<std::pair<std::string, float>> HNSWTestIndex::all_test_cases = {
{"fp32", 0.99},
};

/// Creates the parameter bundle used by the HNSW test drivers.
///
/// @param sample when true, a randomly selected subset (one dim, one metric type, selected
///               test cases) with the plain base count; when false, the full set of dims,
///               test cases and metric types with ten times the base count.
/// @return a freshly allocated, fully populated resource.
HNSWResourcePtr
HNSWTestIndex::GetResource(bool sample) {
    auto bundle = std::make_shared<HNSWTestResource>();
    auto& out = *bundle;

    if (!sample) {
        // Exhaustive configuration: nothing is drawn at random on this path.
        out.dims = fixtures::get_common_used_dims();
        out.test_cases = HNSWTestIndex::all_test_cases;
        out.metric_types = {"ip", "l2", "cosine"};
        out.base_count = HNSWTestIndex::base_count * 10;
        return bundle;
    }

    // Sampled configuration: the three random draws stay in their original order
    // (dims, test cases, metric type).
    out.dims = fixtures::get_common_used_dims(1, RandomValue(0, 999));
    out.test_cases = fixtures::RandomSelect(HNSWTestIndex::all_test_cases, 3);
    out.metric_types = fixtures::RandomSelect<std::string>({"ip", "l2", "cosine"}, 1);
    out.base_count = HNSWTestIndex::base_count;
    return bundle;
}

std::string
HNSWTestIndex::GenerateHNSWBuildParametersString(const std::string& metric_type,
Expand Down Expand Up @@ -682,3 +717,57 @@ TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex,
auto result_immutable = index->SetImmutable();
REQUIRE_FALSE(result_immutable.has_value());
}

static void
TestHNSWSearchUnrelatedParameter(const fixtures::HNSWTestIndexPtr& test_index,
const fixtures::HNSWResourcePtr& resource) {
using namespace fixtures;
auto origin_size = vsag::Options::Instance().block_size_limit();
auto size = GENERATE(1024 * 1024 * 2);
constexpr const char* search_param = R"({
"hnsw": {
"ef_search": 200,
"-------unrelated parameters below-------": true,
"use_reorder": true,
"scan_buckets_count": 10
},
"diskann": {
"parameters used in other index": "hnsw"
}
})";

for (auto metric_type : resource->metric_types) {
for (auto dim : resource->dims) {
for (auto& [base_quantization_str, recall] : resource->test_cases) {
INFO(fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, recall: {}",
metric_type,
dim,
base_quantization_str,
recall));
vsag::Options::Instance().set_block_size_limit(size);
auto param = HNSWTestIndex::GenerateHNSWBuildParametersString(metric_type, dim);
auto index = TestIndex::TestFactory(test_index->name, param, true);
auto dataset =
HNSWTestIndex::pool.GetDatasetAndCreate(dim, resource->base_count, metric_type);
TestIndex::TestBuildIndex(index, dataset, true);
TestIndex::TestSearchUnrelatedParameter(index, dataset, search_param);
}
Comment on lines +741 to +754

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The loop over resource->test_cases is redundant. The variables base_quantization_str and recall are only used for logging, and GenerateHNSWBuildParametersString doesn't use base_quantization_str. This means the test logic inside the loop is the same for each iteration. Consider moving the test logic outside of this loop to avoid redundant test runs.

}
}
}

// PR-tagged run: exercises the unrelated-parameter search check on the sampled resource
// returned by GetResource(true).
TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex,
                             "(PR) HNSW SearchUnrelatedParameter",
                             "[ft][hnsw][pr]") {
    const auto hnsw_fixture = std::make_shared<HNSWTestIndex>();
    const auto sampled_resource = hnsw_fixture->GetResource(true);
    TestHNSWSearchUnrelatedParameter(hnsw_fixture, sampled_resource);
}

// Daily-tagged run: exercises the unrelated-parameter search check on the full resource
// returned by GetResource(false).
TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex,
                             "(Daily) HNSW SearchUnrelatedParameter",
                             "[ft][hnsw][daily]") {
    const auto hnsw_fixture = std::make_shared<HNSWTestIndex>();
    const auto full_resource = hnsw_fixture->GetResource(false);
    TestHNSWSearchUnrelatedParameter(hnsw_fixture, full_resource);
}
Loading
Loading