Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix metric FD verifier issues #167

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions src/algorithms/metric/metric_verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ unsigned long long MetricVerifier::ExecuteInternal() {
points_calculator_ = std::make_unique<PointsCalculator>(dist_from_null_is_infinity_,
typed_relation_, rhs_indices_);
highlight_calculator_ = std::make_unique<HighlightCalculator>(typed_relation_, rhs_indices_);
assert(points_calculator_.get() != nullptr || highlight_calculator_.get() != nullptr);

VerifyMetricFD();

Expand Down Expand Up @@ -234,7 +233,8 @@ void MetricVerifier::VisualizeHighlights() const {
std::string value = GetStringValue(rhs_indices_, highlight.data_index);
std::string begin_desc;
if (!is_empty) {
begin_desc = std::string("[") + (highlight.max_distance <= parameter_ ? "✓" : "X") +
begin_desc = std::string("[") +
(IsGreaterThanParameter(highlight.max_distance) ? "X" : "✓") +
"] | max dist: " + std::to_string(highlight.max_distance) + "\t| ";
}
std::string end_desc;
Expand Down Expand Up @@ -427,7 +427,7 @@ bool MetricVerifier::CompareNumericValues(
} else if (type.Compare(points[i].point, min_value) == model::CompareResult::kLess) {
min_value = points[i].point;
}
if (type.Dist(max_value, min_value) > parameter_) {
if (IsGreaterThanParameter(type.Dist(max_value, min_value))) {
return false;
}
}
Expand All @@ -442,7 +442,7 @@ bool MetricVerifier::ApproxVerifyCluster(std::vector<T> const& points,
}
return std::all_of(std::next(points.cbegin()), points.cend(),
[this, &points, &dist_func](auto const& p) {
return dist_func(points[0], p) * 2 <= parameter_;
return IsLessOrEqualThanParameter(dist_func(points[0], p) * 2);
});
}

Expand All @@ -451,7 +451,7 @@ bool MetricVerifier::BruteVerifyCluster(std::vector<IndexedPoint<T>> const& poin
DistanceFunction<T> const& dist_func) const {
for (size_t i = 0; i + 1 < points.size(); ++i) {
for (size_t j = i + 1; j < points.size(); ++j) {
if (dist_func(points[i].point, points[j].point) > parameter_) {
if (IsGreaterThanParameter(dist_func(points[i].point, points[j].point))) {
return false;
}
}
Expand All @@ -462,8 +462,18 @@ bool MetricVerifier::BruteVerifyCluster(std::vector<IndexedPoint<T>> const& poin
bool MetricVerifier::CalipersCompareNumericValues(std::vector<util::Point>& points) const {
auto pairs = util::GetAntipodalPairs(util::CalculateConvexHull(points));
return std::all_of(pairs.cbegin(), pairs.cend(), [this](auto const& pair) {
return util::Point::EuclideanDistance(pair.first, pair.second) <= parameter_;
return IsLessOrEqualThanParameter(util::Point::EuclideanDistance(pair.first, pair.second));
});
}

static constexpr long double epsilon = 1e-15;

bool MetricVerifier::IsGreaterThanParameter(long double value) const {
return value > parameter_ + epsilon;
}
Comment on lines +471 to +473
Copy link
Collaborator

@polyntsov polyntsov Feb 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Вот тут сложно. Есть конкретный пример/тест, для которого сравнение нужно делать так? То есть какую конкретно проблему мы решаем

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

В cosine метрике расстояния между всеми значениями 0, но с параметром 0 зависимость не выполняется.
Я тест ещё добавил для этого случая.

Copy link
Collaborator

@polyntsov polyntsov Feb 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

но с параметром 0 зависимость не выполняется

Если параметр равен 0, то по определению метрической зависимости она не выполняется?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Если расстояния между всеми значеиниями 0, то в данном случая при любом параметре должна зависимость выполняться. Но с параметром 0 она не выполнялась. 0 <= 0 давал false

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

То есть в сущности проблема в обычном сравнении doubleов. Не уверен, что фиксированный эпсилон это лучший способ тут, хм

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

мб посмотреть, как с этим сражается gtest? есть EXPECT_DOUBLE_EQ, например, посмотреть, что там под капотом

https://groups.google.com/g/googletestframework/c/7ZoZWlfxpg4/m/4dHJaUbtWqAJ


bool MetricVerifier::IsLessOrEqualThanParameter(long double value) const {
return !IsGreaterThanParameter(value);
}

} // namespace algos::metric
11 changes: 11 additions & 0 deletions src/algorithms/metric/metric_verifier.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <filesystem>
#include <functional>
Expand Down Expand Up @@ -54,6 +55,9 @@ class MetricVerifier : public algos::Primitive {
model::StringType const& type,
std::unordered_map<std::string, util::QGramVector>& q_gram_map) const;

bool IsGreaterThanParameter(long double value) const;
bool IsLessOrEqualThanParameter(long double value) const;

bool CheckMFDFailIfHasNulls(bool has_nulls) const {
return dist_from_null_is_infinity_ && has_nulls;
}
Expand Down Expand Up @@ -101,6 +105,7 @@ class MetricVerifier : public algos::Primitive {
}

std::vector<std::vector<Highlight>> const& GetHighlights() const {
assert(highlight_calculator_);
return highlight_calculator_->GetHighlights();
}

Expand All @@ -109,21 +114,27 @@ class MetricVerifier : public algos::Primitive {
}

void SortHighlightsByDistanceAscending() {
assert(highlight_calculator_);
highlight_calculator_->SortHighlightsByDistanceAscending();
}
void SortHighlightsByDistanceDescending() {
assert(highlight_calculator_);
highlight_calculator_->SortHighlightsByDistanceDescending();
}
void SortHighlightsByFurthestIndexAscending() {
assert(highlight_calculator_);
highlight_calculator_->SortHighlightsByFurthestIndexAscending();
}
void SortHighlightsByFurthestIndexDescending() {
assert(highlight_calculator_);
highlight_calculator_->SortHighlightsByFurthestIndexDescending();
}
void SortHighlightsByIndexAscending() {
assert(highlight_calculator_);
highlight_calculator_->SortHighlightsByIndexAscending();
}
void SortHighlightsByIndexDescending() {
assert(highlight_calculator_);
highlight_calculator_->SortHighlightsByIndexDescending();
}

Expand Down
3 changes: 2 additions & 1 deletion tests/test_metric_verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ INSTANTIATE_TEST_SUITE_P(
MetricVerifyingParams("euclidean", 4.5, {0}, {12, 11}, "TestMetric.csv",
"calipers"),
MetricVerifyingParams("euclidean", 6.0091679956547, {0}, {13, 14, 15},
"TestMetric.csv")));
"TestMetric.csv"),
MetricVerifyingParams("cosine", 0, {0}, {0}, "WDC_astronomical.csv")));

constexpr long double inf = std::numeric_limits<long double>::infinity();

Expand Down