Skip to content

Commit

Permalink
Fixed potential data race between Histogram count and buckets (#71)
Browse files Browse the repository at this point in the history
* Fixed potential data race between Histogram count and buckets

* Add missing header

* Clarified about 'sum' in README
  • Loading branch information
DarkWanderer authored Oct 11, 2024
1 parent 5e352b2 commit e7a6ca4
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 28 deletions.
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# metrics-cpp
# metrics-cpp

[![Build](https://github.com/DarkWanderer/metrics-cpp/actions/workflows/build.yml/badge.svg)](https://github.com/DarkWanderer/metrics-cpp/actions/workflows/build.yml)
![Readiness](https://img.shields.io/badge/readiness-beta-yellow)
Expand Down Expand Up @@ -28,9 +28,11 @@ The design goals of this library are the following:

## Limitations & compromises

Due to limited number of locks employed, there is no strong consistency guarantee between different metrics. If a particular thread changes two counters and serialization happens in the middle, you may see a value for one counter increasing but not for the other - until the next time metrics are collected. Hence, care must be taken when creating alerts based on metrics differential. Another compromise stemming from minimized locking requirements is inability to _remove_ metrics from a `Registry` - however, that is something which is not supported by Prometheus anyway

Boost::accumulators do not correctly work under MacOS, which prevents Summary class from working - more throrough investigation pending
* Due to limited number of locks employed, there is no strong consistency guarantee between different metrics
* If a particular thread changes two counters and serialization happens in the middle, you may see a value for one counter increasing but not for the other - until the next time metrics are collected. Hence, care must be taken when creating alerts based on metrics differential
* For same reason, histogram 'sum' may be out of sync with total count - skewing the average value with ⅟n asymptotic upper bound
* It's not possible to _remove_ metrics from a `Registry` - conceptually shared with Prometheus
* Boost::accumulators do not correctly work under MacOS, which prevents Summary class from working there - more throrough investigation pending

## Readiness

Expand Down
4 changes: 3 additions & 1 deletion src/common/json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ namespace Metrics {
serialized["type"] = "histogram";

auto s = std::static_pointer_cast<IHistogram>(metric);
serialized["count"] = s->count();
serialized["sum"] = s->sum();

json::array buckets;
Expand All @@ -65,6 +64,9 @@ namespace Metrics {
v["bound"] = kv.first;
v["count"] = kv.second;
buckets.emplace_back(v);

if (kv.first == numeric_limits<double>::infinity())
serialized["count"] = kv.second;
}
serialized["buckets"] = buckets;
}
Expand Down
50 changes: 32 additions & 18 deletions src/common/metrics.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#include <metrics/metric.h>

#include <algorithm>
#include <mutex>
#include <atomic>
#include <vector>
#include <list>
#include <map>
#include <mutex>
#include <numeric>
#include <vector>
#include <iterator>

using namespace std;

Expand Down Expand Up @@ -102,32 +104,37 @@ namespace Metrics

class HistogramImpl : public IHistogram {
private:
vector<double> m_bounds;
const vector<double> m_bounds;
vector<CounterImpl> m_counts;
CounterImpl m_count;
GaugeImpl m_sum;

static vector<double> preprocessBounds(const vector<double>& input) {
vector<double> bounds;

// Reserve extra for +Inf
bounds.reserve(bounds.size() + 1);
copy(input.begin(), input.end(), back_inserter(bounds));

// Add mandatory +Inf bound
bounds.push_back(numeric_limits<double>::infinity());
sort(bounds.begin(), bounds.end());
auto last = unique(bounds.begin(), bounds.end());
bounds.erase(last, bounds.end());
return bounds;
}
public:
HistogramImpl(const vector<double>& bounds)
HistogramImpl(const vector<double>& bounds) :
m_bounds(preprocessBounds(bounds)), m_counts(m_bounds.size()), m_sum()
{
m_bounds = bounds;
sort(m_bounds.begin(), m_bounds.end());
auto last = unique(m_bounds.begin(), m_bounds.end());
m_bounds.erase(last, m_bounds.end());
m_counts = vector<CounterImpl>(m_bounds.size());
}

HistogramImpl(const HistogramImpl&) = delete;

IHistogram& observe(double value) override {
m_count++;
m_sum += value;
auto bound = lower_bound(m_bounds.begin(), m_bounds.end(), value);
if (bound != m_bounds.end())
{
auto index = distance(m_bounds.begin(), bound);
m_counts[index]++;
}
auto bound = lower_bound(m_bounds.begin(), m_bounds.end(), value); // Guaranteed to find because of the infinity bound
auto index = distance(m_bounds.begin(), bound);
m_counts[index]++;

Check warning on line 137 in src/common/metrics.cpp

View workflow job for this annotation

GitHub Actions / Build (windows, asan, x64) / Build

'argument': conversion from '__int64' to 'const unsigned __int64', signed/unsigned mismatch

Check warning on line 137 in src/common/metrics.cpp

View workflow job for this annotation

GitHub Actions / Build (windows, debug, x64) / Build

'argument': conversion from '__int64' to 'const unsigned __int64', signed/unsigned mismatch
return *this;
}

Expand All @@ -145,7 +152,14 @@ namespace Metrics
return result;
};

uint64_t count() const override { return m_count; };
uint64_t count() const override {
uint64_t result = 0;
for (auto& c : m_counts) {
result += c.value();
}
return result;
};

double sum() const override { return m_sum; };
};

Expand Down
Empty file removed src/metrics/histogram.cpp
Empty file.
1 change: 1 addition & 0 deletions src/metrics/summary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ namespace Metrics {
};

std::shared_ptr<ISummary> makeSummary(const vector<double>& quantiles, double error) {
// Explicit copy
auto q = quantiles;
sort(q.begin(), q.end());
return std::make_shared<SummaryImpl>(q, error);
Expand Down
7 changes: 6 additions & 1 deletion src/prometheus/prometheus_serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <metrics/sink.h>

#include <iostream>
#include <limits>
#include <sstream>

using namespace std;
Expand Down Expand Up @@ -55,15 +56,19 @@ namespace Metrics {

void serialize(ostream& os, const string& name, const Labels& labels, const IHistogram& histogram)
{
uint64_t count = 0;
for (auto& value : histogram.values()) {
os << name << '{';
for (auto kv = labels.cbegin(); kv != labels.cend(); kv++) {
os << kv->first << "=\"" << kv->second << '"' << ',';
}
os << "le=\"" << value.first << "\"} " << value.second << endl;

if (value.first == numeric_limits<double>::infinity())
count = value.second;
}
os << name << "_sum" << labels << ' ' << histogram.sum() << endl;
os << name << "_count" << labels << ' ' << histogram.count() << endl;
os << name << "_count" << labels << ' ' << count << endl;
}

void serialize(ostream& os, const string& name, const Labels& labels, const std::shared_ptr<IMetric> metric)
Expand Down
8 changes: 5 additions & 3 deletions test/serialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,14 @@ gauge2{another="label"} 200
histogram1{le="1"} 1
histogram1{le="2"} 2
histogram1{le="5"} 2
histogram1{le="inf"} 2
histogram1_sum 3
histogram1_count 2
# TYPE histogram2 histogram
histogram2{more="labels",le="1"} 0
histogram2{more="labels",le="2"} 0
histogram2{more="labels",le="5"} 2
histogram2{more="labels",le="inf"} 2
histogram2_sum{more="labels"} 7
histogram2_count{more="labels"} 2
# TYPE summary1 summary
Expand All @@ -70,7 +72,7 @@ TEST_CASE("Serialize.Json", "[json]")
auto registry = createReferenceRegistry();
auto result = Metrics::Json::serializeJson(registry);

CHECK_THAT(result, Equals(R"([{"name":"counter1","type":"counter","value":1},{"name":"counter2","labels":{"label":"value1"},"type":"counter","value":1},{"name":"counter2","labels":{"label":"value2"},"type":"counter","value":2},{"name":"gauge1","type":"gauge","value":1E2},{"name":"gauge2","labels":{"another":"label"},"type":"gauge","value":2E2},{"name":"histogram1","type":"histogram","count":2,"sum":3E0,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2}]},{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","count":2,"sum":7E0,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2}]},{"name":"summary1","type":"summary","count":3,"sum":6E0,"quantiles":[{"quantile":5E-1,"count":2},{"quantile":9E-1,"count":3},{"quantile":9.9E-1,"count":3},{"quantile":9.99E-1,"count":3}]},{"name":"summary2","labels":{"summary":"label"},"type":"summary","count":3,"sum":1.1E1,"quantiles":[{"quantile":5E-1,"count":3},{"quantile":9E-1,"count":5},{"quantile":9.9E-1,"count":5},{"quantile":9.99E-1,"count":5}]}])"));
CHECK_THAT(result, Equals(R"([{"name":"counter1","type":"counter","value":1},{"name":"counter2","labels":{"label":"value1"},"type":"counter","value":1},{"name":"counter2","labels":{"label":"value2"},"type":"counter","value":2},{"name":"gauge1","type":"gauge","value":1E2},{"name":"gauge2","labels":{"another":"label"},"type":"gauge","value":2E2},{"name":"histogram1","type":"histogram","sum":3E0,"count":2,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]},{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","sum":7E0,"count":2,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]},{"name":"summary1","type":"summary","count":3,"sum":6E0,"quantiles":[{"quantile":5E-1,"count":2},{"quantile":9E-1,"count":3},{"quantile":9.9E-1,"count":3},{"quantile":9.99E-1,"count":3}]},{"name":"summary2","labels":{"summary":"label"},"type":"summary","count":3,"sum":1.1E1,"quantiles":[{"quantile":5E-1,"count":3},{"quantile":9E-1,"count":5},{"quantile":9.9E-1,"count":5},{"quantile":9.99E-1,"count":5}]}])"));
}

TEST_CASE("Serialize.Jsonl", "[jsonl]")
Expand All @@ -83,8 +85,8 @@ TEST_CASE("Serialize.Jsonl", "[jsonl]")
{"name":"counter2","labels":{"label":"value2"},"type":"counter","value":2}
{"name":"gauge1","type":"gauge","value":1E2}
{"name":"gauge2","labels":{"another":"label"},"type":"gauge","value":2E2}
{"name":"histogram1","type":"histogram","count":2,"sum":3E0,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2}]}
{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","count":2,"sum":7E0,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2}]}
{"name":"histogram1","type":"histogram","sum":3E0,"count":2,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]}
{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","sum":7E0,"count":2,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]}
{"name":"summary1","type":"summary","count":3,"sum":6E0,"quantiles":[{"quantile":5E-1,"count":2},{"quantile":9E-1,"count":3},{"quantile":9.9E-1,"count":3},{"quantile":9.99E-1,"count":3}]}
{"name":"summary2","labels":{"summary":"label"},"type":"summary","count":3,"sum":1.1E1,"quantiles":[{"quantile":5E-1,"count":3},{"quantile":9E-1,"count":5},{"quantile":9.9E-1,"count":5},{"quantile":9.99E-1,"count":5}]}
)"));
Expand Down
2 changes: 1 addition & 1 deletion test/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ TEST_CASE("Metric.Histogram", "[metric][histogram]")

CHECK(histogram.sum() == 13);
CHECK(histogram.count() == 4);
CHECK(values.size() == 3);
CHECK(values.size() == 4);
CHECK(values[0].first == 1.);
CHECK(values[1].first == 2.);
CHECK(values[2].first == 5.);
Expand Down

0 comments on commit e7a6ca4

Please sign in to comment.