Skip to content

Commit e7a6ca4

Browse files
authored
Fixed potential data race between Histogram count and buckets (#71)
* Fixed potential data race between Histogram count and buckets * Add missing header * Clarified about 'sum' in README
1 parent 5e352b2 commit e7a6ca4

File tree

8 files changed

+54
-28
lines changed

8 files changed

+54
-28
lines changed

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# metrics-cpp
1+
# metrics-cpp
22

33
[![Build](https://github.com/DarkWanderer/metrics-cpp/actions/workflows/build.yml/badge.svg)](https://github.com/DarkWanderer/metrics-cpp/actions/workflows/build.yml)
44
![Readiness](https://img.shields.io/badge/readiness-beta-yellow)
@@ -28,9 +28,11 @@ The design goals of this library are the following:
2828

2929
## Limitations & compromises
3030

31-
Due to limited number of locks employed, there is no strong consistency guarantee between different metrics. If a particular thread changes two counters and serialization happens in the middle, you may see a value for one counter increasing but not for the other - until the next time metrics are collected. Hence, care must be taken when creating alerts based on metrics differential. Another compromise stemming from minimized locking requirements is inability to _remove_ metrics from a `Registry` - however, that is something which is not supported by Prometheus anyway
32-
33-
Boost::accumulators do not work correctly under macOS, which prevents the Summary class from working - a more thorough investigation is pending
31+
* Due to the limited number of locks employed, there is no strong consistency guarantee between different metrics
32+
* If a particular thread changes two counters and serialization happens between the two updates, you may see the value of one counter increase but not the other - until the next time metrics are collected. Hence, care must be taken when creating alerts based on differences between metrics
33+
* For the same reason, a histogram's 'sum' may be out of sync with its total count - skewing the average value, with the skew bounded asymptotically by ⅟n
34+
* It's not possible to _remove_ metrics from a `Registry` - a limitation conceptually shared with Prometheus
35+
* Boost::accumulators do not work correctly under macOS, which prevents the Summary class from working there - a more thorough investigation is pending
3436

3537
## Readiness
3638

src/common/json.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ namespace Metrics {
5555
serialized["type"] = "histogram";
5656

5757
auto s = std::static_pointer_cast<IHistogram>(metric);
58-
serialized["count"] = s->count();
5958
serialized["sum"] = s->sum();
6059

6160
json::array buckets;
@@ -65,6 +64,9 @@ namespace Metrics {
6564
v["bound"] = kv.first;
6665
v["count"] = kv.second;
6766
buckets.emplace_back(v);
67+
68+
if (kv.first == numeric_limits<double>::infinity())
69+
serialized["count"] = kv.second;
6870
}
6971
serialized["buckets"] = buckets;
7072
}

src/common/metrics.cpp

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
#include <metrics/metric.h>
22

33
#include <algorithm>
4-
#include <mutex>
54
#include <atomic>
6-
#include <vector>
75
#include <list>
86
#include <map>
7+
#include <mutex>
8+
#include <numeric>
9+
#include <vector>
10+
#include <iterator>
911

1012
using namespace std;
1113

@@ -102,32 +104,37 @@ namespace Metrics
102104

103105
class HistogramImpl : public IHistogram {
104106
private:
105-
vector<double> m_bounds;
107+
const vector<double> m_bounds;
106108
vector<CounterImpl> m_counts;
107-
CounterImpl m_count;
108109
GaugeImpl m_sum;
109110

111+
static vector<double> preprocessBounds(const vector<double>& input) {
112+
vector<double> bounds;
113+
114+
// Reserve extra for +Inf
115+
bounds.reserve(bounds.size() + 1);
116+
copy(input.begin(), input.end(), back_inserter(bounds));
117+
118+
// Add mandatory +Inf bound
119+
bounds.push_back(numeric_limits<double>::infinity());
120+
sort(bounds.begin(), bounds.end());
121+
auto last = unique(bounds.begin(), bounds.end());
122+
bounds.erase(last, bounds.end());
123+
return bounds;
124+
}
110125
public:
111-
HistogramImpl(const vector<double>& bounds)
126+
HistogramImpl(const vector<double>& bounds) :
127+
m_bounds(preprocessBounds(bounds)), m_counts(m_bounds.size()), m_sum()
112128
{
113-
m_bounds = bounds;
114-
sort(m_bounds.begin(), m_bounds.end());
115-
auto last = unique(m_bounds.begin(), m_bounds.end());
116-
m_bounds.erase(last, m_bounds.end());
117-
m_counts = vector<CounterImpl>(m_bounds.size());
118129
}
119130

120131
HistogramImpl(const HistogramImpl&) = delete;
121132

122133
IHistogram& observe(double value) override {
123-
m_count++;
124134
m_sum += value;
125-
auto bound = lower_bound(m_bounds.begin(), m_bounds.end(), value);
126-
if (bound != m_bounds.end())
127-
{
128-
auto index = distance(m_bounds.begin(), bound);
129-
m_counts[index]++;
130-
}
135+
auto bound = lower_bound(m_bounds.begin(), m_bounds.end(), value); // Guaranteed to find because of the infinity bound
136+
auto index = distance(m_bounds.begin(), bound);
137+
m_counts[index]++;
131138
return *this;
132139
}
133140

@@ -145,7 +152,14 @@ namespace Metrics
145152
return result;
146153
};
147154

148-
uint64_t count() const override { return m_count; };
155+
uint64_t count() const override {
156+
uint64_t result = 0;
157+
for (auto& c : m_counts) {
158+
result += c.value();
159+
}
160+
return result;
161+
};
162+
149163
double sum() const override { return m_sum; };
150164
};
151165

src/metrics/histogram.cpp

Whitespace-only changes.

src/metrics/summary.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ namespace Metrics {
6666
};
6767

6868
std::shared_ptr<ISummary> makeSummary(const vector<double>& quantiles, double error) {
69+
// Explicit copy
6970
auto q = quantiles;
7071
sort(q.begin(), q.end());
7172
return std::make_shared<SummaryImpl>(q, error);

src/prometheus/prometheus_serialize.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <metrics/sink.h>
33

44
#include <iostream>
5+
#include <limits>
56
#include <sstream>
67

78
using namespace std;
@@ -55,15 +56,19 @@ namespace Metrics {
5556

5657
void serialize(ostream& os, const string& name, const Labels& labels, const IHistogram& histogram)
5758
{
59+
uint64_t count = 0;
5860
for (auto& value : histogram.values()) {
5961
os << name << '{';
6062
for (auto kv = labels.cbegin(); kv != labels.cend(); kv++) {
6163
os << kv->first << "=\"" << kv->second << '"' << ',';
6264
}
6365
os << "le=\"" << value.first << "\"} " << value.second << endl;
66+
67+
if (value.first == numeric_limits<double>::infinity())
68+
count = value.second;
6469
}
6570
os << name << "_sum" << labels << ' ' << histogram.sum() << endl;
66-
os << name << "_count" << labels << ' ' << histogram.count() << endl;
71+
os << name << "_count" << labels << ' ' << count << endl;
6772
}
6873

6974
void serialize(ostream& os, const string& name, const Labels& labels, const std::shared_ptr<IMetric> metric)

test/serialization.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,14 @@ gauge2{another="label"} 200
4040
histogram1{le="1"} 1
4141
histogram1{le="2"} 2
4242
histogram1{le="5"} 2
43+
histogram1{le="inf"} 2
4344
histogram1_sum 3
4445
histogram1_count 2
4546
# TYPE histogram2 histogram
4647
histogram2{more="labels",le="1"} 0
4748
histogram2{more="labels",le="2"} 0
4849
histogram2{more="labels",le="5"} 2
50+
histogram2{more="labels",le="inf"} 2
4951
histogram2_sum{more="labels"} 7
5052
histogram2_count{more="labels"} 2
5153
# TYPE summary1 summary
@@ -70,7 +72,7 @@ TEST_CASE("Serialize.Json", "[json]")
7072
auto registry = createReferenceRegistry();
7173
auto result = Metrics::Json::serializeJson(registry);
7274

73-
CHECK_THAT(result, Equals(R"([{"name":"counter1","type":"counter","value":1},{"name":"counter2","labels":{"label":"value1"},"type":"counter","value":1},{"name":"counter2","labels":{"label":"value2"},"type":"counter","value":2},{"name":"gauge1","type":"gauge","value":1E2},{"name":"gauge2","labels":{"another":"label"},"type":"gauge","value":2E2},{"name":"histogram1","type":"histogram","count":2,"sum":3E0,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2}]},{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","count":2,"sum":7E0,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2}]},{"name":"summary1","type":"summary","count":3,"sum":6E0,"quantiles":[{"quantile":5E-1,"count":2},{"quantile":9E-1,"count":3},{"quantile":9.9E-1,"count":3},{"quantile":9.99E-1,"count":3}]},{"name":"summary2","labels":{"summary":"label"},"type":"summary","count":3,"sum":1.1E1,"quantiles":[{"quantile":5E-1,"count":3},{"quantile":9E-1,"count":5},{"quantile":9.9E-1,"count":5},{"quantile":9.99E-1,"count":5}]}])"));
75+
CHECK_THAT(result, Equals(R"([{"name":"counter1","type":"counter","value":1},{"name":"counter2","labels":{"label":"value1"},"type":"counter","value":1},{"name":"counter2","labels":{"label":"value2"},"type":"counter","value":2},{"name":"gauge1","type":"gauge","value":1E2},{"name":"gauge2","labels":{"another":"label"},"type":"gauge","value":2E2},{"name":"histogram1","type":"histogram","sum":3E0,"count":2,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]},{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","sum":7E0,"count":2,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]},{"name":"summary1","type":"summary","count":3,"sum":6E0,"quantiles":[{"quantile":5E-1,"count":2},{"quantile":9E-1,"count":3},{"quantile":9.9E-1,"count":3},{"quantile":9.99E-1,"count":3}]},{"name":"summary2","labels":{"summary":"label"},"type":"summary","count":3,"sum":1.1E1,"quantiles":[{"quantile":5E-1,"count":3},{"quantile":9E-1,"count":5},{"quantile":9.9E-1,"count":5},{"quantile":9.99E-1,"count":5}]}])"));
7476
}
7577

7678
TEST_CASE("Serialize.Jsonl", "[jsonl]")
@@ -83,8 +85,8 @@ TEST_CASE("Serialize.Jsonl", "[jsonl]")
8385
{"name":"counter2","labels":{"label":"value2"},"type":"counter","value":2}
8486
{"name":"gauge1","type":"gauge","value":1E2}
8587
{"name":"gauge2","labels":{"another":"label"},"type":"gauge","value":2E2}
86-
{"name":"histogram1","type":"histogram","count":2,"sum":3E0,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2}]}
87-
{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","count":2,"sum":7E0,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2}]}
88+
{"name":"histogram1","type":"histogram","sum":3E0,"count":2,"buckets":[{"bound":1E0,"count":1},{"bound":2E0,"count":2},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]}
89+
{"name":"histogram2","labels":{"more":"labels"},"type":"histogram","sum":7E0,"count":2,"buckets":[{"bound":1E0,"count":0},{"bound":2E0,"count":0},{"bound":5E0,"count":2},{"bound":1e99999,"count":2}]}
8890
{"name":"summary1","type":"summary","count":3,"sum":6E0,"quantiles":[{"quantile":5E-1,"count":2},{"quantile":9E-1,"count":3},{"quantile":9.9E-1,"count":3},{"quantile":9.99E-1,"count":3}]}
8991
{"name":"summary2","labels":{"summary":"label"},"type":"summary","count":3,"sum":1.1E1,"quantiles":[{"quantile":5E-1,"count":3},{"quantile":9E-1,"count":5},{"quantile":9.9E-1,"count":5},{"quantile":9.99E-1,"count":5}]}
9092
)"));

test/tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ TEST_CASE("Metric.Histogram", "[metric][histogram]")
9595

9696
CHECK(histogram.sum() == 13);
9797
CHECK(histogram.count() == 4);
98-
CHECK(values.size() == 3);
98+
CHECK(values.size() == 4);
9999
CHECK(values[0].first == 1.);
100100
CHECK(values[1].first == 2.);
101101
CHECK(values[2].first == 5.);

0 commit comments

Comments
 (0)