Skip to content

Commit

Permalink
Add a mean aggregation function
Browse files Browse the repository at this point in the history
I needed this for an ingress/egress chart and was surprised we didn't
have that yet. This was easy enough to implement, so I just went ahead
and did it.
  • Loading branch information
dominiklohmann committed May 11, 2024
1 parent a34ec47 commit c5996ca
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
105 changes: 105 additions & 0 deletions libtenzir/builtins/aggregation-functions/mean.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// _ _____ __________
// | | / / _ | / __/_ __/ Visibility
// | |/ / __ |_\ \ / / Across
// |___/_/ |_/___/ /_/ Space and Time
//
// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
// SPDX-License-Identifier: BSD-3-Clause

#include <tenzir/aggregation_function.hpp>
#include <tenzir/plugin.hpp>

namespace tenzir::plugins::mean {

namespace {

/// Aggregation function that computes the arithmetic mean of its inputs.
///
/// The mean is maintained incrementally: each accepted sample `x` moves the
/// current mean by `(x - mean) / count`, so no running sum is stored. Samples
/// are converted to `double` before being folded in, and the result type is
/// always `double`.
///
/// @tparam Type The Tenzir type of the aggregated values. The plugin in this
/// file instantiates it for `uint64_type`, `int64_type`, and `double_type`.
template <basic_type Type>
class mean_function final : public aggregation_function {
public:
  /// Constructs the function for the given input type, which is forwarded to
  /// the `aggregation_function` base.
  explicit mean_function(type input_type) noexcept
    : aggregation_function(std::move(input_type)) {
  }

private:
  /// The mean is always reported as a double, independent of `Type`.
  auto output_type() const -> type override {
    return type{double_type{}};
  }

  /// Folds a single value into the mean; a `caf::none_t` view is ignored.
  auto add(const data_view& view) -> void override {
    using view_type = tenzir::view<type_to_data_t<Type>>;
    if (not caf::holds_alternative<caf::none_t>(view)) {
      update_mean(static_cast<double>(caf::get<view_type>(view)));
    }
  }

  /// Folds every set element of an Arrow array into the mean; elements that
  /// test false (presumably nulls) are skipped.
  void add(const arrow::Array& array) override {
    const auto& typed_array = caf::get<type_to_arrow_array_t<Type>>(array);
    for (auto&& element : values(Type{}, typed_array)) {
      if (element) {
        update_mean(static_cast<double>(*element));
      }
    }
  }

  /// Yields the mean, or a default-constructed `data` if nothing was added.
  auto finish() && -> caf::expected<data> override {
    if (count_ != 0) {
      return data{mean_};
    }
    return data{};
  }

  /// Applies one step of the incremental mean update for `sample`.
  auto update_mean(double sample) -> void {
    count_ += 1;
    mean_ += (sample - mean_) / count_;
  }

  /// Mean of all samples accepted so far.
  double mean_ = {};
  /// Number of samples accepted so far.
  size_t count_ = {};
};

/// Plugin that provides the `mean` aggregation function.
class plugin : public virtual aggregation_function_plugin {
  /// Neither configuration record is used; initialization always succeeds.
  caf::error initialize([[maybe_unused]] const record& plugin_config,
                        [[maybe_unused]] const record& global_config) override {
    return caf::error{};
  }

  /// The name under which the aggregation function is exposed.
  [[nodiscard]] std::string name() const override {
    return "mean";
  }

  /// Creates a `mean_function` specialized for `input_type`.
  ///
  /// Only `uint64_type`, `int64_type`, and `double_type` inputs are accepted;
  /// every other concrete type yields an `ec::invalid_configuration` error.
  [[nodiscard]] caf::expected<std::unique_ptr<aggregation_function>>
  make_aggregation_function(const type& input_type) const override {
    using result_type = caf::expected<std::unique_ptr<aggregation_function>>;
    auto dispatch = detail::overload{
      [&](const uint64_type&) -> result_type {
        return std::make_unique<mean_function<uint64_type>>(input_type);
      },
      [&](const int64_type&) -> result_type {
        return std::make_unique<mean_function<int64_type>>(input_type);
      },
      [&](const double_type&) -> result_type {
        return std::make_unique<mean_function<double_type>>(input_type);
      },
      // Fallback for every type without a dedicated overload above.
      [](const concrete_type auto& unsupported) -> result_type {
        return caf::make_error(ec::invalid_configuration,
                               fmt::format("mean aggregation function does not "
                                           "support type {}",
                                           unsupported));
      },
    };
    return caf::visit(dispatch, input_type);
  }

  /// The default value for this aggregation is `caf::none`.
  auto aggregation_default() const -> data override {
    return caf::none;
  }
};

} // namespace

} // namespace tenzir::plugins::mean

// Registers the `mean` aggregation function plugin through Tenzir's plugin
// registration macro (the macro itself is defined outside this file).
TENZIR_REGISTER_PLUGIN(tenzir::plugins::mean::plugin)
1 change: 1 addition & 0 deletions web/docs/operators/summarize.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ The following aggregation functions are available:
- `sum`: Computes the sum of all grouped values.
- `min`: Computes the minimum of all grouped values.
- `max`: Computes the maximum of all grouped values.
- `mean`: Computes the mean of all grouped values. Requires the values to be integers or floating-point numbers.
- `any`: Computes the disjunction (OR) of all grouped values. Requires the
values to be booleans.
- `all`: Computes the conjunction (AND) of all grouped values. Requires the
Expand Down

0 comments on commit c5996ca

Please sign in to comment.