Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support IP-in-subnet queries in lookup tables #4051

Merged
merged 21 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog/next/features/4051--subnet-lookups.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The `lookup-table` context now performs longest-prefix matches when the table
key is of type `subnet` and the to-be-enriched field is of type `ip`. For
example, a lookup table with key `10.0.0.0/8` will match when enriching the IP
address `10.1.1.1`.
2 changes: 1 addition & 1 deletion contrib/tenzir-plugins
97 changes: 84 additions & 13 deletions libtenzir/builtins/contexts/lookup_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <tenzir/concept/parseable/to.hpp>
#include <tenzir/data.hpp>
#include <tenzir/detail/range_map.hpp>
#include <tenzir/detail/subnet_tree.hpp>
#include <tenzir/expression.hpp>
#include <tenzir/fbs/data.hpp>
#include <tenzir/flatbuffer.hpp>
Expand All @@ -32,8 +33,6 @@
#include <caf/sum_type.hpp>
#include <tsl/robin_map.h>

#include <chrono>
#include <concepts>
#include <memory>
#include <string>

Expand Down Expand Up @@ -102,7 +101,7 @@ class key_data {
data_(from_data(std::move(d))) {
}

friend bool operator==(const key_data& a, const key_data& b) {
friend auto operator==(const key_data& a, const key_data& b) -> bool {
return a.data_ == b.data_;
}

Expand Down Expand Up @@ -217,8 +216,10 @@ class ctx final : public virtual context {
public:
ctx() noexcept = default;

explicit ctx(map_type context_entries) noexcept
: context_entries{std::move(context_entries)} {
explicit ctx(map_type context_entries,
detail::subnet_tree subnet_entries) noexcept
: context_entries{std::move(context_entries)},
subnet_entries{std::move(subnet_entries)} {
// nop
}

Expand All @@ -229,10 +230,29 @@ class ctx final : public virtual context {
auto apply(series array, bool replace) const
-> caf::expected<std::vector<series>> override {
auto builder = series_builder{};
auto subnet_lookup = [&](const auto& value) -> std::optional<view<data>> {
auto match = detail::overload{
[&](const auto&) -> const data* {
return nullptr;
},
[&](view<ip> addr) {
return subnet_entries.match(materialize(addr));
},
[&](view<subnet> sn) {
return subnet_entries.match(materialize(sn));
},
};
if (auto x = caf::visit(match, value)) {
return make_view(*x);
}
return std::nullopt;
};
for (const auto& value : array.values()) {
if (auto it = context_entries.find(materialize(value));
it != context_entries.end()) {
builder.data(it->second);
} else if (auto x = subnet_lookup(value)) {
builder.data(*x);
} else if (replace and not caf::holds_alternative<caf::none_t>(value)) {
builder.data(value);
} else {
Expand Down Expand Up @@ -260,6 +280,9 @@ class ctx final : public virtual context {
"heterogeneous keys");
}
}
for (const auto& [k, _] : subnet_entries.nodes()) {
keys.emplace_back(k);
}
auto result = disjunction{};
result.reserve(fields.size());
for (const auto& field : fields) {
Expand All @@ -276,11 +299,29 @@ class ctx final : public virtual context {

/// Inspects the context.
/// @returns A record with a single `num_entries` field that holds the number
/// of entries in the regular map plus the number of nodes in the subnet tree.
auto show() const -> record override {
  // There's no size() function for the PATRICIA trie, so we walk the tree
  // nodes here once in O(n).
  auto num_subnet_entries = size_t{0};
  for ([[maybe_unused]] const auto& node : subnet_entries.nodes()) {
    ++num_subnet_entries;
  }
  // Subnet keys never live in `context_entries`, so the two counts are summed
  // to report the total.
  return record{
    {"num_entries", context_entries.size() + num_subnet_entries},
  };
}

auto dump() -> generator<table_slice> override {
auto entry_builder = series_builder{};
for (const auto& [key, value] : subnet_entries.nodes()) {
auto row = entry_builder.record();
row.field("key", data{key});
row.field("value", value ? *value : data{});
if (entry_builder.length() >= context::dump_batch_size_limit) {
co_yield entry_builder.finish_assert_one_slice(
fmt::format("tenzir.{}.info", context_type()));
}
}
for (const auto& [key, value] : context_entries) {
auto row = entry_builder.record();
row.field("key", key.to_original_data());
Expand Down Expand Up @@ -328,8 +369,14 @@ class ctx final : public virtual context {
while (key_it != key_values.end()) {
TENZIR_ASSERT(context_it != context_values.end());
auto materialized_key = materialize(*key_it);
context_entries.insert_or_assign(materialized_key,
materialize(*context_it));
// Subnets never make it into the regular map of entries.
dominiklohmann marked this conversation as resolved.
Show resolved Hide resolved
if (caf::holds_alternative<subnet_type>(key_type)) {
const auto& key = caf::get<subnet>(materialized_key);
subnet_entries.insert(key, materialize(*context_it));
} else {
context_entries.insert_or_assign(materialized_key,
materialize(*context_it));
}
key_values_list.emplace_back(materialized_key);
++key_it;
++context_it;
Expand Down Expand Up @@ -383,6 +430,7 @@ class ctx final : public virtual context {

/// Drops all entries of this context: both the subnet tree and the regular
/// entry map are emptied.
/// @returns An empty (successful) result.
auto reset() -> caf::expected<void> override {
  subnet_entries.clear();
  context_entries.clear();
  return {};
}

Expand All @@ -409,6 +457,23 @@ class ctx final : public virtual context {
value_offsets.emplace_back(fbs::CreateData(
builder, fbs::data::Data::record, record_offset.Union()));
}
for (const auto& [key, value] : subnet_entries.nodes()) {
auto field_offsets
= std::vector<flatbuffers::Offset<fbs::data::RecordField>>{};
field_offsets.reserve(2);
const auto key_key_offset = builder.CreateSharedString("key");
const auto key_value_offset = pack(builder, data{key});
field_offsets.emplace_back(fbs::data::CreateRecordField(
builder, key_key_offset, key_value_offset));
const auto value_key_offset = builder.CreateSharedString("value");
const auto value_value_offset = pack(builder, *value);
field_offsets.emplace_back(fbs::data::CreateRecordField(
builder, value_key_offset, value_value_offset));
const auto record_offset
= fbs::data::CreateRecordDirect(builder, &field_offsets);
value_offsets.emplace_back(fbs::CreateData(
builder, fbs::data::Data::record, record_offset.Union()));
}
const auto list_offset
= fbs::data::CreateListDirect(builder, &value_offsets);
const auto data_offset
Expand All @@ -419,15 +484,16 @@ class ctx final : public virtual context {

private:
map_type context_entries;
detail::subnet_tree subnet_entries;
};

struct v1_loader : public context_loader {
auto version() const -> int {
auto version() const -> int final {
return 1;
}

auto load(chunk_ptr serialized) const
-> caf::expected<std::unique_ptr<context>> {
-> caf::expected<std::unique_ptr<context>> final {
auto fb = flatbuffer<fbs::Data>::make(std::move(serialized));
if (not fb) {
return caf::make_error(ec::serialization_error,
Expand All @@ -436,6 +502,7 @@ struct v1_loader : public context_loader {
fb.error()));
}
auto context_entries = map_type{};
auto subnet_entries = detail::subnet_tree{};
const auto* list = fb.value()->data_as_list();
if (not list) {
return caf::make_error(ec::serialization_error,
Expand Down Expand Up @@ -483,10 +550,14 @@ struct v1_loader : public context_loader {
"context: invalid value: {}",
err));
}
context_entries.emplace(std::move(key), std::move(value));
if (auto* sn = caf::get_if<subnet>(&key)) {
subnet_entries.insert(*sn, std::move(value));
} else {
context_entries.emplace(std::move(key), std::move(value));
}
}

return std::make_unique<ctx>(std::move(context_entries));
return std::make_unique<ctx>(std::move(context_entries),
std::move(subnet_entries));
}
};

Expand Down
67 changes: 67 additions & 0 deletions libtenzir/include/tenzir/detail/subnet_tree.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// _ _____ __________
// | | / / _ | / __/_ __/ Visibility
// | |/ / __ |_\ \ / / Across
// |___/_/ |_/___/ /_/ Space and Time
//
// SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <tenzir/data.hpp>
#include <tenzir/generator.hpp>
#include <tenzir/subnet.hpp>

namespace tenzir::detail {

/// A tree that maps subnets to data values and supports exact lookups,
/// longest-prefix matches, and prefix searches for IP addresses and subnets.
///
/// The tree is move-only; its state lives behind an opaque implementation
/// pointer.
class subnet_tree {
public:
  /// Constructs an empty tree.
  subnet_tree();

  /// Copying is not supported. Declaring the move operations below already
  /// suppresses the implicit copy operations; they are deleted explicitly here
  /// to make that visible at the call site.
  subnet_tree(const subnet_tree&) = delete;
  auto operator=(const subnet_tree&) -> subnet_tree& = delete;

  /// Move-constructs a tree from another one.
  subnet_tree(subnet_tree&& other) noexcept;

  /// Move-assigns another tree to this one.
  auto operator=(subnet_tree&& other) noexcept -> subnet_tree&;

  /// Destroys a tree.
  ~subnet_tree();

  /// Looks for a value for a given key.
  /// @note Unlike `match`, this performs an exact match and not a
  /// longest-prefix match.
  /// @returns A pointer to the value stored for *key*; presumably `nullptr` if
  /// there is no such key (confirm against the implementation).
  auto lookup(subnet key) const -> const data*;

  /// Looks for the longest-prefix match of a subnet in which the given IP
  /// address occurs.
  /// @returns A pointer to the matching value; callers treat a null pointer as
  /// "no match".
  auto match(ip key) const -> const data*;

  /// Looks for the longest-prefix match of a subnet.
  /// @returns A pointer to the matching value; callers treat a null pointer as
  /// "no match".
  auto match(subnet key) const -> const data*;

  /// Performs a prefix-search for a given IP address, returning all subnets
  /// that contain it.
  auto search(ip key) const
    -> generator<std::pair<subnet, const data*>>;

  /// Performs a prefix-search for a given subnet, returning all subnets that
  /// contain it.
  auto search(subnet key) const
    -> generator<std::pair<subnet, const data*>>;

  /// Retrieves all nodes in the tree as pairs of subnet and pointer to the
  /// associated value.
  auto nodes() const -> generator<std::pair<subnet, const data*>>;

  /// Inserts a key-value pair.
  /// @returns Presumably whether the insertion took place (confirm against the
  /// implementation).
  auto insert(subnet key, data value) -> bool;

  /// Removes a node.
  /// @returns Presumably whether a node was removed (confirm against the
  /// implementation).
  auto erase(subnet key) -> bool;

  /// Removes all elements from the tree.
  auto clear() -> void;

private:
  /// Opaque implementation type, defined outside of this header.
  struct impl;
  std::unique_ptr<impl> impl_;
};

} // namespace tenzir::detail