diff --git a/src/asm_cfg.cpp b/src/asm_cfg.cpp index d2cdd912..a3b4cad8 100644 --- a/src/asm_cfg.cpp +++ b/src/asm_cfg.cpp @@ -7,20 +7,21 @@ #include #include -#include "asm_syntax.hpp" #include "config.hpp" #include "crab/cfg.hpp" #include "crab/wto.hpp" +#include "asm_syntax.hpp" +#include "program.hpp" + using std::optional; using std::set; using std::string; using std::to_string; using std::vector; -namespace crab { struct cfg_builder_t final { - cfg_t cfg; + Program prog; // TODO: ins should be inserted elsewhere void insert_after(const label_t& prev_label, const label_t& new_label, const GuardedInstruction& ins) { @@ -28,10 +29,10 @@ struct cfg_builder_t final { CRAB_ERROR("Cannot insert after the same label ", to_string(new_label)); } std::set prev_children; - std::swap(prev_children, cfg.get_node(prev_label).children); + std::swap(prev_children, prog.m_cfg.get_node(prev_label).children); for (const label_t& next_label : prev_children) { - cfg.get_node(next_label).parents.erase(prev_label); + prog.m_cfg.get_node(next_label).parents.erase(prev_label); } insert(new_label, ins); @@ -43,18 +44,17 @@ struct cfg_builder_t final { // TODO: ins should be inserted elsewhere void insert(const label_t& _label, const GuardedInstruction& ins) { - const auto it = cfg.neighbours.find(_label); - if (it != cfg.neighbours.end()) { + if (const auto it = prog.m_cfg.neighbours.find(_label); it != prog.m_cfg.neighbours.end()) { CRAB_ERROR("Label ", to_string(_label), " already exists"); } - cfg.neighbours.emplace(_label, cfg_t::adjacent_t{}); - cfg.instructions.emplace(_label, ins); + prog.m_cfg.neighbours.emplace(_label, crab::cfg_t::adjacent_t{}); + prog.m_instructions.emplace(_label, ins); } // TODO: ins should be inserted elsewhere label_t insert_jump(const label_t& from, const label_t& to, const GuardedInstruction& ins) { const label_t jump_label = label_t::make_jump(from, to); - if (cfg.contains(jump_label)) { + if (prog.m_cfg.contains(jump_label)) { CRAB_ERROR("Jump label ", to_string(jump_label), " already exists"); } insert(jump_label, ins); @@ -66,20 +66,24 @@ struct cfg_builder_t final { void add_child(const label_t& a, const label_t& b) { assert(b != label_t::entry); assert(a != label_t::exit); - cfg.neighbours.at(a).children.insert(b); - cfg.neighbours.at(b).parents.insert(a); + prog.m_cfg.neighbours.at(a).children.insert(b); + prog.m_cfg.neighbours.at(b).parents.insert(a); } void remove_child(const label_t& a, const label_t& b) { - cfg.get_node(a).children.erase(b); - cfg.get_node(b).parents.erase(a); + prog.m_cfg.get_node(a).children.erase(b); + prog.m_cfg.get_node(b).parents.erase(a); + } + + void set_assertions(const label_t& label, const std::vector& assertions) { + if (!prog.m_instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); + } + prog.m_instructions.at(label).preconditions = assertions; } }; -} // namespace crab using crab::basic_block_t; -using crab::cfg_builder_t; -using crab::cfg_t; /// Get the inverse of a given comparison operation. static Condition::Op reverse(const Condition::Op op) { @@ -130,13 +134,13 @@ static void add_cfg_nodes(cfg_builder_t& builder, const label_t& caller_label, c bool first = true; // Get the label of the node to go to on returning from the macro. - label_t exit_to_label = builder.cfg.get_child(caller_label); + label_t exit_to_label = builder.prog.cfg().get_child(caller_label); // Construct the variable prefix to use for the new stack frame, // and store a copy in the CallLocal instruction since the instruction-specific // labels may only exist until the CFG is simplified. const std::string stack_frame_prefix = to_string(caller_label); - if (const auto pcall = std::get_if(&builder.cfg.instruction_at(caller_label))) { + if (const auto pcall = std::get_if(&builder.prog.instruction_at(caller_label))) { pcall->stack_frame_prefix = stack_frame_prefix; } @@ -149,12 +153,12 @@ static void add_cfg_nodes(cfg_builder_t& builder, const label_t& caller_label, c macro_labels.erase(macro_label); if (stack_frame_prefix == macro_label.stack_frame_prefix) { - throw crab::InvalidControlFlow{stack_frame_prefix + ": illegal recursion"}; + throw InvalidControlFlow{stack_frame_prefix + ": illegal recursion"}; } // Clone the macro block into a new block with the new stack frame prefix. const label_t label{macro_label.from, macro_label.to, stack_frame_prefix}; - auto inst = builder.cfg.instruction_at(macro_label); + auto inst = builder.prog.instruction_at(macro_label); if (const auto pexit = std::get_if(&inst)) { pexit->stack_frame_prefix = label.stack_frame_prefix; } else if (const auto pcall = std::get_if(&inst)) { @@ -169,18 +173,19 @@ static void add_cfg_nodes(cfg_builder_t& builder, const label_t& caller_label, c } // Add an edge from any other predecessors. - for (const auto& prev_macro_nodes = builder.cfg.parents_of(macro_label); + for (const auto& prev_macro_nodes = builder.prog.cfg().parents_of(macro_label); const auto& prev_macro_label : prev_macro_nodes) { const label_t prev_label(prev_macro_label.from, prev_macro_label.to, to_string(caller_label)); - if (const auto& labels = builder.cfg.labels(); std::ranges::find(labels, prev_label) != labels.end()) { + if (const auto& labels = builder.prog.cfg().labels(); + std::ranges::find(labels, prev_label) != labels.end()) { builder.add_child(prev_label, label); } } // Walk all successor nodes. - for (const auto& next_macro_nodes = builder.cfg.children_of(macro_label); + for (const auto& next_macro_nodes = builder.prog.cfg().children_of(macro_label); const auto& next_macro_label : next_macro_nodes) { - if (next_macro_label == builder.cfg.exit_label()) { + if (next_macro_label == builder.prog.cfg().exit_label()) { // This is an exit transition, so add edge to the block to execute // upon returning from the macro. builder.add_child(label, exit_to_label); @@ -203,9 +208,9 @@ static void add_cfg_nodes(cfg_builder_t& builder, const label_t& caller_label, c const long stack_frame_depth = std::ranges::count(caller_label_str, STACK_FRAME_DELIMITER) + 2; for (const auto& macro_label : seen_labels) { const label_t label{macro_label.from, macro_label.to, caller_label_str}; - if (const auto pins = std::get_if(&builder.cfg.instruction_at(label))) { + if (const auto pins = std::get_if(&builder.prog.instruction_at(label))) { if (stack_frame_depth >= MAX_CALL_STACK_FRAMES) { - throw crab::InvalidControlFlow{"too many call stack frames"}; + throw InvalidControlFlow{"too many call stack frames"}; } add_cfg_nodes(builder, label, pins->target); } @@ -225,10 +230,10 @@ static cfg_builder_t instruction_seq_to_cfg(const InstructionSeq& insts, const b } if (insts.size() == 0) { - throw crab::InvalidControlFlow{"empty instruction sequence"}; + throw InvalidControlFlow{"empty instruction sequence"}; } else { const auto& [label, inst, _0] = insts[0]; - builder.add_child(builder.cfg.entry_label(), label); + builder.add_child(builder.prog.cfg().entry_label(), label); } // Do a first pass ignoring all function macro calls. @@ -239,12 +244,12 @@ static cfg_builder_t instruction_seq_to_cfg(const InstructionSeq& insts, const b continue; } - label_t fallthrough{builder.cfg.exit_label()}; + label_t fallthrough{builder.prog.cfg().exit_label()}; if (i + 1 < insts.size()) { fallthrough = std::get<0>(insts[i + 1]); } else { if (has_fall(inst) && must_have_exit) { - throw crab::InvalidControlFlow{"fallthrough in last instruction"}; + throw InvalidControlFlow{"fallthrough in last instruction"}; } } if (const auto jmp = std::get_if(&inst)) { @@ -254,8 +259,8 @@ static cfg_builder_t instruction_seq_to_cfg(const InstructionSeq& insts, const b builder.add_child(label, fallthrough); continue; } - if (!builder.cfg.contains(target_label)) { - throw crab::InvalidControlFlow{"jump to undefined label " + to_string(target_label)}; + if (!builder.prog.cfg().contains(target_label)) { + throw InvalidControlFlow{"jump to undefined label " + to_string(target_label)}; } builder.insert_jump(label, target_label, GuardedInstruction{.cmd = Assume{.cond = *cond, .is_implicit = true}}); @@ -270,7 +275,7 @@ static cfg_builder_t instruction_seq_to_cfg(const InstructionSeq& insts, const b } } if (std::holds_alternative(inst)) { - builder.add_child(label, builder.cfg.exit_label()); + builder.add_child(label, builder.prog.cfg().exit_label()); } } @@ -286,28 +291,29 @@ static cfg_builder_t instruction_seq_to_cfg(const InstructionSeq& insts, const b return builder; } -cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options) { +Program Program::from_sequence(const InstructionSeq& inst_seq, const program_info& info, + const prepare_cfg_options& options) { thread_local_program_info.set(info); // Convert the instruction sequence to a deterministic control-flow graph. - cfg_builder_t builder = instruction_seq_to_cfg(prog, options.must_have_exit); + cfg_builder_t builder = instruction_seq_to_cfg(inst_seq, options.must_have_exit); // Detect loops using Weak Topological Ordering (WTO) and insert counters at loop entry points. WTO provides a // hierarchical decomposition of the CFG that identifies all strongly connected components (cycles) and their entry // points. These entry points serve as natural locations for loop counters that help verify program termination. if (options.check_for_termination) { - const wto_t wto{builder.cfg}; + const crab::wto_t wto{builder.prog.cfg()}; wto.for_each_loop_head([&](const label_t& label) -> void { builder.insert_after(label, label_t::make_increment_counter(label), GuardedInstruction{.cmd = IncrementLoopCounter{label}}); }); } - // Annotate the CFG by adding in assertions before every memory instruction. - for (auto& [label, ins] : builder.cfg.instructions) { - builder.cfg.set_assertions(label, get_assertions(ins.cmd, info, label)); + // Annotate the CFG by explicitly adding in assertions before every memory instruction. + for (const auto& label : builder.prog.labels()) { + builder.set_assertions(label, get_assertions(builder.prog.instruction_at(label), info, label)); } - return builder.cfg; + return builder.prog; } std::set basic_block_t::collect_basic_blocks(const cfg_t& cfg, const bool simplify) { @@ -411,14 +417,14 @@ std::vector stats_headers() { }; } -std::map collect_stats(const cfg_t& cfg) { +std::map collect_stats(const Program& prog) { std::map res; for (const auto& h : stats_headers()) { res[h] = 0; } - for (const auto& label : cfg.labels()) { + for (const auto& label : prog.labels()) { res["instructions"]++; - const auto cmd = cfg.instruction_at(label); + const auto cmd = prog.instruction_at(label); if (const auto pins = std::get_if(&cmd)) { if (pins->mapfd == -1) { res["map_in_map"] = 1; @@ -433,19 +439,19 @@ std::map collect_stats(const cfg_t& cfg) { res[pins->is64 ? "arith64" : "arith32"]++; } res[instype(cmd)]++; - if (cfg.in_degree(label) > 1) { + if (prog.cfg().in_degree(label) > 1) { res["joins"]++; } - if (cfg.out_degree(label) > 1) { + if (prog.cfg().out_degree(label) > 1) { res["jumps"]++; } } return res; } -cfg_t crab::cfg_from_adjacency_list(const std::map>& adj_list) { +crab::cfg_t crab::cfg_from_adjacency_list(const std::map>& adj_list) { cfg_builder_t builder; - for (const auto& [label, children] : adj_list) { + for (const auto& label : std::views::keys(adj_list)) { if (label == label_t::entry || label == label_t::exit) { continue; } @@ -456,5 +462,5 @@ cfg_t crab::cfg_from_adjacency_list(const std::map builder.add_child(label, child); } } - return builder.cfg; + return builder.prog.cfg(); } diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index fadddec2..1fb9fab4 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -95,21 +95,21 @@ void print_jump(std::ostream& o, const std::string& direction, const std::set \"" << next << "\";\n"; } out << "\n"; @@ -141,12 +141,12 @@ void print_dot(const crab::cfg_t& cfg, std::ostream& out) { out << "}\n"; } -void print_dot(const crab::cfg_t& cfg, const std::string& outfile) { +void print_dot(const Program& prog, const std::string& outfile) { std::ofstream out{outfile}; if (out.fail()) { throw std::runtime_error(std::string("Could not open file ") + outfile); } - print_dot(cfg, out); + print_dot(prog, out); } void print_reachability(std::ostream& os, const Report& report) { @@ -174,9 +174,9 @@ void print_all_messages(std::ostream& os, const Report& report) { print_warnings(os, report); } -void print_invariants(std::ostream& os, const crab::cfg_t& cfg, const bool simplify, const Invariants& invariants) { - print_cfg( - cfg, os, simplify, +void print_invariants(std::ostream& os, const Program& prog, const bool simplify, const Invariants& invariants) { + print_program( + prog, os, simplify, [&](std::ostream& os, const label_t& label) -> void { os << "\nPre-invariant : " << invariants.invariants.at(label).pre << "\n"; }, diff --git a/src/crab/array_domain.cpp b/src/crab/array_domain.cpp index e6ad0830..5f102f46 100644 --- a/src/crab/array_domain.cpp +++ b/src/crab/array_domain.cpp @@ -793,11 +793,6 @@ std::optional array_domain_t::store_type(NumAbsDomain& inv, const li return {}; } -std::optional array_domain_t::store_type(NumAbsDomain& inv, const linear_expression_t& idx, - const linear_expression_t& elem_size, const Reg& reg) { - return store_type(inv, idx, elem_size, variable_t::reg(data_kind_t::types, reg.v)); -} - void array_domain_t::havoc(NumAbsDomain& inv, const data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size) { auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size); diff --git a/src/crab/array_domain.hpp b/src/crab/array_domain.hpp index 2eb56167..f281699d 100644 --- a/src/crab/array_domain.hpp +++ b/src/crab/array_domain.hpp @@ -76,8 +76,6 @@ class array_domain_t final { const linear_expression_t& elem_size, const linear_expression_t& val); std::optional store_type(NumAbsDomain& inv, const linear_expression_t& idx, const linear_expression_t& elem_size, const linear_expression_t& val); - std::optional store_type(NumAbsDomain& inv, const linear_expression_t& idx, - const linear_expression_t& elem_size, const Reg& reg); void havoc(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size); diff --git a/src/crab/cfg.hpp b/src/crab/cfg.hpp index 583bfe9b..c5e45367 100644 --- a/src/crab/cfg.hpp +++ b/src/crab/cfg.hpp @@ -14,16 +14,14 @@ #include #include "config.hpp" -// TODO: avoid including asm_syntax.hpp here -#include "asm_syntax.hpp" #include "crab/label.hpp" #include "crab_utils/debug.hpp" - +struct cfg_builder_t; namespace crab { /// Control-Flow Graph class cfg_t final { - friend struct cfg_builder_t; + friend struct ::cfg_builder_t; // the choice to use set means that unmarshaling a conditional jump to the same target may be different using label_vec_t = std::set; @@ -75,35 +73,6 @@ class cfg_t final { } public: - // TODO: temporarily here. Move to a separate class containing both cfg and instructions - std::map instructions{{label_t::entry, {.cmd = Undefined{}}}, - {label_t::exit, {.cmd = Undefined{}}}}; - const Instruction& instruction_at(const label_t& label) const { - if (!instructions.contains(label)) { - CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); - } - return instructions.at(label).cmd; - } - Instruction& instruction_at(const label_t& label) { - if (!instructions.contains(label)) { - CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); - } - return instructions.at(label).cmd; - } - - void set_assertions(const label_t& label, const std::vector& assertions) { - if (!instructions.contains(label)) { - CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); - } - instructions.at(label).preconditions = assertions; - } - const auto assertions_at(const label_t& label) const { - if (!instructions.contains(label)) { - CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); - } - return instructions.at(label).preconditions; - } - [[nodiscard]] label_t exit_label() const { return label_t::exit; @@ -214,21 +183,4 @@ class basic_block_t final { cfg_t cfg_from_adjacency_list(const std::map>& adj_list); -class InvalidControlFlow final : public std::runtime_error { - public: - explicit InvalidControlFlow(const std::string& what) : std::runtime_error(what) {} -}; - } // end namespace crab - -std::vector get_assertions(Instruction ins, const program_info& info, const std::optional& label); -crab::cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options); - -std::vector stats_headers(); -std::map collect_stats(const crab::cfg_t& cfg); - -using printfunc = std::function; -void print_cfg(const crab::cfg_t& cfg, std::ostream& os, const bool simplify, const printfunc& prefunc, - const printfunc& postfunc); -void print_cfg(const crab::cfg_t& cfg, std::ostream& os, const bool simplify); -void print_dot(const crab::cfg_t& cfg, const std::string& outfile); diff --git a/src/crab/ebpf_checker.cpp b/src/crab/ebpf_checker.cpp index 4a09e549..92f30b2c 100644 --- a/src/crab/ebpf_checker.cpp +++ b/src/crab/ebpf_checker.cpp @@ -15,6 +15,7 @@ #include "crab_utils/num_safety.hpp" #include "dsl_syntax.hpp" #include "platform.hpp" +#include "program.hpp" #include "string_constraints.hpp" using crab::domains::NumAbsDomain; diff --git a/src/crab/fwd_analyzer.cpp b/src/crab/fwd_analyzer.cpp index b650c7d8..ec2a7102 100644 --- a/src/crab/fwd_analyzer.cpp +++ b/src/crab/fwd_analyzer.cpp @@ -3,17 +3,17 @@ #include #include +#include "config.hpp" #include "crab/cfg.hpp" -#include "crab/wto.hpp" - #include "crab/ebpf_domain.hpp" #include "crab/fwd_analyzer.hpp" - -#include "config.hpp" +#include "crab/wto.hpp" +#include "program.hpp" namespace crab { class interleaved_fwd_fixpoint_iterator_t final { + const Program& _prog; const cfg_t& _cfg; const wto_t _wto; invariant_table_t _inv; @@ -36,12 +36,12 @@ class interleaved_fwd_fixpoint_iterator_t final { void transform_to_post(const label_t& label, ebpf_domain_t pre) { if (thread_local_options.assume_assertions) { - for (const auto& assertion : _cfg.assertions_at(label)) { + for (const auto& assertion : _prog.assertions_at(label)) { // avoid redundant errors ebpf_domain_assume(pre, assertion); } } - ebpf_domain_transform(pre, _cfg.instruction_at(label)); + ebpf_domain_transform(pre, _prog.instruction_at(label)); _inv.at(label).post = std::move(pre); } @@ -57,7 +57,8 @@ class interleaved_fwd_fixpoint_iterator_t final { return res; } - explicit interleaved_fwd_fixpoint_iterator_t(const cfg_t& cfg) : _cfg(cfg), _wto(cfg) { + explicit interleaved_fwd_fixpoint_iterator_t(const Program& prog) + : _prog(prog), _cfg(prog.cfg()), _wto(prog.cfg()) { for (const auto& label : _cfg.labels()) { _inv.emplace(label, invariant_map_pair{ebpf_domain_t::bottom(), ebpf_domain_t::bottom()}); } @@ -68,12 +69,12 @@ class interleaved_fwd_fixpoint_iterator_t final { void operator()(const std::shared_ptr& cycle); - friend invariant_table_t run_forward_analyzer(const cfg_t& cfg, ebpf_domain_t entry_inv); + friend invariant_table_t run_forward_analyzer(const Program& prog, ebpf_domain_t entry_inv); }; -invariant_table_t run_forward_analyzer(const cfg_t& cfg, ebpf_domain_t entry_inv) { +invariant_table_t run_forward_analyzer(const Program& prog, ebpf_domain_t entry_inv) { // Go over the CFG in weak topological order (accounting for loops). - interleaved_fwd_fixpoint_iterator_t analyzer(cfg); + interleaved_fwd_fixpoint_iterator_t analyzer(prog); if (thread_local_options.cfg_opts.check_for_termination) { // Initialize loop counters for potential loop headers. // This enables enforcement of upper bounds on loop iterations @@ -82,7 +83,7 @@ invariant_table_t run_forward_analyzer(const cfg_t& cfg, ebpf_domain_t entry_inv analyzer._wto.for_each_loop_head( [&](const label_t& label) { ebpf_domain_initialize_loop_counter(entry_inv, label); }); } - analyzer.set_pre(cfg.entry_label(), entry_inv); + analyzer.set_pre(prog.cfg().entry_label(), entry_inv); for (const auto& component : analyzer._wto) { std::visit(analyzer, component); } diff --git a/src/crab/fwd_analyzer.hpp b/src/crab/fwd_analyzer.hpp index c8783968..8035417f 100644 --- a/src/crab/fwd_analyzer.hpp +++ b/src/crab/fwd_analyzer.hpp @@ -4,8 +4,8 @@ #include -#include "crab/cfg.hpp" #include "crab/ebpf_domain.hpp" +#include "program.hpp" namespace crab { @@ -15,6 +15,6 @@ struct invariant_map_pair { }; using invariant_table_t = std::map; -invariant_table_t run_forward_analyzer(const cfg_t& cfg, ebpf_domain_t entry_inv); +invariant_table_t run_forward_analyzer(const Program& prog, ebpf_domain_t entry_inv); } // namespace crab diff --git a/src/crab/type_domain.hpp b/src/crab/type_domain.hpp index 9a15cc61..9c094efb 100644 --- a/src/crab/type_domain.hpp +++ b/src/crab/type_domain.hpp @@ -12,6 +12,8 @@ #include "crab/type_encoding.hpp" #include "crab/variable.hpp" +#include "asm_syntax.hpp" // for Reg + namespace crab { using domains::NumAbsDomain; diff --git a/src/crab/wto.cpp b/src/crab/wto.cpp index c95b6fd3..e843f420 100644 --- a/src/crab/wto.cpp +++ b/src/crab/wto.cpp @@ -4,14 +4,14 @@ #include "wto.hpp" -using crab::cfg_t; - // This file contains an iterative implementation of the recursive algorithm in // Bourdoncle, "Efficient chaotic iteration strategies with widenings", 1993 // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3574 // where _visit_stack is roughly equivalent to a stack trace in the recursive algorithm. // However, this scales much higher since it does not run out of stack memory. +namespace crab { + bool is_component_member(const label_t& label, const cycle_or_label& component) { if (const auto plabel = std::get_if(&component)) { return *plabel == label; @@ -308,3 +308,4 @@ const wto_nesting_t& wto_t::nesting(const label_t& label) const { } return _nesting.at(label); } +} // namespace crab diff --git a/src/crab/wto.hpp b/src/crab/wto.hpp index ebfe2117..f5424e38 100644 --- a/src/crab/wto.hpp +++ b/src/crab/wto.hpp @@ -28,6 +28,9 @@ #include #include "crab/cfg.hpp" +#include "crab/label.hpp" + +namespace crab { // Bourdoncle, "Efficient chaotic iteration strategies with widenings", 1993 // uses the notation w(c) to refer to the set of heads of the nested components @@ -114,7 +117,7 @@ class wto_t final { friend class wto_builder_t; public: - explicit wto_t(const crab::cfg_t& cfg); + explicit wto_t(const cfg_t& cfg); [[nodiscard]] wto_partition_t::const_reverse_iterator begin() const { @@ -144,3 +147,4 @@ class wto_t final { } } }; +} // namespace crab diff --git a/src/crab_utils/graph_ops.hpp b/src/crab_utils/graph_ops.hpp index 5e2d28ec..7e12eacc 100644 --- a/src/crab_utils/graph_ops.hpp +++ b/src/crab_utils/graph_ops.hpp @@ -502,6 +502,7 @@ class GraphOps { enum SMarkT { V_UNSTABLE = 0, V_STABLE = 1 }; // Whether a vertex is in the current SCC/queue for Bellman-Ford. enum QMarkT { BF_NONE = 0, BF_SCC = 1, BF_QUEUED = 2 }; + private: // Scratch space needed by the graph algorithms. // Should really switch to some kind of arena allocator, rather @@ -535,6 +536,7 @@ class GraphOps { ts = 0; ts_idx = 0; } + private: static void grow_scratch(const size_t sz) { if (sz <= scratch_sz) { @@ -975,6 +977,7 @@ class GraphOps { } } } + public: template static bool repair_potential(const G& g, WeightVector& p, vert_id ii, vert_id jj) { @@ -1047,6 +1050,7 @@ class GraphOps { } return delta; } + private: // Compute the transitive closure of edges reachable from v, assuming // (1) the subgraph G \ {v} is closed, and @@ -1101,6 +1105,7 @@ class GraphOps { vert_marks->at(*adj_head) = 0; } } + public: static void close_over_edge(graph_t& g, vert_id ii, vert_id jj) { assert(ii != 0 && jj != 0); diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index 957030c6..b6b76964 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -17,7 +17,6 @@ #include "crab_verifier.hpp" #include "string_constraints.hpp" -using crab::cfg_t; using crab::ebpf_domain_t; using std::string; @@ -48,27 +47,27 @@ int Invariants::max_loop_count() const { return std::numeric_limits::max(); } -Invariants analyze(const cfg_t& cfg, ebpf_domain_t&& entry_invariant) { - return Invariants{run_forward_analyzer(cfg, std::move(entry_invariant))}; +Invariants analyze(const Program& prog, ebpf_domain_t&& entry_invariant) { + return Invariants{run_forward_analyzer(prog, std::move(entry_invariant))}; } -Invariants analyze(const cfg_t& cfg) { +Invariants analyze(const Program& prog) { ebpf_verifier_clear_before_analysis(); - return analyze(cfg, ebpf_domain_t::setup_entry(thread_local_options.setup_constraints)); + return analyze(prog, ebpf_domain_t::setup_entry(thread_local_options.setup_constraints)); } -Invariants analyze(const cfg_t& cfg, const string_invariant& entry_invariant) { +Invariants analyze(const Program& prog, const string_invariant& entry_invariant) { ebpf_verifier_clear_before_analysis(); - return analyze(cfg, + return analyze(prog, ebpf_domain_t::from_constraints(entry_invariant.value(), thread_local_options.setup_constraints)); } -bool Invariants::verified(const cfg_t& cfg) const { +bool Invariants::verified(const Program& prog) const { for (const auto& [label, inv_pair] : invariants) { if (inv_pair.pre.is_bottom()) { continue; } - for (const Assertion& assertion : cfg.assertions_at(label)) { + for (const Assertion& assertion : prog.assertions_at(label)) { if (!ebpf_domain_check(inv_pair.pre, assertion).empty()) { return false; } @@ -77,19 +76,19 @@ bool Invariants::verified(const cfg_t& cfg) const { return true; } -Report Invariants::check_assertions(const cfg_t& cfg) const { +Report Invariants::check_assertions(const Program& prog) const { Report report; for (const auto& [label, inv_pair] : invariants) { if (inv_pair.pre.is_bottom()) { continue; } - for (const Assertion& assertion : cfg.assertions_at(label)) { + for (const Assertion& assertion : prog.assertions_at(label)) { const auto warnings = ebpf_domain_check(inv_pair.pre, assertion); for (const auto& msg : warnings) { report.warnings[label].emplace_back(msg); } } - if (const auto passume = std::get_if(&cfg.instruction_at(label))) { + if (const auto passume = std::get_if(&prog.instruction_at(label))) { if (inv_pair.post.is_bottom()) { const auto s = to_string(*passume); report.reachability[label].emplace_back("Code becomes unreachable (" + s + ")"); diff --git a/src/crab_verifier.hpp b/src/crab_verifier.hpp index ec8cc127..edebe06a 100644 --- a/src/crab_verifier.hpp +++ b/src/crab_verifier.hpp @@ -3,8 +3,8 @@ #pragma once #include "config.hpp" -#include "crab/cfg.hpp" #include "crab/fwd_analyzer.hpp" +#include "program.hpp" #include "spec_type_descriptors.hpp" #include "string_constraints.hpp" @@ -64,15 +64,15 @@ class Invariants final { crab::interval_t exit_value() const; int max_loop_count() const; - bool verified(const crab::cfg_t& cfg) const; - Report check_assertions(const crab::cfg_t& cfg) const; + bool verified(const Program& prog) const; + Report check_assertions(const Program& prog) const; - friend void print_invariants(std::ostream& os, const crab::cfg_t& cfg, bool simplify, const Invariants& invariants); + friend void print_invariants(std::ostream& os, const Program& prog, bool simplify, const Invariants& invariants); }; -Invariants analyze(const crab::cfg_t& cfg); -Invariants analyze(const crab::cfg_t& cfg, const string_invariant& entry_invariant); -inline bool verify(const crab::cfg_t& cfg) { return analyze(cfg).verified(cfg); } +Invariants analyze(const Program& prog); +Invariants analyze(const Program& prog, const string_invariant& entry_invariant); +inline bool verify(const Program& prog) { return analyze(cfg).verified(cfg); } int create_map_crab(const EbpfMapType& map_type, uint32_t key_size, uint32_t value_size, uint32_t max_entries, ebpf_verifier_options_t options); diff --git a/src/ebpf_verifier.hpp b/src/ebpf_verifier.hpp index 57fb13a7..99a9fb20 100644 --- a/src/ebpf_verifier.hpp +++ b/src/ebpf_verifier.hpp @@ -5,6 +5,6 @@ #include "asm_files.hpp" #include "asm_unmarshal.hpp" #include "config.hpp" -#include "crab/cfg.hpp" #include "crab_verifier.hpp" #include "platform.hpp" +#include "program.hpp" diff --git a/src/main/check.cpp b/src/main/check.cpp index abf46060..0d46ea2f 100644 --- a/src/main/check.cpp +++ b/src/main/check.cpp @@ -228,10 +228,10 @@ int main(int argc, char** argv) { return 1; } - auto& prog = std::get(prog_or_error); + auto& inst_seq = std::get(prog_or_error); if (!asmfile.empty()) { std::ofstream out{asmfile}; - print(prog, out, {}); + print(inst_seq, out, {}); print_map_descriptors(thread_local_program_info->map_descriptors, out); } @@ -239,26 +239,26 @@ int main(int argc, char** argv) { // Convert the instruction sequence to a control-flow graph. try { const auto verbosity = ebpf_verifier_options.verbosity_opts; - const crab::cfg_t cfg = prepare_cfg(prog, raw_prog.info, ebpf_verifier_options.cfg_opts); + const Program prog = Program::from_sequence(inst_seq, raw_prog.info, ebpf_verifier_options.cfg_opts); if (domain == "cfg") { - print_cfg(cfg, std::cout, verbosity.simplify); + print_program(prog, std::cout, verbosity.simplify); return 0; } const auto begin = std::chrono::steady_clock::now(); - auto invariants = analyze(cfg); + auto invariants = analyze(prog); const auto end = std::chrono::steady_clock::now(); const auto seconds = std::chrono::duration(end - begin).count(); if (verbosity.print_invariants) { - print_invariants(std::cout, cfg, verbosity.simplify, invariants); + print_invariants(std::cout, prog, verbosity.simplify, invariants); } bool pass; if (verbosity.print_failures) { - auto report = invariants.check_assertions(cfg); + auto report = invariants.check_assertions(prog); print_warnings(std::cout, report); pass = report.verified(); } else { - pass = invariants.verified(cfg); + pass = invariants.verified(prog); } if (pass && ebpf_verifier_options.cfg_opts.check_for_termination && (verbosity.print_failures || verbosity.print_invariants)) { @@ -277,14 +277,14 @@ int main(int argc, char** argv) { return !res; } else if (domain == "stats") { // Convert the instruction sequence to a control-flow graph. - const crab::cfg_t cfg = prepare_cfg(prog, raw_prog.info, ebpf_verifier_options.cfg_opts); + const Program prog = Program::from_sequence(inst_seq, raw_prog.info, ebpf_verifier_options.cfg_opts); // Just print eBPF program stats. - auto stats = collect_stats(cfg); + auto stats = collect_stats(prog); if (!dotfile.empty()) { - print_dot(cfg, dotfile); + print_dot(prog, dotfile); } - std::cout << std::hex << hash(raw_prog) << std::dec << "," << prog.size(); + std::cout << std::hex << hash(raw_prog) << std::dec << "," << inst_seq.size(); for (const string& h : stats_headers()) { std::cout << "," << stats.at(h); } diff --git a/src/program.hpp b/src/program.hpp new file mode 100644 index 00000000..a26700a1 --- /dev/null +++ b/src/program.hpp @@ -0,0 +1,71 @@ +// Copyright (c) Prevail Verifier contributors. +// SPDX-License-Identifier: MIT +#pragma once + +#include +#include + +#include "crab/cfg.hpp" +#include "crab/label.hpp" +#include "crab_utils/debug.hpp" + +#include "asm_syntax.hpp" + +class Program { + friend struct cfg_builder_t; + + std::map m_instructions{{label_t::entry, {.cmd = Undefined{}}}, + {label_t::exit, {.cmd = Undefined{}}}}; + crab::cfg_t m_cfg; + + // TODO: add program_info field + + public: + const crab::cfg_t& cfg() const { return m_cfg; } + + //! return a view of the labels, including entry and exit + [[nodiscard]] + auto labels() const { + return m_cfg.labels(); + } + + const Instruction& instruction_at(const label_t& label) const { + if (!m_instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); + } + return m_instructions.at(label).cmd; + } + + Instruction& instruction_at(const label_t& label) { + if (!m_instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); + } + return m_instructions.at(label).cmd; + } + + std::vector assertions_at(const label_t& label) const { + if (!m_instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); + } + return m_instructions.at(label).preconditions; + } + + static Program from_sequence(const InstructionSeq& inst_seq, const program_info& info, + const prepare_cfg_options& options); +}; + +class InvalidControlFlow final : public std::runtime_error { + public: + explicit InvalidControlFlow(const std::string& what) : std::runtime_error(what) {} +}; + +std::vector get_assertions(Instruction ins, const program_info& info, const std::optional& label); + +std::vector stats_headers(); +std::map collect_stats(const Program& prog); + +using printfunc = std::function; +void print_program(const Program& prog, std::ostream& os, bool simplify, const printfunc& prefunc, + const printfunc& postfunc); +void print_program(const Program& prog, std::ostream& os, bool simplify); +void print_dot(const Program& prog, const std::string& outfile); diff --git a/src/test/ebpf_yaml.cpp b/src/test/ebpf_yaml.cpp index cd0eb1e1..afeef384 100644 --- a/src/test/ebpf_yaml.cpp +++ b/src/test/ebpf_yaml.cpp @@ -17,7 +17,6 @@ #include "ebpf_yaml.hpp" #include "string_constraints.hpp" -using crab::cfg_t; using std::string; using std::vector; @@ -246,10 +245,10 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { program_info info{&g_platform_test, {}, program_type}; thread_local_options = test_case.options; try { - const cfg_t cfg = prepare_cfg(test_case.instruction_seq, info, test_case.options.cfg_opts); - const Invariants invariants = analyze(cfg, test_case.assumed_pre_invariant); + const Program prog = Program::from_sequence(test_case.instruction_seq, info, test_case.options.cfg_opts); + const Invariants invariants = analyze(prog, test_case.assumed_pre_invariant); const string_invariant actual_last_invariant = invariants.invariant_at(label_t::exit); - const std::set actual_messages = invariants.check_assertions(cfg).all_messages(); + const std::set actual_messages = invariants.check_assertions(prog).all_messages(); if (actual_last_invariant == test_case.expected_post_invariant && actual_messages == test_case.expected_messages) { @@ -259,7 +258,7 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { .invariant = make_diff(actual_last_invariant, test_case.expected_post_invariant), .messages = make_diff(actual_messages, test_case.expected_messages), }; - } catch (crab::InvalidControlFlow& ex) { + } catch (InvalidControlFlow& ex) { const std::set actual_messages{ex.what()}; if (test_case.expected_post_invariant == string_invariant::top() && actual_messages == test_case.expected_messages) { @@ -354,11 +353,11 @@ ConformanceTestResult run_conformance_test_case(const std::vector& me return {}; } - auto& prog = std::get(prog_or_error); + const InstructionSeq& inst_seq = std::get(prog_or_error); ebpf_verifier_options_t options{}; if (debug) { - print(prog, std::cout, {}); + print(inst_seq, std::cout, {}); options.verbosity_opts.print_failures = true; options.verbosity_opts.print_invariants = true; options.verbosity_opts.simplify = false; @@ -366,9 +365,9 @@ ConformanceTestResult run_conformance_test_case(const std::vector& me thread_local_options = options; try { - const cfg_t cfg = prepare_cfg(prog, info, options.cfg_opts); - const Invariants invariants = analyze(cfg, pre_invariant); - return ConformanceTestResult{.success = invariants.verified(cfg), .r0_value = invariants.exit_value()}; + const Program prog = Program::from_sequence(inst_seq, info, options.cfg_opts); + const Invariants invariants = analyze(prog, pre_invariant); + return ConformanceTestResult{.success = invariants.verified(prog), .r0_value = invariants.exit_value()}; } catch (const std::exception&) { // Catch exceptions thrown in ebpf_domain.cpp. return {}; diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index 82756292..7dcae9bd 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -226,9 +226,10 @@ static void compare_unmarshal_marshal(const ebpf_inst& ins, const ebpf_inst& exp const ebpf_platform_t& platform = g_ebpf_platform_linux) { program_info info{.platform = &platform, .type = platform.get_program_type("unspec", "unspec")}; constexpr ebpf_inst exit{.opcode = INST_OP_EXIT}; - InstructionSeq parsed = std::get(unmarshal(raw_program{"", "", 0, "", {ins, exit, exit}, info})); - REQUIRE(parsed.size() == 3); - auto [_, single, _2] = parsed.front(); + const InstructionSeq inst_seq = + std::get(unmarshal(raw_program{"", "", 0, "", {ins, exit, exit}, info})); + REQUIRE(inst_seq.size() == 3); + auto [_, single, _2] = inst_seq.front(); (void)_; // unused (void)_2; // unused std::vector marshaled = marshal(single, 0); @@ -262,9 +263,10 @@ static void compare_unmarshal_marshal(const ebpf_inst& ins1, const ebpf_inst& in program_info info{.platform = &g_ebpf_platform_linux, .type = g_ebpf_platform_linux.get_program_type("unspec", "unspec")}; constexpr ebpf_inst exit{.opcode = INST_OP_EXIT}; - auto parsed = std::get(unmarshal(raw_program{"", "", 0, "", {ins1, ins2, exit, exit}, info})); - REQUIRE(parsed.size() == 3); - auto [_, single, _2] = parsed.front(); + const InstructionSeq inst_seq = + std::get(unmarshal(raw_program{"", "", 0, "", {ins1, ins2, exit, exit}, info})); + REQUIRE(inst_seq.size() == 3); + auto [_, single, _2] = inst_seq.front(); (void)_; // unused (void)_2; // unused std::vector marshaled = marshal(single, 0); @@ -280,9 +282,10 @@ static void compare_unmarshal_marshal(const ebpf_inst& ins1, const ebpf_inst& in static void compare_marshal_unmarshal(const Instruction& ins, bool double_cmd = false, const ebpf_platform_t& platform = g_ebpf_platform_linux) { program_info info{.platform = &platform, .type = platform.get_program_type("unspec", "unspec")}; - InstructionSeq parsed = std::get(unmarshal(raw_program{"", "", 0, "", marshal(ins, 0), info})); - REQUIRE(parsed.size() == 1); - auto [_, single, _2] = parsed.back(); + const InstructionSeq inst_seq = + std::get(unmarshal(raw_program{"", "", 0, "", marshal(ins, 0), info})); + REQUIRE(inst_seq.size() == 1); + auto [_, single, _2] = inst_seq.back(); (void)_; // unused (void)_2; // unused REQUIRE(single == ins); diff --git a/src/test/test_verify.cpp b/src/test/test_verify.cpp index c606410e..afc2936c 100644 --- a/src/test/test_verify.cpp +++ b/src/test/test_verify.cpp @@ -5,8 +5,6 @@ #include "ebpf_verifier.hpp" -using crab::cfg_t; - #define FAIL_LOAD_ELF(dirname, filename, sectionname) \ TEST_CASE("Try loading nonexisting program: " dirname "/" filename, "[elf]") { \ try { \ @@ -41,29 +39,29 @@ FAIL_UNMARSHAL("invalid", "invalid-lddw.o", ".text") REQUIRE(raw_progs.size() == 1); \ raw_program raw_prog = raw_progs.back(); \ std::variant prog_or_error = unmarshal(raw_prog); \ - const auto prog = std::get_if(&prog_or_error); \ - REQUIRE(prog != nullptr); \ - cfg_t cfg = prepare_cfg(*prog, raw_prog.info, thread_local_options.cfg_opts); \ - REQUIRE_THROWS_AS(analyze(cfg), UnmarshalError); \ + const auto inst_seq = std::get_if(&prog_or_error); \ + REQUIRE(inst_seq != nullptr); \ + Program prog = Program::from_sequence(*inst_seq, raw_prog.info, thread_local_options.cfg_opts); \ + REQUIRE_THROWS_AS(analyze(prog), UnmarshalError); \ } FAIL_ANALYZE("build", "badmapptr.o", "test") // Verify a program in a section that may have multiple programs in it. -#define VERIFY_PROGRAM(dirname, filename, section_name, program_name, _options, platform, should_pass, count) \ - do { \ - thread_local_options = _options; \ - const auto raw_progs = read_elf("ebpf-samples/" dirname "/" filename, section_name, {}, platform); \ - REQUIRE(raw_progs.size() == count); \ - for (const auto& raw_prog : raw_progs) { \ - if (count == 1 || raw_prog.function_name == program_name) { \ - const auto prog_or_error = unmarshal(raw_prog); \ - const auto prog = std::get_if(&prog_or_error); \ - REQUIRE(prog != nullptr); \ - const cfg_t cfg = prepare_cfg(*prog, raw_prog.info, thread_local_options.cfg_opts); \ - REQUIRE(verify(cfg) == should_pass); \ - } \ - } \ +#define VERIFY_PROGRAM(dirname, filename, section_name, program_name, _options, platform, should_pass, count) \ + do { \ + thread_local_options = _options; \ + const auto raw_progs = read_elf("ebpf-samples/" dirname "/" filename, section_name, {}, platform); \ + REQUIRE(raw_progs.size() == count); \ + for (const auto& raw_prog : raw_progs) { \ + if (count == 1 || raw_prog.function_name == program_name) { \ + const auto prog_or_error = unmarshal(raw_prog); \ + const auto inst_seq = std::get_if(&prog_or_error); \ + REQUIRE(inst_seq != nullptr); \ + const Program prog = Program::from_sequence(*inst_seq, raw_prog.info, thread_local_options.cfg_opts); \ + REQUIRE(verify(prog) == should_pass); \ + } \ + } \ } while (0) // Verify a section with only one program in it. @@ -606,7 +604,7 @@ TEST_SECTION_LEGACY_FAIL("cilium", "bpf_lxc.o", "2/10") TEST_SECTION_FAIL("cilium", "bpf_lxc.o", "2/11") TEST_SECTION_FAIL("cilium", "bpf_lxc.o", "2/12") -void test_analyze_thread(const cfg_t* cfg, program_info* info, bool* res) { +void test_analyze_thread(const Program* cfg, program_info* info, bool* res) { thread_local_program_info.set(*info); *res = verify(*cfg); } @@ -617,21 +615,21 @@ TEST_CASE("multithreading", "[verify][multithreading]") { REQUIRE(raw_progs1.size() == 1); raw_program raw_prog1 = raw_progs1.back(); auto prog_or_error1 = unmarshal(raw_prog1); - auto prog1 = std::get_if(&prog_or_error1); - REQUIRE(prog1 != nullptr); - const cfg_t cfg1 = prepare_cfg(*prog1, raw_prog1.info, {}); + auto inst_seq1 = std::get_if(&prog_or_error1); + REQUIRE(inst_seq1 != nullptr); + const Program prog1 = Program::from_sequence(*inst_seq1, raw_prog1.info, {}); auto raw_progs2 = read_elf("ebpf-samples/bpf_cilium_test/bpf_netdev.o", "2/2", {}, &g_ebpf_platform_linux); REQUIRE(raw_progs2.size() == 1); raw_program raw_prog2 = raw_progs2.back(); auto prog_or_error2 = unmarshal(raw_prog2); - auto prog2 = std::get_if(&prog_or_error2); - REQUIRE(prog2 != nullptr); - const cfg_t cfg2 = prepare_cfg(*prog2, raw_prog2.info, {}); + auto inst_seq2 = std::get_if(&prog_or_error2); + REQUIRE(inst_seq2 != nullptr); + const Program prog2 = Program::from_sequence(*inst_seq2, raw_prog2.info, {}); bool res1, res2; - std::thread a(test_analyze_thread, &cfg1, &raw_prog1.info, &res1); - std::thread b(test_analyze_thread, &cfg2, &raw_prog2.info, &res2); + std::thread a(test_analyze_thread, &prog1, &raw_prog1.info, &res1); + std::thread b(test_analyze_thread, &prog2, &raw_prog2.info, &res2); a.join(); b.join(); diff --git a/src/test/test_wto.cpp b/src/test/test_wto.cpp index 8406482f..e2200027 100644 --- a/src/test/test_wto.cpp +++ b/src/test/test_wto.cpp @@ -6,11 +6,12 @@ #include "crab/wto.hpp" using crab::label_t; +using crab::wto_t; TEST_CASE("wto figure 1", "[wto]") { // Construct the example graph in figure 1 of Bourdoncle, // "Efficient chaotic iteration strategies with widenings", 1993. - const wto_t wto(crab::cfg_from_adjacency_list({{crab::label_t::entry, {label_t{1}}}, + const wto_t wto(crab::cfg_from_adjacency_list({{label_t::entry, {label_t{1}}}, {label_t{1}, {label_t{2}}}, {label_t{2}, {label_t{3}}}, {label_t{3}, {label_t{4}}}, @@ -28,7 +29,7 @@ TEST_CASE("wto figure 1", "[wto]") { TEST_CASE("wto figure 2a", "[wto]") { // Construct the example graph in figure 2a of Bourdoncle, // "Efficient chaotic iteration strategies with widenings", 1993. - const wto_t wto(crab::cfg_from_adjacency_list({{crab::label_t::entry, {label_t{1}}}, + const wto_t wto(crab::cfg_from_adjacency_list({{label_t::entry, {label_t{1}}}, {label_t{1}, {label_t{2}, label_t{4}}}, {label_t{2}, {label_t{3}}}, {label_t{3}, {label_t::exit}}, @@ -43,7 +44,7 @@ TEST_CASE("wto figure 2a", "[wto]") { TEST_CASE("wto figure 2b", "[wto]") { // Construct the example graph in figure 2b of Bourdoncle, // "Efficient chaotic iteration strategies with widenings", 1993. - const wto_t wto(crab::cfg_from_adjacency_list({{crab::label_t::entry, {label_t{1}}}, + const wto_t wto(crab::cfg_from_adjacency_list({{label_t::entry, {label_t{1}}}, {label_t{1}, {label_t{2}, label_t{4}}}, {label_t{2}, {label_t{3}}}, {label_t{3}, {label_t{1}, label_t::exit}},