From d533293f942647039cb91ab45969f5e2bd0999ae Mon Sep 17 00:00:00 2001 From: Elazar Gershuni Date: Sun, 1 Dec 2024 23:36:24 +0200 Subject: [PATCH 1/2] reduce coupling between control flow logic and instructions Signed-off-by: Elazar Gershuni --- src/asm_cfg.cpp | 209 ++++++++++++------ src/asm_files.cpp | 7 +- src/asm_ostream.cpp | 173 +++++++-------- src/asm_parse.cpp | 2 +- src/asm_syntax.hpp | 4 +- src/asm_unmarshal.cpp | 5 +- src/assertions.cpp | 16 +- src/config.hpp | 7 +- src/crab/cfg.hpp | 413 +++++++++-------------------------- src/crab/fwd_analyzer.cpp | 10 +- src/crab/split_dbm.cpp | 6 +- src/crab/thresholds.cpp | 16 +- src/crab/thresholds.hpp | 2 +- src/crab/var_factory.cpp | 3 +- src/crab/wto.cpp | 11 +- src/crab/wto.hpp | 2 +- src/crab_utils/graph_ops.hpp | 4 +- src/crab_verifier.cpp | 12 +- src/crab_verifier.hpp | 20 +- src/main/check.cpp | 6 +- src/string_constraints.hpp | 2 +- src/test/ebpf_yaml.cpp | 1 + src/test/test_verify.cpp | 5 +- src/test/test_wto.cpp | 85 ++----- 24 files changed, 406 insertions(+), 615 deletions(-) diff --git a/src/asm_cfg.cpp b/src/asm_cfg.cpp index 337003323..d2cdd9129 100644 --- a/src/asm_cfg.cpp +++ b/src/asm_cfg.cpp @@ -8,6 +8,7 @@ #include #include "asm_syntax.hpp" +#include "config.hpp" #include "crab/cfg.hpp" #include "crab/wto.hpp" @@ -17,6 +18,69 @@ using std::string; using std::to_string; using std::vector; +namespace crab { +struct cfg_builder_t final { + cfg_t cfg; + + // TODO: ins should be inserted elsewhere + void insert_after(const label_t& prev_label, const label_t& new_label, const GuardedInstruction& ins) { + if (prev_label == new_label) { + CRAB_ERROR("Cannot insert after the same label ", to_string(new_label)); + } + std::set prev_children; + std::swap(prev_children, cfg.get_node(prev_label).children); + + for (const label_t& next_label : prev_children) { + cfg.get_node(next_label).parents.erase(prev_label); + } + + insert(new_label, ins); + for (const label_t& next_label : prev_children) { + add_child(prev_label, new_label); + add_child(new_label, next_label); + } + } + + // TODO: ins should be inserted elsewhere + void insert(const label_t& _label, const GuardedInstruction& ins) { + const auto it = cfg.neighbours.find(_label); + if (it != cfg.neighbours.end()) { + CRAB_ERROR("Label ", to_string(_label), " already exists"); + } + cfg.neighbours.emplace(_label, cfg_t::adjacent_t{}); + cfg.instructions.emplace(_label, ins); + } + + // TODO: ins should be inserted elsewhere + label_t insert_jump(const label_t& from, const label_t& to, const GuardedInstruction& ins) { + const label_t jump_label = label_t::make_jump(from, to); + if (cfg.contains(jump_label)) { + CRAB_ERROR("Jump label ", to_string(jump_label), " already exists"); + } + insert(jump_label, ins); + add_child(from, jump_label); + add_child(jump_label, to); + return jump_label; + } + + void add_child(const label_t& a, const label_t& b) { + assert(b != label_t::entry); + assert(a != label_t::exit); + cfg.neighbours.at(a).children.insert(b); + cfg.neighbours.at(b).parents.insert(a); + } + + void remove_child(const label_t& a, const label_t& b) { + cfg.get_node(a).children.erase(b); + cfg.get_node(b).parents.erase(a); + } +}; +} // namespace crab + +using crab::basic_block_t; +using crab::cfg_builder_t; +using crab::cfg_t; + /// Get the inverse of a given comparison operation. static Condition::Op reverse(const Condition::Op op) { switch (op) { @@ -62,18 +126,17 @@ static bool has_fall(const Instruction& ins) { } /// Update a control-flow graph to inline function macros. -static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t& entry_label) { +static void add_cfg_nodes(cfg_builder_t& builder, const label_t& caller_label, const label_t& entry_label) { bool first = true; // Get the label of the node to go to on returning from the macro. - crab::value_t& exit_to_node = cfg.get_node(cfg.next_nodes(caller_label).front()); + label_t exit_to_label = builder.cfg.get_child(caller_label); // Construct the variable prefix to use for the new stack frame, // and store a copy in the CallLocal instruction since the instruction-specific // labels may only exist until the CFG is simplified. - crab::value_t& caller_node = cfg.get_node(caller_label); const std::string stack_frame_prefix = to_string(caller_label); - if (const auto pcall = std::get_if(&caller_node.instruction().cmd)) { + if (const auto pcall = std::get_if(&builder.cfg.instruction_at(caller_label))) { pcall->stack_frame_prefix = stack_frame_prefix; } @@ -91,36 +154,36 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t // Clone the macro block into a new block with the new stack frame prefix. const label_t label{macro_label.from, macro_label.to, stack_frame_prefix}; - auto inst = cfg.at(macro_label); - if (const auto pexit = std::get_if(&inst.cmd)) { + auto inst = builder.cfg.instruction_at(macro_label); + if (const auto pexit = std::get_if(&inst)) { pexit->stack_frame_prefix = label.stack_frame_prefix; - } else if (const auto pcall = std::get_if(&inst.cmd)) { + } else if (const auto pcall = std::get_if(&inst)) { pcall->stack_frame_prefix = label.stack_frame_prefix; } - crab::value_t& bb = cfg.insert(label, inst.cmd); + builder.insert(label, {.cmd = inst}); if (first) { // Add an edge from the caller to the new block. first = false; - caller_node >> bb; + builder.add_child(caller_label, label); } // Add an edge from any other predecessors. - for (const auto& prev_macro_nodes = cfg.prev_nodes(macro_label); + for (const auto& prev_macro_nodes = builder.cfg.parents_of(macro_label); const auto& prev_macro_label : prev_macro_nodes) { const label_t prev_label(prev_macro_label.from, prev_macro_label.to, to_string(caller_label)); - if (const auto& labels = cfg.labels(); std::ranges::find(labels, prev_label) != labels.end()) { - cfg.get_node(prev_label) >> bb; + if (const auto& labels = builder.cfg.labels(); std::ranges::find(labels, prev_label) != labels.end()) { + builder.add_child(prev_label, label); } } // Walk all successor nodes. - for (const auto& next_macro_nodes = cfg.next_nodes(macro_label); + for (const auto& next_macro_nodes = builder.cfg.children_of(macro_label); const auto& next_macro_label : next_macro_nodes) { - if (next_macro_label == cfg.exit_label()) { + if (next_macro_label == builder.cfg.exit_label()) { // This is an exit transition, so add edge to the block to execute // upon returning from the macro. - bb >> exit_to_node; + builder.add_child(label, exit_to_label); } else if (!seen_labels.contains(next_macro_label)) { // Push any other unprocessed successor label onto the list to be processed. if (!macro_labels.contains(next_macro_label)) { @@ -133,39 +196,39 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t // Remove the original edge from the caller node to its successor, // since processing now goes through the function macro instead. - caller_node -= exit_to_node; + builder.remove_child(caller_label, exit_to_label); // Finally, recurse to replace any nested function macros. string caller_label_str = to_string(caller_label); const long stack_frame_depth = std::ranges::count(caller_label_str, STACK_FRAME_DELIMITER) + 2; for (const auto& macro_label : seen_labels) { - const label_t label(macro_label.from, macro_label.to, caller_label_str); - if (const auto pins = std::get_if(&cfg.at(label).cmd)) { + const label_t label{macro_label.from, macro_label.to, caller_label_str}; + if (const auto pins = std::get_if(&builder.cfg.instruction_at(label))) { if (stack_frame_depth >= MAX_CALL_STACK_FRAMES) { throw crab::InvalidControlFlow{"too many call stack frames"}; } - add_cfg_nodes(cfg, label, pins->target); + add_cfg_nodes(builder, label, pins->target); } } } /// Convert an instruction sequence to a control-flow graph (CFG). -static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must_have_exit) { - cfg_t cfg; +static cfg_builder_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must_have_exit) { + cfg_builder_t builder; // First add all instructions to the CFG without connecting for (const auto& [label, inst, _] : insts) { if (std::holds_alternative(inst)) { continue; } - cfg.insert(label, inst); + builder.insert(label, GuardedInstruction{.cmd = inst}); } if (insts.size() == 0) { throw crab::InvalidControlFlow{"empty instruction sequence"}; } else { const auto& [label, inst, _0] = insts[0]; - cfg.get_node(cfg.entry_label()) >> cfg.get_node(label); + builder.add_child(builder.cfg.entry_label(), label); } // Do a first pass ignoring all function macro calls. @@ -175,9 +238,8 @@ static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must if (std::holds_alternative(inst)) { continue; } - auto& value = cfg.get_node(label); - label_t fallthrough{cfg.exit_label()}; + label_t fallthrough{builder.cfg.exit_label()}; if (i + 1 < insts.size()) { fallthrough = std::get<0>(insts[i + 1]); } else { @@ -189,30 +251,26 @@ static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must if (const auto cond = jmp->cond) { label_t target_label = jmp->target; if (target_label == fallthrough) { - value >> cfg.get_node(fallthrough); + builder.add_child(label, fallthrough); continue; } - - vector> jumps{ - {target_label, *cond}, - {fallthrough, reverse(*cond)}, - }; - for (const auto& [next_label, cond1] : jumps) { - label_t jump_label = label_t::make_jump(label, next_label); - crab::value_t& jump_node = cfg.insert(jump_label, Assume{.cond = cond1, .is_explicit = false}); - value >> jump_node; - jump_node >> cfg.get_node(next_label); + if (!builder.cfg.contains(target_label)) { + throw crab::InvalidControlFlow{"jump to undefined label " + to_string(target_label)}; } + builder.insert_jump(label, target_label, + GuardedInstruction{.cmd = Assume{.cond = *cond, .is_implicit = true}}); + builder.insert_jump(label, fallthrough, + GuardedInstruction{.cmd = Assume{.cond = reverse(*cond), .is_implicit = true}}); } else { - value >> cfg.get_node(jmp->target); + builder.add_child(label, jmp->target); } } else { if (has_fall(inst)) { - value >> cfg.get_node(fallthrough); + builder.add_child(label, fallthrough); } } if (std::holds_alternative(inst)) { - value >> cfg.get_node(cfg.exit_label()); + builder.add_child(label, builder.cfg.exit_label()); } } @@ -221,33 +279,35 @@ static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must // results of the first pass. for (const auto& [label, inst, _] : insts) { if (const auto pins = std::get_if(&inst)) { - add_cfg_nodes(cfg, label, pins->target); + add_cfg_nodes(builder, label, pins->target); } } - return cfg; + return builder; } cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options) { thread_local_program_info.set(info); // Convert the instruction sequence to a deterministic control-flow graph. - cfg_t cfg = instruction_seq_to_cfg(prog, options.must_have_exit); + cfg_builder_t builder = instruction_seq_to_cfg(prog, options.must_have_exit); // Detect loops using Weak Topological Ordering (WTO) and insert counters at loop entry points. WTO provides a // hierarchical decomposition of the CFG that identifies all strongly connected components (cycles) and their entry // points. These entry points serve as natural locations for loop counters that help verify program termination. if (options.check_for_termination) { - const wto_t wto{cfg}; + const wto_t wto{builder.cfg}; wto.for_each_loop_head([&](const label_t& label) -> void { - cfg.insert_after(label, label_t::make_increment_counter(label), IncrementLoopCounter{label}); + builder.insert_after(label, label_t::make_increment_counter(label), + GuardedInstruction{.cmd = IncrementLoopCounter{label}}); }); } // Annotate the CFG by adding in assertions before every memory instruction. - explicate_assertions(cfg, info); - - return cfg; + for (auto& [label, ins] : builder.cfg.instructions) { + builder.cfg.set_assertions(label, get_assertions(ins.cmd, info, label)); + } + return builder.cfg; } std::set basic_block_t::collect_basic_blocks(const cfg_t& cfg, const bool simplify) { @@ -262,26 +322,27 @@ std::set basic_block_t::collect_basic_blocks(const cfg_t& cfg, co } std::set res; - std::set worklist(cfg.label_begin(), cfg.label_end()); + std::set worklist; + for (const label_t& label : cfg.labels()) { + worklist.insert(label); + } std::set seen; while (!worklist.empty()) { - const label_t label = *worklist.begin(); + label_t label = *worklist.begin(); worklist.erase(label); if (seen.contains(label)) { continue; } seen.insert(label); - const value_t* current_value = &cfg.get_node(label); - if (current_value->in_degree() == 1 && cfg.get_parent(label).out_degree() == 1) { + if (cfg.in_degree(label) == 1 && cfg.num_siblings(label) == 1) { continue; } basic_block_t bb{label}; - while (current_value->out_degree() == 1) { - const value_t& next_value = cfg.get_child(bb.last_label()); - const label_t& next_label = next_value.label(); + while (cfg.out_degree(label) == 1) { + const label_t& next_label = cfg.get_child(bb.last_label()); - if (seen.contains(next_label) || next_label == cfg.exit_label() || next_value.in_degree() != 1) { + if (seen.contains(next_label) || next_label == cfg.exit_label() || cfg.in_degree(next_label) != 1) { break; } @@ -294,20 +355,13 @@ std::set basic_block_t::collect_basic_blocks(const cfg_t& cfg, co worklist.erase(next_label); seen.insert(next_label); - current_value = &next_value; + label = next_label; } res.emplace(std::move(bb)); } return res; } -template -static vector unique(const std::pair& be) { - vector res; - std::unique_copy(be.first, be.second, std::back_inserter(res)); - return res; -} - /// Get the type of given Instruction. /// Most of these type names are also statistics header labels. static std::string instype(Instruction ins) { @@ -351,7 +405,7 @@ static std::string instype(Instruction ins) { std::vector stats_headers() { return { - "basic_blocks", "joins", "other", "jumps", "assign", "arith", + "instructions", "joins", "other", "jumps", "assign", "arith", "load", "store", "load_store", "packet_access", "call_1", "call_mem", "call_nomem", "reallocate", "map_in_map", "arith64", "arith32", }; @@ -362,10 +416,9 @@ std::map collect_stats(const cfg_t& cfg) { for (const auto& h : stats_headers()) { res[h] = 0; } - for (const auto& this_label : cfg.labels()) { - res["basic_blocks"]++; - const crab::value_t& value = cfg.get_node(this_label); - const auto cmd = value.instruction().cmd; + for (const auto& label : cfg.labels()) { + res["instructions"]++; + const auto cmd = cfg.instruction_at(label); if (const auto pins = std::get_if(&cmd)) { if (pins->mapfd == -1) { res["map_in_map"] = 1; @@ -380,12 +433,28 @@ std::map collect_stats(const cfg_t& cfg) { res[pins->is64 ? "arith64" : "arith32"]++; } res[instype(cmd)]++; - if (unique(value.prev_labels()).size() > 1) { + if (cfg.in_degree(label) > 1) { res["joins"]++; } - if (unique(value.prev_labels()).size() > 1) { + if (cfg.out_degree(label) > 1) { res["jumps"]++; } } return res; } + +cfg_t crab::cfg_from_adjacency_list(const std::map>& adj_list) { + cfg_builder_t builder; + for (const auto& [label, children] : adj_list) { + if (label == label_t::entry || label == label_t::exit) { + continue; + } + builder.insert(label, {.cmd = Undefined{}}); + } + for (const auto& [label, children] : adj_list) { + for (const auto& child : children) { + builder.add_child(label, child); + } + } + return builder.cfg; +} diff --git a/src/asm_files.cpp b/src/asm_files.cpp index 68fc98805..551a88817 100644 --- a/src/asm_files.cpp +++ b/src/asm_files.cpp @@ -205,8 +205,7 @@ struct function_relocation { string target_function_name; }; -static raw_program* find_subprogram(vector& programs, - const ELFIO::section& subprogram_section, +static raw_program* find_subprogram(vector& programs, const ELFIO::section& subprogram_section, const std::string& symbol_name) { // Find subprogram by name. for (auto& subprog : programs) { @@ -219,8 +218,8 @@ static raw_program* find_subprogram(vector& programs, // Returns an error message, or empty string on success. static std::string append_subprograms(raw_program& prog, vector& programs, - const vector& function_relocations, const ELFIO::elfio& reader, - const ELFIO::const_symbol_section_accessor& symbols) { + const vector& function_relocations, + const ELFIO::elfio& reader, const ELFIO::const_symbol_section_accessor& symbols) { if (prog.resolved_subprograms) { // We've already appended any relevant subprograms. return {}; diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index e0f588a61..fadddec26 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -23,23 +23,6 @@ using std::optional; using std::string; using std::vector; -struct LineInfoPrinter { - std::ostream& os; - std::string previous_source_line; - - void print_line_info(const label_t& label) { - if (thread_local_options.verbosity_opts.print_line_info) { - const auto& line_info_map = thread_local_program_info.get().line_info; - const auto& line_info = line_info_map.find(label.from); - // Print line info only once. - if (line_info != line_info_map.end() && line_info->second.source_line != previous_source_line) { - os << "\n" << line_info->second << "\n"; - previous_source_line = line_info->second.source_line; - } - } - } -}; - namespace crab { std::string number_t::to_string() const { return _n.str(); } @@ -76,52 +59,29 @@ string to_string(label_t const& label) { return str.str(); } -void print_dot(const cfg_t& cfg, std::ostream& out) { - out << "digraph program {\n"; - out << " node [shape = rectangle];\n"; - for (const auto& label : cfg.labels()) { - out << " \"" << label << "\"[xlabel=\"" << label << "\",label=\""; +} // namespace crab - const auto& value = cfg.get_node(label); - const auto& ins = value.instruction(); - for (const auto& pre : ins.preconditions) { - out << "assert " << pre << "\\l"; - } - out << ins.cmd << "\\l"; +struct LineInfoPrinter { + std::ostream& os; + std::string previous_source_line; - out << "\"];\n"; - for (const label_t& next : value.next_labels_set()) { - out << " \"" << label << "\" -> \"" << next << "\";\n"; + void print_line_info(const label_t& label) { + if (thread_local_options.verbosity_opts.print_line_info) { + const auto& line_info_map = thread_local_program_info.get().line_info; + const auto& line_info = line_info_map.find(label.from); + // Print line info only once. + if (line_info != line_info_map.end() && line_info->second.source_line != previous_source_line) { + os << "\n" << line_info->second << "\n"; + previous_source_line = line_info->second.source_line; + } } - out << "\n"; - } - out << "}\n"; -} - -void print_dot(const cfg_t& cfg, const std::string& outfile) { - std::ofstream out{outfile}; - if (out.fail()) { - throw std::runtime_error(std::string("Could not open file ") + outfile); - } - print_dot(cfg, out); -} - -void print_label(std::ostream& o, const value_t& value) { o << value.label() << ":\n"; } - -void print_assertions(std::ostream& o, const value_t& value) { - for (const auto& pre : value.instruction().preconditions) { - o << " " - << "assert " << pre << ";\n"; } -} - -void print_instruction(std::ostream& o, const value_t& value) { o << " " << value.instruction().cmd << ";\n"; } +}; -void print_goto(std::ostream& o, const value_t& value) { - auto [it, et] = value.next_labels(); +void print_jump(std::ostream& o, const std::string& direction, const std::set& labels) { + auto [it, et] = std::pair{labels.begin(), labels.end()}; if (it != et) { - o << " " - << "goto "; + o << " " << direction << " "; while (it != et) { o << *it; ++it; @@ -135,49 +95,59 @@ void print_goto(std::ostream& o, const value_t& value) { o << "\n"; } -void print_from(std::ostream& o, const value_t& value) { - auto [it, et] = value.prev_labels(); - if (it != et) { - o << " " - << "from "; - while (it != et) { - o << *it; - ++it; - if (it == et) { - o << ";"; - } else { - o << ","; +void print_cfg(const crab::cfg_t& cfg, std::ostream& os, const bool simplify, const printfunc& prefunc, + const printfunc& postfunc) { + LineInfoPrinter printer{os}; + for (const crab::basic_block_t& bb : crab::basic_block_t::collect_basic_blocks(cfg, simplify)) { + prefunc(os, bb.first_label()); + print_jump(os, "from", cfg.parents_of(bb.first_label())); + os << bb.first_label() << ":\n"; + for (const label_t& label : bb) { + printer.print_line_info(label); + for (const auto& pre : cfg.assertions_at(label)) { + os << " " << "assert " << pre << ";\n"; } + os << " " << cfg.instruction_at(label) << ";\n"; } + print_jump(os, "goto", cfg.children_of(bb.last_label())); + postfunc(os, bb.last_label()); } - o << "\n"; + os << "\n"; } -static void print_cfg(std::ostream& os, const cfg_t& cfg, const bool simplify, const invariant_table_t* invariants) { - LineInfoPrinter printer{os}; - for (const auto& bb : basic_block_t::collect_basic_blocks(cfg, simplify)) { - if (invariants) { - os << "\nPre-invariant : " << invariants->at(bb.first_label()).pre << "\n"; - } - const value_t& first_node = cfg.get_node(bb.first_label()); - print_from(os, first_node); - print_label(os, first_node); - for (const label_t& label : bb) { - printer.print_line_info(label); - const value_t& node = cfg.get_node(label); - print_assertions(os, node); - print_instruction(os, node); +static void nop(std::ostream&, const label_t&) {} + +void print_cfg(const crab::cfg_t& cfg, std::ostream& os, const bool simplify) { + print_cfg(cfg, os, simplify, nop, nop); +} + +void print_dot(const crab::cfg_t& cfg, std::ostream& out) { + out << "digraph program {\n"; + out << " node [shape = rectangle];\n"; + for (const auto& label : cfg.labels()) { + out << " \"" << label << "\"[xlabel=\"" << label << "\",label=\""; + + for (const auto& pre : cfg.assertions_at(label)) { + out << "assert " << pre << "\\l"; } - print_goto(os, cfg.get_node(bb.last_label())); - if (invariants) { - os << "\nPost-invariant: " << invariants->at(bb.last_label()).post << "\n"; + out << cfg.instruction_at(label) << "\\l"; + + out << "\"];\n"; + for (const label_t& next : cfg.children_of(label)) { + out << " \"" << label << "\" -> \"" << next << "\";\n"; } + out << "\n"; } - os << "\n"; + out << "}\n"; } -void print_cfg(std::ostream& os, const cfg_t& cfg, const bool simplify) { print_cfg(os, cfg, simplify, nullptr); } -} // namespace crab +void print_dot(const crab::cfg_t& cfg, const std::string& outfile) { + std::ofstream out{outfile}; + if (out.fail()) { + throw std::runtime_error(std::string("Could not open file ") + outfile); + } + print_dot(cfg, out); +} void print_reachability(std::ostream& os, const Report& report) { for (const auto& [label, notes] : report.reachability) { @@ -204,8 +174,15 @@ void print_all_messages(std::ostream& os, const Report& report) { print_warnings(os, report); } -void print_invariants(std::ostream& os, const cfg_t& cfg, const bool simplify, const Invariants& invariants) { - print_cfg(os, cfg, simplify, &invariants.invariants); +void print_invariants(std::ostream& os, const crab::cfg_t& cfg, const bool simplify, const Invariants& invariants) { + print_cfg( + cfg, os, simplify, + [&](std::ostream& os, const label_t& label) -> void { + os << "\nPre-invariant : " << invariants.invariants.at(label).pre << "\n"; + }, + [&](std::ostream& os, const label_t& label) -> void { + os << "\nPost-invariant : " << invariants.invariants.at(label).post << "\n"; + }); } namespace asm_syntax { @@ -631,13 +608,9 @@ void print(const InstructionSeq& insts, std::ostream& out, const std::optional& descriptors, std::ostream& o) { @@ -652,4 +625,4 @@ std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info) { os << "; " << line_info.file_name << ":" << line_info.line_number << "\n"; os << "; " << line_info.source_line << "\n"; return os; -} +} \ No newline at end of file diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 50c99efdc..31ec6b9a9 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -212,7 +212,7 @@ Instruction parse_instruction(const std::string& line, const std::map #include "asm_unmarshal.hpp" +#include "crab_utils/debug.hpp" #include "crab_utils/num_safety.hpp" #include "ebpf_vm_isa.hpp" @@ -416,8 +417,8 @@ struct Unmarshaller { } [[nodiscard]] - auto makeLddw(const ebpf_inst inst, const int32_t next_imm, const vector& insts, const pc_t pc) const - -> Instruction { + auto makeLddw(const ebpf_inst inst, const int32_t next_imm, const vector& insts, + const pc_t pc) const -> Instruction { if (!info.platform->supports_group(bpf_conformance_groups_t::base64)) { throw InvalidInstruction{pc, inst.opcode}; } diff --git a/src/assertions.cpp b/src/assertions.cpp index e7f769d35..3b82b2929 100644 --- a/src/assertions.cpp +++ b/src/assertions.cpp @@ -1,7 +1,7 @@ // Copyright (c) Prevail Verifier contributors. // SPDX-License-Identifier: MIT +#include #include - #include #include @@ -177,7 +177,7 @@ class AssertExtractor { } vector operator()(const Assume& ins) const { - if (ins.is_explicit) { + if (!ins.is_implicit) { return explicate(ins.cond); } return {}; @@ -297,19 +297,11 @@ class AssertExtractor { } }; -vector get_assertions(Instruction ins, const program_info& info, const std::optional& label) { - return std::visit(AssertExtractor{info, label}, ins); -} - /// Annotate the CFG by adding explicit assertions for all the preconditions /// of any instruction. For example, jump instructions are asserted not to /// compare numbers and pointers, or pointers to potentially distinct memory /// regions. The verifier will use these assertions to treat the program as /// unsafe unless it can prove that the assertions can never fail. -void explicate_assertions(cfg_t& cfg, const program_info& info) { - for (auto& [label, value] : cfg) { - (void)label; // unused - auto& ins = value.instruction(); - ins.preconditions = get_assertions(ins.cmd, info, value.label()); - } +vector get_assertions(Instruction ins, const program_info& info, const std::optional& label) { + return std::visit(AssertExtractor{info, label}, ins); } diff --git a/src/config.hpp b/src/config.hpp index c46629e03..cbf727363 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -2,7 +2,12 @@ // SPDX-License-Identifier: MIT #pragma once -#include "crab/cfg.hpp" +struct prepare_cfg_options { + /// When true, verifies that the program terminates. + bool check_for_termination = false; + /// When true, ensures the program has a valid exit block. + bool must_have_exit = true; +}; struct verbosity_options_t { /// When true, prints simplified control flow graph by merging chains into basic blocks. diff --git a/src/crab/cfg.hpp b/src/crab/cfg.hpp index 8072bcf0e..583bfe9ba 100644 --- a/src/crab/cfg.hpp +++ b/src/crab/cfg.hpp @@ -3,376 +3,172 @@ #pragma once /* - * Build a CFG to interface with the abstract domains and fixpoint iterators. + * a CFG to interface with the fixpoint iterators. */ +#include #include #include +#include #include #include #include -#include -#include -#include - +#include "config.hpp" +// TODO: avoid including asm_syntax.hpp here #include "asm_syntax.hpp" +#include "crab/label.hpp" #include "crab_utils/debug.hpp" -#include "crab_utils/num_big.hpp" -#include "spec_type_descriptors.hpp" namespace crab { -class InvalidControlFlow final : public std::runtime_error { - public: - explicit InvalidControlFlow(const std::string& what) : std::runtime_error(what) {} -}; - -class cfg_t; - -// Node type for the CFG -class value_t final { - friend class cfg_t; - - public: - value_t(const value_t&) = delete; - - using label_vec_t = std::set; - using neighbour_const_iterator = label_vec_t::const_iterator; - using neighbour_const_reverse_iterator = label_vec_t::const_reverse_iterator; - - private: - label_t m_label; - GuardedInstruction m_instruction{.cmd = Undefined{}}; - label_vec_t m_prev, m_next; - - public: - explicit value_t(label_t _label) : m_label{std::move(_label)} {} - - ~value_t() = default; - - [[nodiscard]] - label_t label() const { - return m_label; - } - - [[nodiscard]] - GuardedInstruction& instruction() { - return m_instruction; - } - - [[nodiscard]] - const GuardedInstruction& instruction() const { - return m_instruction; - } - - [[nodiscard]] - std::pair next_labels() const { - return std::make_pair(m_next.begin(), m_next.end()); - } - [[nodiscard]] - std::pair next_labels_reversed() const { - return std::make_pair(m_next.rbegin(), m_next.rend()); - } - - [[nodiscard]] - std::pair prev_labels() const { - return std::make_pair(m_prev.begin(), m_prev.end()); - } - - [[nodiscard]] - const label_vec_t& next_labels_set() const { - return m_next; - } - - [[nodiscard]] - const label_vec_t& prev_labels_set() const { - return m_prev; - } - - // Add a cfg_t edge from *this to b - void operator>>(value_t& b) { - assert(b.label() != label_t::entry); - assert(this->label() != label_t::exit); - m_next.insert(b.m_label); - b.m_prev.insert(m_label); - } - - // Remove a cfg_t edge from *this to b - void operator-=(value_t& b) { - m_next.erase(b.m_label); - b.m_prev.erase(m_label); - } - - [[nodiscard]] - size_t in_degree() const { - return m_prev.size(); - } - - [[nodiscard]] - size_t out_degree() const { - return m_next.size(); - } -}; - -void print_label(std::ostream& o, const value_t& value); -void print_assertions(std::ostream& o, const value_t& value); -void print_instruction(std::ostream& o, const value_t& value); -void print_goto(std::ostream& o, const value_t& value); -void print_from(std::ostream& o, const value_t& value); - /// Control-Flow Graph class cfg_t final { - public: - using node_t = label_t; // for Bgl graphs + friend struct cfg_builder_t; - using neighbour_const_iterator = value_t::neighbour_const_iterator; - using neighbour_const_reverse_iterator = value_t::neighbour_const_reverse_iterator; + // the choice to use set means that unmarshaling a conditional jump to the same target may be different + using label_vec_t = std::set; - using neighbour_const_range = boost::iterator_range; - using neighbour_const_reverse_range = boost::iterator_range; + struct adjacent_t final { + label_vec_t parents; + label_vec_t children; - private: - using map_t = std::map; - using binding_t = map_t::value_type; + [[nodiscard]] + size_t in_degree() const { + return parents.size(); + } - struct get_label { - label_t operator()(const binding_t& p) const { return p.second.label(); } + [[nodiscard]] + size_t out_degree() const { + return children.size(); + } }; - public: - using iterator = map_t::iterator; - using const_iterator = map_t::const_iterator; - using label_iterator = boost::transform_iterator; - using const_label_iterator = boost::transform_iterator; - - private: - map_t m_map; - - using visited_t = std::set; - - public: - cfg_t() { - m_map.emplace(entry_label(), entry_label()); - m_map.emplace(exit_label(), exit_label()); - } - - cfg_t(const cfg_t&) = delete; - - cfg_t(cfg_t&& o) noexcept : m_map(std::move(o.m_map)) {} - - ~cfg_t() = default; - - [[nodiscard]] - label_t exit_label() const { - return label_t::exit; - } - - // --- Begin ikos fixpoint API - - [[nodiscard]] - label_t entry_label() const { - return label_t::entry; - } + std::map neighbours{{label_t::entry, adjacent_t{}}, {label_t::exit, adjacent_t{}}}; + // Helpers [[nodiscard]] - neighbour_const_range next_nodes(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).next_labels()); + bool has_one_child(const label_t& label) const { + return out_degree(label) == 1; } [[nodiscard]] - neighbour_const_reverse_range next_nodes_reversed(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).next_labels_reversed()); + bool has_one_parent(const label_t& label) const { + return in_degree(label) == 1; } [[nodiscard]] - neighbour_const_range prev_nodes(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).prev_labels()); - } - - value_t& get_node(const label_t& _label) { - const auto it = m_map.find(_label); - if (it == m_map.end()) { + adjacent_t& get_node(const label_t& _label) { + const auto it = neighbours.find(_label); + if (it == neighbours.end()) { CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); } return it->second; } - const value_t& get_node(const label_t& _label) const { - const auto it = m_map.find(_label); - if (it == m_map.end()) { + [[nodiscard]] + const adjacent_t& get_node(const label_t& _label) const { + const auto it = neighbours.find(_label); + if (it == neighbours.end()) { CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); } return it->second; } - GuardedInstruction& at(const label_t& _label) { - const auto it = m_map.find(_label); - if (it == m_map.end()) { - CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); + public: + // TODO: temporarily here. Move to a separate class containing both cfg and instructions + std::map instructions{{label_t::entry, {.cmd = Undefined{}}}, + {label_t::exit, {.cmd = Undefined{}}}}; + const Instruction& instruction_at(const label_t& label) const { + if (!instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); } - return it->second.instruction(); + return instructions.at(label).cmd; } - - [[nodiscard]] - const GuardedInstruction& at(const label_t& _label) const { - const auto it = m_map.find(_label); - if (it == m_map.end()) { - CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); + Instruction& instruction_at(const label_t& label) { + if (!instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); } - return it->second.instruction(); + return instructions.at(label).cmd; } - // --- End ikos fixpoint API - - value_t& insert_after(const label_t& prev_label, const label_t& new_label, const Instruction& _ins) { - value_t& res = insert(new_label, GuardedInstruction{.cmd = _ins}); - value_t& prev = get_node(prev_label); - std::vector nexts; - for (const label_t& next : prev.next_labels_set()) { - nexts.push_back(next); + void set_assertions(const label_t& label, const std::vector& assertions) { + if (!instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); } - prev.m_next.clear(); - - std::vector prevs; - for (const label_t& next_label : nexts) { - get_node(next_label).m_prev.erase(prev_label); - } - - for (const label_t& next : nexts) { - get_node(prev_label) >> res; - res >> get_node(next); - } - return res; + instructions.at(label).preconditions = assertions; } - - value_t& insert(const label_t& _label, const Instruction& _ins) { - return insert(_label, GuardedInstruction{.cmd = _ins}); - } - - value_t& insert(const label_t& _label, GuardedInstruction&& _ins) { - const auto it = m_map.find(_label); - if (it != m_map.end()) { - return it->second; + const auto assertions_at(const label_t& label) const { + if (!instructions.contains(label)) { + CRAB_ERROR("Label ", to_string(label), " not found in the CFG: "); } - - m_map.emplace(_label, _label); - value_t& v = get_node(_label); - v.m_instruction = std::move(_ins); - return v; + return instructions.at(label).preconditions; } - void remove(const label_t& _label) { - if (_label == entry_label()) { - CRAB_ERROR("Cannot remove entry block"); - } - - if (_label == exit_label()) { - CRAB_ERROR("Cannot remove exit block"); - } - - std::vector> dead_edges; - auto& bb = get_node(_label); - - for (const auto& id : boost::make_iterator_range(bb.prev_labels())) { - if (_label != id) { - dead_edges.emplace_back(&get_node(id), &bb); - } - } - - for (const auto& id : boost::make_iterator_range(bb.next_labels())) { - if (_label != id) { - dead_edges.emplace_back(&bb, &get_node(id)); - } - } - - for (const auto& p : dead_edges) { - *p.first -= *p.second; - } - - m_map.erase(_label); + [[nodiscard]] + label_t exit_label() const { + return label_t::exit; } - //! return a begin iterator of basic_block_t's - iterator begin() { return m_map.begin(); } - - //! return an end iterator of basic_block_t's - iterator end() { return m_map.end(); } - [[nodiscard]] - const_iterator begin() const { - return m_map.begin(); + label_t entry_label() const { + return label_t::entry; } [[nodiscard]] - const_iterator end() const { - return m_map.end(); + const label_vec_t& children_of(const label_t& _label) const { + return get_node(_label).children; } - //! return a begin iterator of label_t's - const_label_iterator label_begin() const { return boost::make_transform_iterator(m_map.begin(), get_label()); } - - //! return an end iterator of label_t's - const_label_iterator label_end() const { return boost::make_transform_iterator(m_map.end(), get_label()); } - - //! return a begin iterator of label_t's [[nodiscard]] - std::vector labels() const { - std::vector res; - res.reserve(m_map.size()); - for (const auto& p : m_map) { - res.push_back(p.first); - } - return res; + const label_vec_t& parents_of(const label_t& _label) const { + return get_node(_label).parents; } + //! return a view of the labels, including entry and exit [[nodiscard]] - size_t size() const { - return gsl::narrow(std::distance(begin(), end())); + auto labels() const { + return std::views::keys(neighbours); } [[nodiscard]] - std::vector sorted_labels() const { - std::vector labels = this->labels(); - std::ranges::sort(labels); - return labels; + size_t size() const { + return neighbours.size(); } - value_t& get_child(const label_t& b) { - assert(has_one_child(b)); - const auto rng = next_nodes(b); - return get_node(*rng.begin()); + [[nodiscard]] + label_t get_child(const label_t& label) const { + if (!has_one_child(label)) { + CRAB_ERROR("Label ", to_string(label), " does not have a single child"); + } + return *get_node(label).children.begin(); } - value_t& get_parent(const label_t& b) { - assert(has_one_parent(b)); - const auto rng = prev_nodes(b); - return get_node(*rng.begin()); + [[nodiscard]] + label_t get_parent(const label_t& label) const { + if (!has_one_parent(label)) { + CRAB_ERROR("Label ", to_string(label), " does not have a single parent"); + } + return *get_node(label).parents.begin(); } - const value_t& get_child(const label_t& b) const { - assert(has_one_child(b)); - const auto rng = next_nodes(b); - return get_node(*rng.begin()); + [[nodiscard]] + bool contains(const label_t& label) const { + return neighbours.contains(label); } - const value_t& get_parent(const label_t& b) const { - assert(has_one_parent(b)); - const auto rng = prev_nodes(b); - return get_node(*rng.begin()); + [[nodiscard]] + int num_siblings(const label_t& label) const { + return get_node(get_parent(label)).out_degree(); } - private: - // Helpers [[nodiscard]] - bool has_one_child(const label_t& b) const { - const auto rng = next_nodes(b); - return std::distance(rng.begin(), rng.end()) == 1; + int in_degree(const label_t& label) const { + return get_node(label).in_degree(); } [[nodiscard]] - bool has_one_parent(const label_t& b) const { - const auto rng = prev_nodes(b); - return std::distance(rng.begin(), rng.end()) == 1; + int out_degree(const label_t& label) const { + return get_node(label).out_degree(); } }; @@ -416,28 +212,23 @@ class basic_block_t final { } }; -void print_dot(const cfg_t& cfg, std::ostream& out); -void print_dot(const cfg_t& cfg, const std::string& outfile); +cfg_t cfg_from_adjacency_list(const std::map>& adj_list); -void print_cfg(std::ostream& os, const cfg_t& cfg, bool simplify); +class InvalidControlFlow final : public std::runtime_error { + public: + explicit InvalidControlFlow(const std::string& what) : std::runtime_error(what) {} +}; } // end namespace crab -using crab::basic_block_t; -using crab::cfg_t; +std::vector get_assertions(Instruction ins, const program_info& info, const std::optional& label); +crab::cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options); std::vector stats_headers(); +std::map collect_stats(const crab::cfg_t& cfg); -std::map collect_stats(const cfg_t&); - -struct prepare_cfg_options { - /// When true, verifies that the program terminates. - bool check_for_termination = false; - /// When true, ensures the program has a valid exit block. - bool must_have_exit = true; -}; - -cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options); - -void explicate_assertions(cfg_t& cfg, const program_info& info); -std::vector get_assertions(Instruction ins, const program_info& info, const std::optional& label); +using printfunc = std::function; +void print_cfg(const crab::cfg_t& cfg, std::ostream& os, const bool simplify, const printfunc& prefunc, + const printfunc& postfunc); +void print_cfg(const crab::cfg_t& cfg, std::ostream& os, const bool simplify); +void print_dot(const crab::cfg_t& cfg, const std::string& outfile); diff --git a/src/crab/fwd_analyzer.cpp b/src/crab/fwd_analyzer.cpp index 72fbbb42c..b650c7d86 100644 --- a/src/crab/fwd_analyzer.cpp +++ b/src/crab/fwd_analyzer.cpp @@ -35,15 +35,13 @@ class interleaved_fwd_fixpoint_iterator_t final { ebpf_domain_t get_post(const label_t& node) const { return _inv.at(node).post; } void transform_to_post(const label_t& label, ebpf_domain_t pre) { - const GuardedInstruction& ins = _cfg.at(label); - if (thread_local_options.assume_assertions) { - for (const auto& assertion : ins.preconditions) { + for (const auto& assertion : _cfg.assertions_at(label)) { // avoid redundant errors ebpf_domain_assume(pre, assertion); } } - ebpf_domain_transform(pre, ins.cmd); + ebpf_domain_transform(pre, _cfg.instruction_at(label)); _inv.at(label).post = std::move(pre); } @@ -53,7 +51,7 @@ class interleaved_fwd_fixpoint_iterator_t final { return get_pre(node); } ebpf_domain_t res = ebpf_domain_t::bottom(); - for (const label_t& prev : _cfg.prev_nodes(node)) { + for (const label_t& prev : _cfg.parents_of(node)) { res |= get_post(prev); } return res; @@ -145,7 +143,7 @@ void interleaved_fwd_fixpoint_iterator_t::operator()(const std::shared_ptr cycle_nesting)) { invariant |= get_post(prev); } diff --git a/src/crab/split_dbm.cpp b/src/crab/split_dbm.cpp index db0d52e0c..29d61a222 100644 --- a/src/crab/split_dbm.cpp +++ b/src/crab/split_dbm.cpp @@ -785,8 +785,7 @@ bool SplitDBM::add_constraint(const linear_constraint_t& cst) { case constraint_kind_t::EQUALS_ZERO: { const linear_expression_t& exp = cst.expression(); if (!add_linear_leq(exp) || !add_linear_leq(exp.negate())) { - CRAB_LOG("zones-split", std::cout << " ~~> _|_" - << "\n"); + CRAB_LOG("zones-split", std::cout << " ~~> _|_" << "\n"); return false; } // g.check_adjs(); @@ -1222,7 +1221,8 @@ string_invariant SplitDBM::to_set() const { std::ostream& operator<<(std::ostream& o, const SplitDBM& dom) { return o << dom.to_set(); } bool SplitDBM::eval_expression_overflow(const linear_expression_t& e, Weight& out) const { - [[maybe_unused]] const bool overflow = convert_NtoW_overflow(e.constant_term(), out); + [[maybe_unused]] + const bool overflow = convert_NtoW_overflow(e.constant_term(), out); assert(!overflow); for (const auto& [variable, coefficient] : e.variable_terms()) { Weight coef; diff --git a/src/crab/thresholds.cpp b/src/crab/thresholds.cpp index 5cf436d30..2cb88d659 100644 --- a/src/crab/thresholds.cpp +++ b/src/crab/thresholds.cpp @@ -49,7 +49,7 @@ std::ostream& operator<<(std::ostream& o, const thresholds_t& t) { return o; } -void wto_thresholds_t::get_thresholds(const value_t& bb, thresholds_t& thresholds) const {} +void wto_thresholds_t::get_thresholds(const label_t& label, thresholds_t& thresholds) const {} void wto_thresholds_t::operator()(const label_t& vertex) { if (m_stack.empty()) { @@ -60,8 +60,7 @@ void wto_thresholds_t::operator()(const label_t& vertex) { const auto it = m_head_to_thresholds.find(head); if (it != m_head_to_thresholds.end()) { thresholds_t& thresholds = it->second; - const value_t& bb = m_cfg.get_node(vertex); - get_thresholds(bb, thresholds); + get_thresholds(vertex, thresholds); } else { CRAB_ERROR("No head found while gathering thresholds"); } @@ -69,15 +68,14 @@ void wto_thresholds_t::operator()(const label_t& vertex) { void wto_thresholds_t::operator()(const std::shared_ptr& cycle) { thresholds_t thresholds(m_max_size); - const auto& bb = m_cfg.get_node(cycle->head()); - get_thresholds(bb, thresholds); + const auto& head = cycle->head(); + get_thresholds(head, thresholds); // XXX: if we want to consider constants from loop // initializations - for (const auto& pre : boost::make_iterator_range(bb.prev_labels())) { - if (pre != cycle->head()) { - auto& pred_bb = m_cfg.get_node(pre); - get_thresholds(pred_bb, thresholds); + for (const auto& pre : m_cfg.parents_of(head)) { + if (pre != head) { + get_thresholds(pre, thresholds); } } diff --git a/src/crab/thresholds.hpp b/src/crab/thresholds.hpp index 1e3d67635..364cf4f65 100644 --- a/src/crab/thresholds.hpp +++ b/src/crab/thresholds.hpp @@ -58,7 +58,7 @@ class wto_thresholds_t final { // the top of the stack is the current wto head std::vector m_stack; - void get_thresholds(const value_t& bb, thresholds_t& thresholds) const; + void get_thresholds(const label_t& label, thresholds_t& thresholds) const; public: wto_thresholds_t(cfg_t& cfg, const size_t max_size) : m_cfg(cfg), m_max_size(max_size) {} diff --git a/src/crab/var_factory.cpp b/src/crab/var_factory.cpp index 5497a384a..830634aa1 100644 --- a/src/crab/var_factory.cpp +++ b/src/crab/var_factory.cpp @@ -148,8 +148,7 @@ std::ostream& operator<<(std::ostream& o, const data_kind_t& s) { return o << na static std::string mk_scalar_name(const data_kind_t kind, const number_t& o, const number_t& size) { std::stringstream os; - os << "s" - << "[" << o; + os << "s" << "[" << o; if (size != 1) { os << "..." << o + size - 1; } diff --git a/src/crab/wto.cpp b/src/crab/wto.cpp index fdf87e4d3..c95b6fd3a 100644 --- a/src/crab/wto.cpp +++ b/src/crab/wto.cpp @@ -4,6 +4,8 @@ #include "wto.hpp" +using crab::cfg_t; + // This file contains an iterative implementation of the recursive algorithm in // Bourdoncle, "Efficient chaotic iteration strategies with widenings", 1993 // http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3574 @@ -57,7 +59,7 @@ struct visit_args_t { std::weak_ptr containing_cycle; visit_args_t(const visit_task_type_t t, label_t v, wto_partition_t& p, std::weak_ptr cc) - : type(t), vertex(std::move(v)), partition(p), containing_cycle(std::move(cc)) {}; + : type(t), vertex(std::move(v)), partition(p), containing_cycle(std::move(cc)){}; }; struct wto_vertex_data_t { @@ -94,7 +96,6 @@ class wto_builder_t final { // the algorithm of figure 4 in the paper, where this constructor matches // what is shown there as the Partition function. explicit wto_builder_t(const cfg_t& cfg); - friend std::ostream& operator<<(std::ostream& o, const wto_builder_t& wto); }; void wto_builder_t::push_successors(const label_t& vertex, wto_partition_t& partition, @@ -109,7 +110,7 @@ void wto_builder_t::push_successors(const label_t& vertex, wto_partition_t& part // Schedule the next task for this vertex once we're done with anything else. _visit_stack.emplace(visit_task_type_t::StartVisit, vertex, partition, containing_cycle); - for (const label_t& succ : _cfg.next_nodes_reversed(vertex)) { + for (const label_t& succ : std::ranges::reverse_view(_cfg.children_of(vertex))) { if (_vertex_data[succ].dfn == 0) { _visit_stack.emplace(visit_task_type_t::PushSuccessors, succ, partition, containing_cycle); } @@ -121,7 +122,7 @@ void wto_builder_t::start_visit(const label_t& vertex, wto_partition_t& partitio wto_vertex_data_t& vertex_data = _vertex_data[vertex]; int head_dfn = vertex_data.dfn; bool loop = false; - for (const label_t& succ : _cfg.next_nodes(vertex)) { + for (const label_t& succ : _cfg.children_of(vertex)) { const wto_vertex_data_t& data = _vertex_data[succ]; int min_dfn = data.dfn; if (data.head_dfn != 0 && data.dfn != DFN_INF) { @@ -157,7 +158,7 @@ void wto_builder_t::start_visit(const label_t& vertex, wto_partition_t& partitio // Walk the control flow graph, adding nodes to this cycle. // This is the Component() function described in figure 4 of the paper. - for (const label_t& succ : _cfg.next_nodes_reversed(vertex)) { + for (const label_t& succ : std::ranges::reverse_view(_cfg.children_of(vertex))) { if (_vertex_data.at(succ).dfn == 0) { _visit_stack.emplace(visit_task_type_t::PushSuccessors, succ, cycle->_components, cycle); } diff --git a/src/crab/wto.hpp b/src/crab/wto.hpp index f825ad97f..ebfe21175 100644 --- a/src/crab/wto.hpp +++ b/src/crab/wto.hpp @@ -114,7 +114,7 @@ class wto_t final { friend class wto_builder_t; public: - explicit wto_t(const cfg_t& cfg); + explicit wto_t(const crab::cfg_t& cfg); [[nodiscard]] wto_partition_t::const_reverse_iterator begin() const { diff --git a/src/crab_utils/graph_ops.hpp b/src/crab_utils/graph_ops.hpp index cdc6d5181..931a1694f 100644 --- a/src/crab_utils/graph_ops.hpp +++ b/src/crab_utils/graph_ops.hpp @@ -725,7 +725,6 @@ class GraphOps { } // Run Bellman-ford on each SCC. - // for(std::vector& scc : sccs) // Current implementation returns sccs in reverse topological order. for (std::vector& scc : sccs) { @@ -741,7 +740,8 @@ class GraphOps { ++qtail; } - for (vert_id _ : scc) { + for ([[maybe_unused]] + vert_id _ : scc) { while (qtail != qhead) { vert_id s = *--qtail; // If it _was_ on the queue, it must be in the SCC diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index 049c091c8..957030c67 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -5,11 +5,9 @@ * the verification process and returning the results. **/ -#include #include #include #include -#include #include "asm_files.hpp" #include "asm_syntax.hpp" @@ -19,6 +17,7 @@ #include "crab_verifier.hpp" #include "string_constraints.hpp" +using crab::cfg_t; using crab::ebpf_domain_t; using std::string; @@ -69,7 +68,7 @@ bool Invariants::verified(const cfg_t& cfg) const { if (inv_pair.pre.is_bottom()) { continue; } - for (const Assertion& assertion : cfg.at(label).preconditions) { + for (const Assertion& assertion : cfg.assertions_at(label)) { if (!ebpf_domain_check(inv_pair.pre, assertion).empty()) { return false; } @@ -84,16 +83,15 @@ Report Invariants::check_assertions(const cfg_t& cfg) const { if (inv_pair.pre.is_bottom()) { continue; } - const auto ins = cfg.at(label); - for (const Assertion& assertion : ins.preconditions) { + for (const Assertion& assertion : cfg.assertions_at(label)) { const auto warnings = ebpf_domain_check(inv_pair.pre, assertion); for (const auto& msg : warnings) { report.warnings[label].emplace_back(msg); } } - if (std::holds_alternative(ins.cmd)) { + if (const auto passume = std::get_if(&cfg.instruction_at(label))) { if (inv_pair.post.is_bottom()) { - const auto s = to_string(std::get(ins.cmd)); + const auto s = to_string(*passume); report.reachability[label].emplace_back("Code becomes unreachable (" + s + ")"); } } diff --git a/src/crab_verifier.hpp b/src/crab_verifier.hpp index b6d3c253a..ec8cc1274 100644 --- a/src/crab_verifier.hpp +++ b/src/crab_verifier.hpp @@ -28,8 +28,8 @@ class Report final { std::set reachability_set() const { std::set result; - for (const auto& [label, warnings] : reachability) { - for (const auto& msg : warnings) { + for (const auto& [label, reach_warning] : reachability) { + for (const auto& msg : reach_warning) { result.insert(to_string(label) + ": " + msg); } } @@ -38,8 +38,8 @@ class Report final { std::set warning_set() const { std::set result; - for (const auto& [label, warnings] : warnings) { - for (const auto& msg : warnings) { + for (const auto& [label, warning_vec] : warnings) { + for (const auto& msg : warning_vec) { result.insert(to_string(label) + ": " + msg); } } @@ -64,15 +64,15 @@ class Invariants final { crab::interval_t exit_value() const; int max_loop_count() const; - bool verified(const cfg_t& cfg) const; - Report check_assertions(const cfg_t& cfg) const; + bool verified(const crab::cfg_t& cfg) const; + Report check_assertions(const crab::cfg_t& cfg) const; - friend void print_invariants(std::ostream& os, const cfg_t& cfg, bool simplify, const Invariants& invariants); + friend void print_invariants(std::ostream& os, const crab::cfg_t& cfg, bool simplify, const Invariants& invariants); }; -Invariants analyze(const cfg_t& cfg); -Invariants analyze(const cfg_t& cfg, const string_invariant& entry_invariant); -inline bool verify(const cfg_t& cfg) { return analyze(cfg).verified(cfg); } +Invariants analyze(const crab::cfg_t& cfg); +Invariants analyze(const crab::cfg_t& cfg, const string_invariant& entry_invariant); +inline bool verify(const crab::cfg_t& cfg) { return analyze(cfg).verified(cfg); } int create_map_crab(const EbpfMapType& map_type, uint32_t key_size, uint32_t value_size, uint32_t max_entries, ebpf_verifier_options_t options); diff --git a/src/main/check.cpp b/src/main/check.cpp index 7503d9c49..abf460608 100644 --- a/src/main/check.cpp +++ b/src/main/check.cpp @@ -239,9 +239,9 @@ int main(int argc, char** argv) { // Convert the instruction sequence to a control-flow graph. try { const auto verbosity = ebpf_verifier_options.verbosity_opts; - const cfg_t cfg = prepare_cfg(prog, raw_prog.info, ebpf_verifier_options.cfg_opts); + const crab::cfg_t cfg = prepare_cfg(prog, raw_prog.info, ebpf_verifier_options.cfg_opts); if (domain == "cfg") { - print_cfg(std::cout, cfg, verbosity.simplify); + print_cfg(cfg, std::cout, verbosity.simplify); return 0; } const auto begin = std::chrono::steady_clock::now(); @@ -277,7 +277,7 @@ int main(int argc, char** argv) { return !res; } else if (domain == "stats") { // Convert the instruction sequence to a control-flow graph. - const cfg_t cfg = prepare_cfg(prog, raw_prog.info, ebpf_verifier_options.cfg_opts); + const crab::cfg_t cfg = prepare_cfg(prog, raw_prog.info, ebpf_verifier_options.cfg_opts); // Just print eBPF program stats. auto stats = collect_stats(cfg); diff --git a/src/string_constraints.hpp b/src/string_constraints.hpp index c3192081f..cdc37b271 100644 --- a/src/string_constraints.hpp +++ b/src/string_constraints.hpp @@ -16,7 +16,7 @@ struct string_invariant { string_invariant() = default; - explicit string_invariant(std::set inv) : maybe_inv(std::move(inv)) {}; + explicit string_invariant(std::set inv) : maybe_inv(std::move(inv)){}; string_invariant(const string_invariant& inv) = default; string_invariant& operator=(const string_invariant& inv) = default; diff --git a/src/test/ebpf_yaml.cpp b/src/test/ebpf_yaml.cpp index a822a6fef..cd0eb1e1e 100644 --- a/src/test/ebpf_yaml.cpp +++ b/src/test/ebpf_yaml.cpp @@ -17,6 +17,7 @@ #include "ebpf_yaml.hpp" #include "string_constraints.hpp" +using crab::cfg_t; using std::string; using std::vector; diff --git a/src/test/test_verify.cpp b/src/test/test_verify.cpp index cae8f3106..c606410e1 100644 --- a/src/test/test_verify.cpp +++ b/src/test/test_verify.cpp @@ -1,9 +1,12 @@ // Copyright (c) Prevail Verifier contributors. // SPDX-License-Identifier: MIT -#include "ebpf_verifier.hpp" #include #include +#include "ebpf_verifier.hpp" + +using crab::cfg_t; + #define FAIL_LOAD_ELF(dirname, filename, sectionname) \ TEST_CASE("Try loading nonexisting program: " dirname "/" filename, "[elf]") { \ try { \ diff --git a/src/test/test_wto.cpp b/src/test/test_wto.cpp index 091efebc3..8406482f8 100644 --- a/src/test/test_wto.cpp +++ b/src/test/test_wto.cpp @@ -1,35 +1,24 @@ // Copyright (c) Prevail Verifier contributors. // SPDX-License-Identifier: MIT +#include + #include "crab/cfg.hpp" #include "crab/wto.hpp" -#include -TEST_CASE("wto figure 1", "[wto]") { - cfg_t cfg; +using crab::label_t; +TEST_CASE("wto figure 1", "[wto]") { // Construct the example graph in figure 1 of Bourdoncle, // "Efficient chaotic iteration strategies with widenings", 1993. - - // Add nodes. - for (int i = 1; i <= 8; i++) { - cfg.insert(label_t{i}, Undefined{}); - } - - // Add edges. - cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); - cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); - cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); - cfg.get_node(label_t{3}) >> cfg.get_node(label_t{4}); - cfg.get_node(label_t{4}) >> cfg.get_node(label_t{5}); - cfg.get_node(label_t{4}) >> cfg.get_node(label_t{7}); - cfg.get_node(label_t{5}) >> cfg.get_node(label_t{6}); - cfg.get_node(label_t{6}) >> cfg.get_node(label_t{5}); - cfg.get_node(label_t{6}) >> cfg.get_node(label_t{7}); - cfg.get_node(label_t{7}) >> cfg.get_node(label_t{3}); - cfg.get_node(label_t{7}) >> cfg.get_node(label_t{8}); - cfg.get_node(label_t{8}) >> cfg.get_node(label_t::exit); - - const wto_t wto(cfg); + const wto_t wto(crab::cfg_from_adjacency_list({{crab::label_t::entry, {label_t{1}}}, + {label_t{1}, {label_t{2}}}, + {label_t{2}, {label_t{3}}}, + {label_t{3}, {label_t{4}}}, + {label_t{4}, {label_t{5}, label_t{7}}}, + {label_t{5}, {label_t{6}}}, + {label_t{6}, {label_t{5}, label_t{7}}}, + {label_t{7}, {label_t{3}, label_t{8}}}, + {label_t{8}, {label_t::exit}}})); std::ostringstream os; os << wto; @@ -37,27 +26,14 @@ TEST_CASE("wto figure 1", "[wto]") { } TEST_CASE("wto figure 2a", "[wto]") { - cfg_t cfg; - // Construct the example graph in figure 2a of Bourdoncle, // "Efficient chaotic iteration strategies with widenings", 1993. - - // Add nodes. - for (int i = 1; i <= 5; i++) { - cfg.insert(label_t{i}, Undefined{}); - } - - // Add edges. - cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); - cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); - cfg.get_node(label_t{1}) >> cfg.get_node(label_t{4}); - cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); - cfg.get_node(label_t{3}) >> cfg.get_node(label_t::exit); - cfg.get_node(label_t{4}) >> cfg.get_node(label_t{3}); - cfg.get_node(label_t{4}) >> cfg.get_node(label_t{5}); - cfg.get_node(label_t{5}) >> cfg.get_node(label_t{4}); - - const wto_t wto(cfg); + const wto_t wto(crab::cfg_from_adjacency_list({{crab::label_t::entry, {label_t{1}}}, + {label_t{1}, {label_t{2}, label_t{4}}}, + {label_t{2}, {label_t{3}}}, + {label_t{3}, {label_t::exit}}, + {label_t{4}, {label_t{3}, label_t{5}}}, + {label_t{5}, {label_t{4}}}})); std::ostringstream os; os << wto; @@ -65,26 +41,13 @@ TEST_CASE("wto figure 2a", "[wto]") { } TEST_CASE("wto figure 2b", "[wto]") { - cfg_t cfg; - // Construct the example graph in figure 2b of Bourdoncle, // "Efficient chaotic iteration strategies with widenings", 1993. - - // Add nodes. - for (int i = 1; i <= 4; i++) { - cfg.insert(label_t{i}, Undefined{}); - } - - // Add edges. - cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); - cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); - cfg.get_node(label_t{1}) >> cfg.get_node(label_t{4}); - cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); - cfg.get_node(label_t{3}) >> cfg.get_node(label_t{1}); - cfg.get_node(label_t{3}) >> cfg.get_node(label_t::exit); - cfg.get_node(label_t{4}) >> cfg.get_node(label_t{3}); - - const wto_t wto(cfg); + const wto_t wto(crab::cfg_from_adjacency_list({{crab::label_t::entry, {label_t{1}}}, + {label_t{1}, {label_t{2}, label_t{4}}}, + {label_t{2}, {label_t{3}}}, + {label_t{3}, {label_t{1}, label_t::exit}}, + {label_t{4}, {label_t{3}}}})); std::ostringstream os; os << wto; From d3dec906f422bbf1c7002a94930ba4494bcf5dcf Mon Sep 17 00:00:00 2001 From: Elazar Gershuni Date: Mon, 2 Dec 2024 00:05:11 +0200 Subject: [PATCH 2/2] include cstring for memset Signed-off-by: Elazar Gershuni --- src/main/linux_verifier.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/linux_verifier.cpp b/src/main/linux_verifier.cpp index 8d43cbc34..b1807b287 100644 --- a/src/main/linux_verifier.cpp +++ b/src/main/linux_verifier.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "config.hpp" #include "linux_verifier.hpp" @@ -21,10 +22,10 @@ std::tuple bpf_verify_program(const EbpfProgramType& type, const s ebpf_verifier_options_t* options) { std::vector buf(options->verbosity_opts.print_failures ? 1000000 : 10); buf[0] = 0; - memset(buf.data(), '\0', buf.size()); + std::memset(buf.data(), '\0', buf.size()); union bpf_attr attr {}; - memset(&attr, '\0', sizeof(attr)); + std::memset(&attr, '\0', sizeof(attr)); attr.prog_type = gsl::narrow<__u32>(type.platform_specific_data); attr.insn_cnt = gsl::narrow<__u32>(raw_prog.size()); attr.insns = reinterpret_cast<__u64>(raw_prog.data());