Skip to content

Commit

Permalink
zero-extension and sign-extension
Browse files Browse the repository at this point in the history
Signed-off-by: Elazar Gershuni <[email protected]>
  • Loading branch information
elazarg committed Sep 22, 2023
1 parent 3300cea commit 8fe3744
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 17 deletions.
9 changes: 9 additions & 0 deletions src/asm_marshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ static uint8_t imm(Un::Op op) {
case Op::LE32: return 32;
case Op::BE64:
case Op::LE64: return 64;
case Op::ZEXT32:
case Op::SEXT32: return 32;
}
assert(false);
return {};
Expand Down Expand Up @@ -138,6 +140,13 @@ struct MarshalVisitor {
.offset = 0,
.imm = imm(b.op),
}};
case Un::Op::ZEXT32:
case Un::Op::SEXT32: {
auto [lsh, rsh] = splitExt32(b);
auto lshift = this->operator()(lsh);
auto rshift = this->operator()(rsh);
return {lshift[0], rshift[0]};
}
}
assert(false);
return {};
Expand Down
2 changes: 2 additions & 0 deletions src/asm_ostream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ struct InstructionPrinterVisitor {
case Un::Op::LE32: os_ << "le32 "; break;
case Un::Op::LE64: os_ << "le64 "; break;
case Un::Op::NEG: os_ << "-"; break;
case Un::Op::ZEXT32: os_ << "zext32 "; break;
case Un::Op::SEXT32: os_ << "sext32 "; break;
}
os_ << b.dst;
}
Expand Down
20 changes: 15 additions & 5 deletions src/asm_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ using crab::linear_expression_t;
#define OPASSIGN R"_(\s*(\S*)=\s*)_"
#define ASSIGN R"_(\s*=\s*)_"
#define LONGLONG R"_(\s*(ll|)\s*)_"
#define UNOP R"_((-|be16|be32|be64))_"
#define UNOP R"_((-|be16|be32|be64|zext32|sext32))_"

#define PLUSMINUS R"_((\s*[+-])\s*)_"
#define LPAREN R"_(\s*\(\s*)_"
Expand Down Expand Up @@ -68,6 +68,8 @@ static const std::map<std::string, Un::Op> str_to_unop = {
{"le32", Un::Op::LE32},
{"le64", Un::Op::LE64},
{"-", Un::Op::NEG},
{"zext32", Un::Op::ZEXT32},
{"sext32", Un::Op::SEXT32},
};

static const std::map<std::string, Condition::Op> str_to_cmpop = {
Expand Down Expand Up @@ -148,10 +150,18 @@ Instruction parse_instruction(const std::string& line, const std::map<std::strin
return Bin{.op = str_to_binop.at(m[2]), .dst = reg(r), .v = reg(m[3]), .is64 = r.at(0) != 'w', .lddw = false};
}
if (regex_match(text, m, regex(WREG ASSIGN UNOP WREG))) {
if (m[1] != m[3]) throw std::invalid_argument(std::string("Invalid unary operation: ") + text);
std::string r = m[1];
bool is64 = r.at(0) != 'w';
return Un{.op = str_to_unop.at(m[2]), .dst = reg(m[1]), .is64 = is64};
std::string lhs = m[1];
std::string rhs = m[3];
bool is64 = lhs.at(0) != 'w';
auto op = str_to_unop.at(m[2]);
if (op == Un::Op::ZEXT32 || op == Un::Op::SEXT32) {
if (lhs.at(0) != 'r' || rhs.at(0) != 'w' || reg(lhs) != reg(rhs)) {
throw std::invalid_argument("zext/sext operations must be between r and w versions of the same register: rX = zext32 wX.");
}
} else if (m[1] != m[3]) {
throw std::invalid_argument("Unary operations must operate on the same register.");
}
return Un{.op = op, .dst = reg(lhs), .is64 = is64};
}
if (regex_match(text, m, regex(WREG OPASSIGN IMM LONGLONG))) {
std::string r = m[1];
Expand Down
22 changes: 15 additions & 7 deletions src/asm_syntax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,20 +97,28 @@ struct Bin {
/// Unary operation.
struct Un {
enum class Op {
BE16, // dst = htobe16(dst)
BE32, // dst = htobe32(dst)
BE64, // dst = htobe64(dst)
LE16, // dst = htole16(dst)
LE32, // dst = htole32(dst)
LE64, // dst = htole64(dst)
NEG, // dst = -dst
BE16, /// dst = htobe16(dst)
BE32, /// dst = htobe32(dst)
BE64, /// dst = htobe64(dst)
LE16, /// dst = htole16(dst)
LE32, /// dst = htole32(dst)
LE64, /// dst = htole64(dst)
NEG, /// dst = -dst
ZEXT32, /// Zero extension: dst <<= 32; dst >>>= 32
SEXT32, /// Sign extension: dst <<= 32; dst >>= 32
};

Op op;
Reg dst;
bool is64{};
};

inline std::tuple<Bin, Bin> splitExt32(Un un) {
Bin::Op rshift_op = un.op == Un::Op::ZEXT32 ? Bin::Op::RSH : Bin::Op::ARSH;
return {Bin{.op = Bin::Op::LSH, .dst = un.dst, .v = Imm{.v = 32}, .is64 = true, .lddw = false},
Bin{.op = rshift_op, .dst = un.dst, .v = Imm{.v = 32}, .is64 = true, .lddw = false}};
}

/// This instruction is encoded similarly to LDDW.
/// See comment in makeLddw() at asm_unmarshal.cpp
struct LoadMapFd {
Expand Down
37 changes: 33 additions & 4 deletions src/asm_unmarshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ static auto getMemWidth(uint8_t opcode) -> int {
// return {};
// }

static Instruction shift32(Reg dst, Bin::Op op) {
return (Instruction)Bin{.op=op,.dst = dst, .v = Imm{32}, .is64=true, .lddw=false};
}

struct Unmarshaller {
vector<vector<string>>& notes;
const program_info& info;
Expand Down Expand Up @@ -397,14 +401,14 @@ struct Unmarshaller {
for (size_t pc = 0; pc < insts.size();) {
ebpf_inst inst = insts[pc];
Instruction new_ins;
bool lddw = false;
bool skip_instruction = false;
bool fallthrough = true;
switch (inst.opcode & INST_CLS_MASK) {
case INST_CLS_LD:
if (inst.opcode == INST_OP_LDDW_IMM) {
int32_t next_imm = pc < insts.size() - 1 ? insts[pc + 1].imm : 0;
new_ins = makeLddw(inst, next_imm, insts, static_cast<pc_t>(pc));
lddw = true;
skip_instruction = true;
break;
}
// fallthrough
Expand All @@ -413,7 +417,32 @@ struct Unmarshaller {
case INST_CLS_STX: new_ins = makeMemOp(pc, inst); break;

case INST_CLS_ALU:
case INST_CLS_ALU64: new_ins = makeAluOp(pc, inst); break;
case INST_CLS_ALU64: {
new_ins = makeAluOp(pc, inst);

// Merge (rX <<= 32; rX >>>= 32) into rX = zext32 rX
// (rX <<= 32; rX >>= 32) into rX = sext32 rX
if (pc >= insts.size() - 1)
break;
ebpf_inst next = insts[pc + 1];
auto dst = Reg{inst.dst};

if (new_ins != shift32(dst, Bin::Op::LSH))
break;

if ((next.opcode & INST_CLS_MASK) != INST_CLS_ALU64)
break;
auto next_ins = makeAluOp(pc+1, next);
if (next_ins == shift32(dst, Bin::Op::RSH)) {
new_ins = Un{.op = Un::Op::ZEXT32, .dst = dst, .is64 = true};
skip_instruction = true;
} else if (next_ins == shift32(dst, Bin::Op::ARSH)) {
new_ins = Un{.op = Un::Op::SEXT32, .dst = dst, .is64 = true};
skip_instruction = true;
}

break;
}

case INST_CLS_JMP32:
case INST_CLS_JMP: {
Expand Down Expand Up @@ -454,7 +483,7 @@ struct Unmarshaller {

pc++;
note_next_pc();
if (lddw) {
if (skip_instruction) {
pc++;
note_next_pc();
}
Expand Down
1 change: 1 addition & 0 deletions src/assertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ class AssertExtractor {

vector<Assert> operator()(Un ins) {
return {
// This is also true for ZEXT and SEXT
Assert{TypeConstraint{ins.dst, TypeGroup::number}}
};
}
Expand Down
14 changes: 14 additions & 0 deletions src/crab/ebpf_domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1478,6 +1478,20 @@ void ebpf_domain_t::operator()(const Un& stmt) {
neg(dst.svalue, dst.uvalue, stmt.is64 ? 64 : 32);
havoc_offsets(stmt.dst);
break;
case Un::Op::ZEXT32:
case Un::Op::SEXT32: {
// keep relational information if zext32/sext32 is no-op
if (stmt.op == Un::Op::ZEXT32 && m_inv.eval_interval(dst.uvalue) <= interval_t::unsigned_int(false)) {
break;
}
if (stmt.op == Un::Op::SEXT32 && m_inv.eval_interval(dst.svalue) <= interval_t::signed_int(false)) {
break;
}
auto [lsh, rsh] = splitExt32(stmt);
(*this)(lsh);
(*this)(rsh);
break;
}
}
}

Expand Down
4 changes: 3 additions & 1 deletion src/test/test_marshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@ TEST_CASE("disasm_marshal", "[disasm][marshal]") {
Un::Op::LE32,
Un::Op::LE64,
Un::Op::NEG,
Un::Op::ZEXT32,
Un::Op::SEXT32,
};
for (auto op : ops)
compare_marshal_unmarshal(Un{.op = op, .dst = Reg{1}});
compare_marshal_unmarshal(Un{.op = op, .dst = Reg{1}, .is64 = true});
}

SECTION("LoadMapFd") { compare_marshal_unmarshal(LoadMapFd{.dst = Reg{1}, .mapfd = 1}, true); }
Expand Down
1 change: 1 addition & 0 deletions src/test/test_yaml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ YAML_CASE("test-data/full64.yaml")
YAML_CASE("test-data/jump.yaml")
YAML_CASE("test-data/packet.yaml")
YAML_CASE("test-data/parse.yaml")
YAML_CASE("test-data/sext.yaml")
YAML_CASE("test-data/shift.yaml")
YAML_CASE("test-data/stack.yaml")
YAML_CASE("test-data/subtract.yaml")
Expand Down
72 changes: 72 additions & 0 deletions test-data/sext.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright (c) Prevail Verifier contributors.
# SPDX-License-Identifier: MIT

# Tests for signed/unsigned extension
---
test-case: small zext relational nop

pre: ["r1.type=number", "r1.uvalue=[0, 5]",
"r2.type=number", "r2.uvalue=[0, 5]",
"r1.uvalue=r2.uvalue"]

code:
<start>: |
r1 = zext32 w1 ; nop
post:
- r1.type=number
- r1.uvalue=[0, 5]
- r2.type=number
- r2.uvalue=[0, 5]
- r1.uvalue=r2.uvalue
---
test-case: small sext relational nop

pre: ["r1.type=number", "r1.svalue=[-5, 5]",
"r2.type=number", "r2.svalue=[-5, 5]",
"r1.svalue=r2.svalue"]

code:
<start>: |
r1 = sext32 w1 ; nop
post:
- r1.type=number
- r1.svalue=[-5, 5]
- r2.type=number
- r2.svalue=[-5, 5]
- r1.svalue=r2.svalue
---
test-case: large zext relational is two shifts

pre: ["r1.type=number", "r1.uvalue=[0, 4294967299]",
"r2.type=number", "r2.uvalue=[0, 4294967299]",
"r1.uvalue=r2.uvalue"]

code:
<start>: |
r1 = zext32 w1 ; nop
post:
- r1.type=number
- r1.svalue=[0, 4294967295]
- r1.svalue=r1.uvalue
- r1.uvalue=[0, 4294967295]
- r2.type=number
- r2.uvalue=[0, 4294967299]
---
test-case: large sext relational is two shifts

pre: ["r1.type=number", "r1.svalue=[-5, 4294967299]",
"r2.type=number", "r2.svalue=[-5, 4294967299]",
"r1.svalue=r2.svalue"]

code:
<start>: |
r1 = sext32 w1 ; nop
post:
- r1.type=number
- r1.svalue=[-2147483648, 2147483647]
- r2.type=number
- r2.svalue=[-5, 4294967299]

0 comments on commit 8fe3744

Please sign in to comment.