From 861930b9c4719a6cf38cfb09896f9753fc735224 Mon Sep 17 00:00:00 2001 From: Josh Watson Date: Fri, 26 Oct 2018 21:04:42 -0700 Subject: [PATCH 1/6] First pass cleaning up the API --- evm_cfg_builder/cfg.py | 304 ----------------------------- evm_cfg_builder/cfg/__init__.py | 188 ++++++++++++++++++ evm_cfg_builder/cfg/basic_block.py | 57 ++++++ evm_cfg_builder/cfg/function.py | 143 ++++++++++++++ evm_cfg_builder/cfg_builder.py | 52 ++--- evm_cfg_builder/evm_helpers.py | 30 --- 6 files changed, 404 insertions(+), 370 deletions(-) delete mode 100644 evm_cfg_builder/cfg.py create mode 100644 evm_cfg_builder/cfg/__init__.py create mode 100644 evm_cfg_builder/cfg/basic_block.py create mode 100644 evm_cfg_builder/cfg/function.py delete mode 100644 evm_cfg_builder/evm_helpers.py diff --git a/evm_cfg_builder/cfg.py b/evm_cfg_builder/cfg.py deleted file mode 100644 index c2ac7f7..0000000 --- a/evm_cfg_builder/cfg.py +++ /dev/null @@ -1,304 +0,0 @@ -from .evm_helpers import BASIC_BLOCK_END - -class BasicBlock(object): - - def __init__(self): - self.instructions = [] - # sons and fathers are dict - # The key is the function hash - # It allows to compute the VSA only - # On a specific function, to separate - # the merging - self.sons = {} - self.fathers = {} - - def add_instruction(self, instruction): - self.instructions += [instruction] - - def __repr__(self): - return ''.format(self.start.pc, self.end.pc) - - @property - def start(self): - return self.instructions[0] - - @property - def end(self): - return self.instructions[-1] - - - def add_son(self, son, key): - if not key in self.sons: - self.sons[key] = [] - if son not in self.sons[key]: - self.sons[key].append(son) - - def add_father(self, father, key): - if not key in self.fathers: - self.fathers[key] = [] - if father not in self.fathers: - self.fathers[key].append(father) - - def ends_with_jumpi(self): - return self.end.name == 'JUMPI' - - def ends_with_jump_or_jumpi(self): - return self.end.name in ['JUMP', 'JUMPI'] - - def true_branch(self, key): - assert(self.ends_with_jumpi()) - - sons = [bb for bb in self.sons[key] if bb.start.pc != (self.end.pc+1)] - assert(len(sons[key]) == 1) - return sons[key][1] - - def false_branch(self, key): - assert(self.ends_with_jumpi()) - - sons = [bb for bb in self.sons[key] if bb.start.pc == (self.end.pc+1)] - assert(len(sons) == 1) - return sons[key][0] - -class Function(object): - - def __init__(self, hash_id, start_addr, entry_basic_block): - self._hash_id = hash_id - self._start_addr = start_addr - self._entry = entry_basic_block - self._name = hex(hash_id) - self._basic_blocks = [] - self._attributes = [] - - @property - def hash_id(self): - return self._hash_id - - @property - def key(self): - return self.hash_id - - @property - def name(self): - return self._name - - @name.setter - def name(self, n): - self._name = n - - @property - def basic_blocks(self): - ''' - Returns - list(BasicBlock) - ''' - return self._basic_blocks - - @basic_blocks.setter - def basic_blocks(self, bbs): - self._basic_blocks = bbs - - @property - def entry(self): - return self._entry - - @property - def attributes(self): - """ - Returns - list(str) - """ - return self._attributes - - def add_attributes(self, attr): - if not attr in self.attributes: - self._attributes.append(attr) - - def check_payable(self): - entry = self.entry - if any(ins.name == 'CALLVALUE' for ins in entry.instructions): - return - self.add_attributes('payable') - - def check_view(self): - changing_state_ops = ['CREATE', - 'CALL', - 'CALLCODE', - 'DELEGATECALL', - 'SELFDESTRUCT', - 'SSTORE'] - - for bb in self.basic_blocks: - if any(ins.name in changing_state_ops for ins in bb.instructions): - return - - self.add_attributes('view') - - def check_pure(self): - state_ops = ['CREATE', - 'CALL', - 'CALLCODE', - 'DELEGATECALL', - 'SELFDESTRUCT', - 'SSTORE', - 'ADDRESS', - 'BALANCE', - 'ORIGIN', - 'CALLER', - 'CALLVALUE', - 'CALLDATALOAD', - 'CALLDATASIZE', - 'CALLDATACOPY' - 'CODESIZE', - 'CODECOPY', - 'EXTCODESIZE', - 'EXTCODECOPY', - 'RETURNDATASIZE', - 'RETURNDATACOPY', - 'BLOCKHASH', - 'COINBASE', - 'TIMESTAMP', - 'NUMBER', - 'DIFFICULTY', - 'GASLIMIT', - 'LOG0', 'LOG1', 'LOG2', 'LOG3', 'LOG4', - 'CREATE', - 'CALL', - 'CALLCODE', - 'DELEGATECALL', - 'STATICCALL', - 'SELFDESTRUCT', - 'SSTORE', - 'SLOAD'] - - for bb in self.basic_blocks: - if any(ins.name in state_ops for ins in bb.instructions): - return - - self.add_attributes('pure') - - def __str__(self): - attrs = '' - if self.attributes: - attrs = ", " + ",".join(self.attributes) - return '{}, {} #bbs {}'.format(self.name, len(self.basic_blocks), attrs) - - def output_to_dot(self, base_filename): - - with open('{}{}.dot'.format(base_filename, self.name), 'w') as f: - f.write('digraph{\n') - for basic_block in self.basic_blocks: - instructions = ['{}:{}'.format(hex(ins.pc), - str(ins)) for ins in basic_block.instructions] - instructions = '\n'.join(instructions) - - f.write('{}[label="{}"]\n'.format(basic_block.start.pc, instructions)) - - if self.key in basic_block.sons: - for son in basic_block.sons[self.key]: - f.write('{} -> {}\n'.format(basic_block.start.pc, son.start.pc)) - - elif basic_block.ends_with_jump_or_jumpi(): - print('Missing branches {}:{}'.format(self.name, - hex(basic_block.end.pc))) - - f.write('\n}') - -def compute_instructions(instructions): - ''' - Split instructions into BasicBlock - Update PC of instructions - Args: - list(Instruction) - Returns: - list(BasicBlocks) - ''' - bbs = [] - bb = BasicBlock() - - addr = 0 - for instruction in instructions: - instruction.pc = addr - addr += instruction.operand_size + 1 - # JUMPDEST can be preceded by a no-jump instruction - if instruction.name == 'JUMPDEST': - if bb.instructions: - bbs.append(bb) - - bb = BasicBlock() - bb.add_instruction(instruction) - else: - bb.add_instruction(instruction) - if instruction.name in BASIC_BLOCK_END: - bbs.append(bb) - bb = BasicBlock() - - return bbs - -def is_jump_to_function(block): - ''' - Heuristic: - Recent solc version add a first check if calldatasize <4 and jump in fallback - Args; - block (BasicBlock) - Returns: - (int): function hash, or None - ''' - - has_calldata_size = False - last_pushed_value = None - previous_last_pushed_value = None - for i in block.instructions: - if i.name == 'CALLDATASIZE': - has_calldata_size = True - - if i.name.startswith('PUSH'): - previous_last_pushed_value = last_pushed_value - last_pushed_value = i.operand - - if i.name == 'JUMPI' and has_calldata_size: - return last_pushed_value, -1 - - if i.name == 'JUMPI' and previous_last_pushed_value: - return last_pushed_value, previous_last_pushed_value - - return None, None - -def find_functions(block, basic_block_as_dict, is_entry_block=False): - - function_start, function_hash = is_jump_to_function(block) - - if(function_start): - # print('Function {} starts at {}'.format(hex(function_hash), hex(function_start))) -# function = {function_start: {'hash':function_hash, 'start_hex':hex(function_start)}} - function = Function(function_hash, function_start, basic_block_as_dict[function_start]) - - ret = {} - if block.ends_with_jumpi(): - false_branch = basic_block_as_dict[block.end.pc + 1] - ret = find_functions(false_branch, basic_block_as_dict) - -# false_branch = block.false_branch() - return [function] + ret - - elif is_entry_block: - if block.ends_with_jumpi(): - false_branch = basic_block_as_dict[block.end.pc + 1] - return find_functions(false_branch, basic_block_as_dict) -# else: - #print('Last {}'.format(hex(block.start.pc))) - return [] - - -def add_simple_edges(basic_blocks, basic_blocks_as_dict): - for bb in basic_blocks: - if bb.end.name == 'JUMPI': - dst = basic_blocks_as_dict[bb.end.pc + 1] - bb.add_son(dst) - dst.add_father(bb) - # A bb can be split in the middle if it has a JUMPDEST - # Because another edge can target the JUMPDEST - if bb.end.name not in BASIC_BLOCK_END: - dst = basic_blocks_as_dict[bb.end.pc + 1 + bb.end.operand_size] - assert dst.start.name == 'JUMPDEST' - bb.add_son(dst) - dst.add_father(bb) diff --git a/evm_cfg_builder/cfg/__init__.py b/evm_cfg_builder/cfg/__init__.py new file mode 100644 index 0000000..c042a47 --- /dev/null +++ b/evm_cfg_builder/cfg/__init__.py @@ -0,0 +1,188 @@ +import basic_block +import function + +import re +from pyevmasm import disassemble_all + +__all__ = ["basic_block", "function"] + +BASIC_BLOCK_END = [ + 'STOP', + 'SELFDESTRUCT', + 'RETURN', + 'REVERT', + 'INVALID', + 'SUICIDE', + 'JUMP', + 'JUMPI' +] + +class ImmutableDict(dict): + def __init__(self, map): + self.update(map) + self.update = self.__update + + def __setitem__(self, key, value): + raise KeyError('ImmutableDict is immutable.') + + def __update(self, new_dict): + raise NotImplementedError() + +class CFG(object): + def __init__(self, bytecode=None): + if bytecode is None: + self.__bytecode = bytes() + else: + self.__bytecode = bytecode + + self.__functions = dict() + self.__basic_blocks = dict() + self.__instructions = dict() + self.edges = dict() + + @property + def bytecode(self): + return self.__bytecode + + @bytecode.setter + def bytecode(self, bytecode): + self.clear() + self.__bytecode = bytecode + + def clear(self): + self.__functions = list() + self.__basic_blocks = dict() + self.__instructions = dict() + self.__edges = dict() + self.__bytecode = dict() + + def remove_metadata(self): + ''' + Init bytecode contains metadata that needs to be removed + see http://solidity.readthedocs.io/en/v0.4.24/metadata.html#encoding-of-the-metadata-hash-in-the-bytecode + ''' + self.bytecode = re.sub( + r'\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20[\x00-\xff]{32}\x00\x29', + '', + self.bytecode + ) + + @property + def basic_blocks(self): + return ImmutableDict(self.__basic_blocks) + + @property + def functions(self): + return iter(self.__functions) + + @property + def instructions(self): + return ImmutableDict(self.__instructions) + + def compute_basic_blocks(self): + ''' + Split instructions into BasicBlock + Args: + self: CFG + Returns: + None + ''' + # Do nothing if basic_blocks already exist + if self.basic_blocks: + return + + bb = basic_block.BasicBlock() + + for instruction in disassemble_all(self.bytecode): + self.__instructions[instruction.pc] = instruction + + if instruction.name == 'JUMPDEST': + # JUMPDEST indicates a new BasicBlock. Set the end pc + # of the current block, and switch to a new one. + if bb.instructions: + self.__basic_blocks[bb.end.pc] = bb + + bb = basic_block.BasicBlock() + + self.__basic_blocks[instruction.pc] = bb + + bb.add_instruction(instruction) + + if bb.start.pc == instruction.pc: + self.__basic_blocks[instruction.pc] = bb + + if bb.end.name in BASIC_BLOCK_END: + self.__basic_blocks[bb.end.pc] = bb + bb = basic_block.BasicBlock() + + def compute_functions(self, block, is_entry_block=False): + + function_start, function_hash = is_jump_to_function(block) + + if(function_start): + new_function = function.Function( + function_hash, + function_start, + self.__basic_blocks[function_start] + ) + + self.__functions.append(new_function) + + if block.ends_with_jumpi(): + false_branch = self.__basic_blocks[block.end.pc + 1] + self.compute_functions(false_branch) + + return + + elif is_entry_block: + if block.ends_with_jumpi(): + false_branch = self.__basic_blocks[block.end.pc + 1] + self.compute_functions(false_branch) + + def add_function(self, func): + assert isinstance(func, function.Function) + self.__functions.append(func) + + def compute_simple_edges(self): + for bb in self.basic_blocks.items(): + if bb.end.name == 'JUMPI': + dst = self.__basic_blocks[bb.end.pc + 1] + bb.add_son(dst) + dst.add_father(bb) + + # A bb can be split in the middle if it has a JUMPDEST + # Because another edge can target the JUMPDEST + if bb.end.name not in BASIC_BLOCK_END: + dst = self.__basic_blocks[bb.end.pc + 1 + bb.end.operand_size] + assert dst.start.name == 'JUMPDEST' + bb.add_son(dst) + dst.add_father(bb) + +def is_jump_to_function(block): + ''' + Heuristic: + Recent solc version add a first check if calldatasize <4 and jump in fallback + Args; + block (BasicBlock) + Returns: + (int): function hash, or None + ''' + + has_calldata_size = False + last_pushed_value = None + previous_last_pushed_value = None + for i in block.instructions: + if i.name == 'CALLDATASIZE': + has_calldata_size = True + + if i.name.startswith('PUSH'): + previous_last_pushed_value = last_pushed_value + last_pushed_value = i.operand + + if i.name == 'JUMPI' and has_calldata_size: + return last_pushed_value, -1 + + if i.name == 'JUMPI' and previous_last_pushed_value: + return last_pushed_value, previous_last_pushed_value + + return None, None \ No newline at end of file diff --git a/evm_cfg_builder/cfg/basic_block.py b/evm_cfg_builder/cfg/basic_block.py new file mode 100644 index 0000000..b6ca890 --- /dev/null +++ b/evm_cfg_builder/cfg/basic_block.py @@ -0,0 +1,57 @@ +class BasicBlock(object): + + def __init__(self): + self.instructions = [] + # sons and fathers are dict + # The key is the function hash + # It allows to compute the VSA only + # On a specific function, to separate + # the merging + self.sons = {} + self.fathers = {} + + def add_instruction(self, instruction): + self.instructions += [instruction] + + def __repr__(self): + return ''.format(self.start.pc, self.end.pc) + + @property + def start(self): + return self.instructions[0] + + @property + def end(self): + return self.instructions[-1] + + def add_son(self, son, key): + if not key in self.sons: + self.sons[key] = [] + if son not in self.sons[key]: + self.sons[key].append(son) + + def add_father(self, father, key): + if not key in self.fathers: + self.fathers[key] = [] + if father not in self.fathers: + self.fathers[key].append(father) + + def ends_with_jumpi(self): + return self.end.name == 'JUMPI' + + def ends_with_jump_or_jumpi(self): + return self.end.name in ['JUMP', 'JUMPI'] + + def true_branch(self, key): + assert(self.ends_with_jumpi()) + + sons = [bb for bb in self.sons[key] if bb.start.pc != (self.end.pc+1)] + assert(len(sons[key]) == 1) + return sons[key][1] + + def false_branch(self, key): + assert(self.ends_with_jumpi()) + + sons = [bb for bb in self.sons[key] if bb.start.pc == (self.end.pc+1)] + assert(len(sons) == 1) + return sons[key][0] \ No newline at end of file diff --git a/evm_cfg_builder/cfg/function.py b/evm_cfg_builder/cfg/function.py new file mode 100644 index 0000000..95116d0 --- /dev/null +++ b/evm_cfg_builder/cfg/function.py @@ -0,0 +1,143 @@ +class Function(object): + + def __init__(self, hash_id, start_addr, entry_basic_block): + self._hash_id = hash_id + self._start_addr = start_addr + self._entry = entry_basic_block + self._name = hex(hash_id) + self._basic_blocks = [] + self._attributes = [] + + @property + def hash_id(self): + return self._hash_id + + @property + def key(self): + return self.hash_id + + @property + def name(self): + return self._name + + @name.setter + def name(self, n): + self._name = n + + @property + def basic_blocks(self): + ''' + Returns + list(BasicBlock) + ''' + return self._basic_blocks + + @basic_blocks.setter + def basic_blocks(self, bbs): + self._basic_blocks = bbs + + @property + def entry(self): + return self._entry + + @property + def attributes(self): + """ + Returns + list(str) + """ + return self._attributes + + def add_attributes(self, attr): + if not attr in self.attributes: + self._attributes.append(attr) + + def check_payable(self): + entry = self.entry + if any(ins.name == 'CALLVALUE' for ins in entry.instructions): + return + self.add_attributes('payable') + + def check_view(self): + changing_state_ops = ['CREATE', + 'CALL', + 'CALLCODE', + 'DELEGATECALL', + 'SELFDESTRUCT', + 'SSTORE'] + + for bb in self.basic_blocks: + if any(ins.name in changing_state_ops for ins in bb.instructions): + return + + self.add_attributes('view') + + def check_pure(self): + state_ops = ['CREATE', + 'CALL', + 'CALLCODE', + 'DELEGATECALL', + 'SELFDESTRUCT', + 'SSTORE', + 'ADDRESS', + 'BALANCE', + 'ORIGIN', + 'CALLER', + 'CALLVALUE', + 'CALLDATALOAD', + 'CALLDATASIZE', + 'CALLDATACOPY' + 'CODESIZE', + 'CODECOPY', + 'EXTCODESIZE', + 'EXTCODECOPY', + 'RETURNDATASIZE', + 'RETURNDATACOPY', + 'BLOCKHASH', + 'COINBASE', + 'TIMESTAMP', + 'NUMBER', + 'DIFFICULTY', + 'GASLIMIT', + 'LOG0', 'LOG1', 'LOG2', 'LOG3', 'LOG4', + 'CREATE', + 'CALL', + 'CALLCODE', + 'DELEGATECALL', + 'STATICCALL', + 'SELFDESTRUCT', + 'SSTORE', + 'SLOAD'] + + for bb in self.basic_blocks: + if any(ins.name in state_ops for ins in bb.instructions): + return + + self.add_attributes('pure') + + def __str__(self): + attrs = '' + if self.attributes: + attrs = ", " + ",".join(self.attributes) + return '{}, {} #bbs {}'.format(self.name, len(self.basic_blocks), attrs) + + def output_to_dot(self, base_filename): + + with open('{}{}.dot'.format(base_filename, self.name), 'w') as f: + f.write('digraph{\n') + for basic_block in self.basic_blocks: + instructions = ['{}:{}'.format(hex(ins.pc), + str(ins)) for ins in basic_block.instructions] + instructions = '\n'.join(instructions) + + f.write('{}[label="{}"]\n'.format(basic_block.start.pc, instructions)) + + if self.key in basic_block.sons: + for son in basic_block.sons[self.key]: + f.write('{} -> {}\n'.format(basic_block.start.pc, son.start.pc)) + + elif basic_block.ends_with_jump_or_jumpi(): + print('Missing branches {}:{}'.format(self.name, + hex(basic_block.end.pc))) + + f.write('\n}') \ No newline at end of file diff --git a/evm_cfg_builder/cfg_builder.py b/evm_cfg_builder/cfg_builder.py index 6b0d343..7168436 100644 --- a/evm_cfg_builder/cfg_builder.py +++ b/evm_cfg_builder/cfg_builder.py @@ -4,8 +4,8 @@ from pyevmasm import disassemble_all -from .cfg import compute_instructions, find_functions, Function -from .evm_helpers import create_dicts_from_basic_blocks +from cfg import CFG +from cfg.function import Function from .known_hashes import known_hashes from .value_set_analysis import StackValueAnalysis @@ -13,30 +13,10 @@ FALLBACK_ID = -1 -def remove_metadata(bytecode): - ''' - Init bytecode contains metadata that needs to be removed - see http://solidity.readthedocs.io/en/v0.4.24/metadata.html#encoding-of-the-metadata-hash-in-the-bytecode - ''' - return re.sub( - r'\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20[\x00-\xff]{32}\x00\x29', - '', - bytecode - ) +def get_info(cfg): + cfg.add_function(Function(DISPATCHER_ID, 0, cfg.basic_blocks[0])) - -def get_info(bytecode): - instructions = disassemble_all(bytecode) - basic_blocks = compute_instructions(instructions) - basic_blocks_as_dict, nodes_as_dict = create_dicts_from_basic_blocks( - basic_blocks - ) - - functions = find_functions(basic_blocks[0], basic_blocks_as_dict, True) - dispatcher = Function(-2, 0, basic_blocks_as_dict[0]) - functions = [dispatcher] + functions - - for function in functions: + for function in cfg.functions: if function.hash_id == FALLBACK_ID: function.name = '_fallback' elif function.hash_id == DISPATCHER_ID: @@ -45,23 +25,21 @@ def get_info(bytecode): if function.hash_id in known_hashes: function.name = known_hashes[function.hash_id] - for function in functions: + for function in cfg.functions: vsa = StackValueAnalysis( function.entry, - basic_blocks_as_dict, - nodes_as_dict, + cfg.basic_blocks, + cfg.instructions, function.hash_id ) bbs = vsa.analyze() - function.basic_blocks = [basic_blocks_as_dict[bb] for bb in bbs] + function.basic_blocks = [cfg.basic_blocks[bb] for bb in bbs] function.check_payable() function.check_view() function.check_pure() - return functions - def output_to_dot(functions): for function in functions: function.output_to_dot('test_') @@ -71,13 +49,15 @@ def main(): with open(filename) as f: bytecode = f.read().replace('\n','') - bytecode = binascii.unhexlify(bytecode) - bytecode = remove_metadata(bytecode) - functions = get_info(bytecode) + cfg = CFG(binascii.unhexlify(bytecode)) + cfg.remove_metadata() + cfg.compute_basic_blocks() + cfg.compute_functions(cfg.basic_blocks[0]) + get_info(cfg) print('End of analysis') - for function in functions: + for function in cfg.functions: print(function) - output_to_dot(functions) + output_to_dot(cfg.functions) if __name__ == '__main__': diff --git a/evm_cfg_builder/evm_helpers.py b/evm_cfg_builder/evm_helpers.py deleted file mode 100644 index 075cfb5..0000000 --- a/evm_cfg_builder/evm_helpers.py +++ /dev/null @@ -1,30 +0,0 @@ -BASIC_BLOCK_END = ['STOP', - 'SELFDESTRUCT', - 'RETURN', - 'REVERT', - 'INVALID', - 'SUICIDE', - 'JUMP', - 'JUMPI'] - - -def create_dicts_from_basic_blocks(basic_blocks): - """ - Create two dict: - - pc -> basic block. PC is either the start or the end of the BB - - pc -> node - - Args: - list(BasicBlock) - Returns - dict(int-> BasicBlock), dict(int->Instruction) - """ - nodes_as_dict = {} - basic_blocks_as_dict = {} - - for bb in basic_blocks: - basic_blocks_as_dict[bb.start.pc] = bb - basic_blocks_as_dict[bb.end.pc] = bb - for ins in bb.instructions: - nodes_as_dict[ins.pc] = ins - return (basic_blocks_as_dict, nodes_as_dict) From 42c95e75ca400ad07d864edb1ca4694d301217b7 Mon Sep 17 00:00:00 2001 From: Josh Watson Date: Fri, 26 Oct 2018 21:29:20 -0700 Subject: [PATCH 2/6] Added a missing parameter to cfg.compute_functions call --- evm_cfg_builder/cfg_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm_cfg_builder/cfg_builder.py b/evm_cfg_builder/cfg_builder.py index 7168436..612c89b 100644 --- a/evm_cfg_builder/cfg_builder.py +++ b/evm_cfg_builder/cfg_builder.py @@ -52,7 +52,7 @@ def main(): cfg = CFG(binascii.unhexlify(bytecode)) cfg.remove_metadata() cfg.compute_basic_blocks() - cfg.compute_functions(cfg.basic_blocks[0]) + cfg.compute_functions(cfg.basic_blocks[0], True) get_info(cfg) print('End of analysis') for function in cfg.functions: From eb056f68e837db4952d53b0f1142b3cba2f4d705 Mon Sep 17 00:00:00 2001 From: Josh Watson Date: Fri, 16 Nov 2018 18:22:17 -0800 Subject: [PATCH 3/6] Improved __init__ --- evm_cfg_builder/cfg/__init__.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/evm_cfg_builder/cfg/__init__.py b/evm_cfg_builder/cfg/__init__.py index c042a47..105c8e1 100644 --- a/evm_cfg_builder/cfg/__init__.py +++ b/evm_cfg_builder/cfg/__init__.py @@ -1,5 +1,5 @@ -import basic_block -import function +from . import basic_block +from . import function import re from pyevmasm import disassemble_all @@ -29,16 +29,19 @@ def __update(self, new_dict): raise NotImplementedError() class CFG(object): - def __init__(self, bytecode=None): - if bytecode is None: - self.__bytecode = bytes() - else: - self.__bytecode = bytecode - - self.__functions = dict() + def __init__(self, bytecode=None, instructions=None, basic_blocks=None, functions=None): + self.__functions = list() self.__basic_blocks = dict() self.__instructions = dict() - self.edges = dict() + + if bytecode is not None: + self.__bytecode = bytecode + if instructions is not None: + self.__instructions = instructions + if basic_blocks is not None: + self.__basic_blocks = basic_blocks + if functions is not None: + self.__functions = functions @property def bytecode(self): @@ -53,7 +56,6 @@ def clear(self): self.__functions = list() self.__basic_blocks = dict() self.__instructions = dict() - self.__edges = dict() self.__bytecode = dict() def remove_metadata(self): From 506ea3949657b3ea1fbbfc212a2cf643779a3055 Mon Sep 17 00:00:00 2001 From: Josh Watson Date: Sun, 25 Nov 2018 13:23:15 -0800 Subject: [PATCH 4/6] Updated vsa to use the refactored conventions for functions and basic blocks --- evm_cfg_builder/value_set_analysis.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/evm_cfg_builder/value_set_analysis.py b/evm_cfg_builder/value_set_analysis.py index c5cddc6..6f2ad81 100644 --- a/evm_cfg_builder/value_set_analysis.py +++ b/evm_cfg_builder/value_set_analysis.py @@ -324,9 +324,8 @@ class StackValueAnalysis(object): ''' def __init__(self, + cfg, entry_point, - basic_blocks_as_dict, - nodes_as_dict, key, maxiteration=1000, maxexploration=100, @@ -361,8 +360,9 @@ def __init__(self, self.initStack = initStack self._entry_point = entry_point - self.basic_blocks_as_dict = basic_blocks_as_dict - self.nodes_as_dict = nodes_as_dict + # self.basic_blocks_as_dict = basic_blocks_as_dict + # self.nodes_as_dict = nodes_as_dict + self.cfg = cfg self._key = key @@ -383,9 +383,9 @@ def is_jumpdst(self, addr): Returns: bool: True if the instruction is a JUMPDEST ''' - if not addr in self.nodes_as_dict: + if not addr in self.cfg.instructions: return False - ins = self.nodes_as_dict[addr] + ins = self.cfg.instructions[addr] return ins.name == 'JUMPDEST' def stub(self, ins, addr, stack): @@ -579,32 +579,33 @@ def explore(self): self.last_discovered_targets = {} for src, dsts in last_discovered_targets.items(): - bb_from = self.basic_blocks_as_dict[src] + bb_from = self.cfg.basic_blocks[src] for dst in dsts: - bb_to = self.basic_blocks_as_dict[dst] + bb_to = self.cfg.basic_blocks[dst] bb_from.add_son(bb_to, self._key) bb_to.add_father(bb_from, self._key) dsts = [dests for (src, dests) in last_discovered_targets.items()] - self._to_explore |= {self.basic_blocks_as_dict[item] for sublist in dsts for item in sublist} + self._to_explore |= {self.cfg.basic_blocks[item] for sublist in dsts for item in sublist} def simple_edges(self): - for bb in self.basic_blocks_as_dict.values(): + for bb in self.cfg.basic_blocks.values(): + print(bb) if bb.end.name == 'JUMPI': - dst = self.basic_blocks_as_dict[bb.end.pc + 1] + dst = self.cfg.basic_blocks[bb.end.pc + 1] bb.add_son(dst, self._key) dst.add_father(bb, self._key) # A bb can be split in the middle if it has a JUMPDEST # Because another edge can target the JUMPDEST if bb.end.name not in BASIC_BLOCK_END: - dst = self.basic_blocks_as_dict[bb.end.pc + 1 + bb.end.operand_size] + dst = self.cfg.basic_blocks[bb.end.pc + 1 + bb.end.operand_size] assert dst.start.name == 'JUMPDEST' bb.add_son(dst, self._key) dst.add_father(bb, self._key) def analyze(self): - self.simple_edges() + # self.simple_edges() while self._to_explore: self.explore() From 8db9a03b0e2186d42589daec20261ef2f1eaaf56 Mon Sep 17 00:00:00 2001 From: Josh Watson Date: Sun, 25 Nov 2018 13:24:05 -0800 Subject: [PATCH 5/6] Removed some commented out stuff --- evm_cfg_builder/value_set_analysis.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/evm_cfg_builder/value_set_analysis.py b/evm_cfg_builder/value_set_analysis.py index 6f2ad81..e112c33 100644 --- a/evm_cfg_builder/value_set_analysis.py +++ b/evm_cfg_builder/value_set_analysis.py @@ -360,8 +360,7 @@ def __init__(self, self.initStack = initStack self._entry_point = entry_point - # self.basic_blocks_as_dict = basic_blocks_as_dict - # self.nodes_as_dict = nodes_as_dict + self.cfg = cfg self._key = key @@ -589,24 +588,8 @@ def explore(self): dsts = [dests for (src, dests) in last_discovered_targets.items()] self._to_explore |= {self.cfg.basic_blocks[item] for sublist in dsts for item in sublist} - def simple_edges(self): - for bb in self.cfg.basic_blocks.values(): - print(bb) - if bb.end.name == 'JUMPI': - dst = self.cfg.basic_blocks[bb.end.pc + 1] - bb.add_son(dst, self._key) - dst.add_father(bb, self._key) - # A bb can be split in the middle if it has a JUMPDEST - # Because another edge can target the JUMPDEST - if bb.end.name not in BASIC_BLOCK_END: - dst = self.cfg.basic_blocks[bb.end.pc + 1 + bb.end.operand_size] - assert dst.start.name == 'JUMPDEST' - bb.add_son(dst, self._key) - dst.add_father(bb, self._key) def analyze(self): - # self.simple_edges() - while self._to_explore: self.explore() From 83db5a30e22cddb0600af06068578df658d6d19d Mon Sep 17 00:00:00 2001 From: Josh Watson Date: Mon, 26 Nov 2018 08:54:53 -0800 Subject: [PATCH 6/6] Fix a few conflicts between python2 and python3 --- evm_cfg_builder/__main__.py | 4 ++-- evm_cfg_builder/cfg/__init__.py | 6 +++--- evm_cfg_builder/cfg_builder.py | 7 +++---- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/evm_cfg_builder/__main__.py b/evm_cfg_builder/__main__.py index 7936053..50e70fc 100644 --- a/evm_cfg_builder/__main__.py +++ b/evm_cfg_builder/__main__.py @@ -1,3 +1,3 @@ -import cfg_builder +from .cfg_builder import main -cfg_builder.main() \ No newline at end of file +main() \ No newline at end of file diff --git a/evm_cfg_builder/cfg/__init__.py b/evm_cfg_builder/cfg/__init__.py index 105c8e1..b2257d6 100644 --- a/evm_cfg_builder/cfg/__init__.py +++ b/evm_cfg_builder/cfg/__init__.py @@ -35,7 +35,7 @@ def __init__(self, bytecode=None, instructions=None, basic_blocks=None, function self.__instructions = dict() if bytecode is not None: - self.__bytecode = bytecode + self.__bytecode = bytes(bytecode) if instructions is not None: self.__instructions = instructions if basic_blocks is not None: @@ -64,8 +64,8 @@ def remove_metadata(self): see http://solidity.readthedocs.io/en/v0.4.24/metadata.html#encoding-of-the-metadata-hash-in-the-bytecode ''' self.bytecode = re.sub( - r'\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20[\x00-\xff]{32}\x00\x29', - '', + bytes(r'\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20[\x00-\xff]{32}\x00\x29'.encode('charmap')), + b'', self.bytecode ) diff --git a/evm_cfg_builder/cfg_builder.py b/evm_cfg_builder/cfg_builder.py index 612c89b..b4e47d7 100644 --- a/evm_cfg_builder/cfg_builder.py +++ b/evm_cfg_builder/cfg_builder.py @@ -4,8 +4,8 @@ from pyevmasm import disassemble_all -from cfg import CFG -from cfg.function import Function +from .cfg import CFG +from .cfg.function import Function from .known_hashes import known_hashes from .value_set_analysis import StackValueAnalysis @@ -27,9 +27,8 @@ def get_info(cfg): for function in cfg.functions: vsa = StackValueAnalysis( + cfg, function.entry, - cfg.basic_blocks, - cfg.instructions, function.hash_id ) bbs = vsa.analyze()