Skip to content

Commit

Permalink
Improve documentation (#8)
Browse files Browse the repository at this point in the history
- Improve Readme
- Add library usage
Improve API (#7)
- Create command line arguments
- Improve CFG module API
Use logging (close #4)
  • Loading branch information
montyly committed Nov 30, 2018
1 parent 2e56f6a commit f581588
Show file tree
Hide file tree
Showing 9 changed files with 312 additions and 89 deletions.
50 changes: 15 additions & 35 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,49 +12,29 @@ evm_cfg_builder is a work in progress, but it's already very useful today:

`evm_cfg_builder` is a reliable foundation to build many possible program analysis tools for EVM. This library is covered by our standard bounty policy and we encourage contributions that address any known [issues](https://github.com/trailofbits/evm_cfg_builder/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc). Join us on the [Empire Hacking Slack](https://empireslacking.herokuapp.com) to discuss using or extending evm_cfg_builder.

## How to install
## Usage

### Command line
To export basic disassembly information, run:
```
pip install .
evm-cfg-builder mycontract.evm
```
To export the CFG of each function (dot format), run:
```
evm-cfg-builder mycontract.evm --export-dot my_dir
```
The generated dot files can be viewed with xdot.

## Example
### Library
See [examples/library.py](examples/library.py) for an example.

```
$ evm-cfg-builder tests/fomo3d.evm
...
## How to install

dividendsOf(address), 7 #bbs , view
name(), 16 #bbs , view
calculateTokensReceived(uint256), 29 #bbs , view
totalSupply(), 5 #bbs , view
calculateEthereumReceived(uint256), 26 #bbs , view
onlyAmbassadors(), 5 #bbs , view
decimals(), 5 #bbs , view
administrators(bytes32), 5 #bbs , view
withdraw(), 20 #bbs
sellPrice(), 27 #bbs , view
stakingRequirement(), 5 #bbs , view
myDividends(bool), 13 #bbs , view
totalEthereumBalance(), 5 #bbs , view
balanceOf(address), 5 #bbs , view
setStakingRequirement(uint256), 7 #bbs
buyPrice(), 30 #bbs , view
setAdministrator(bytes32,bool), 7 #bbs
Hourglass(), 5 #bbs , view
myTokens(), 7 #bbs , view
symbol(), 16 #bbs , view
disableInitialStage(), 7 #bbs
transfer(address,uint256), 48 #bbs , view
setSymbol(string), 22 #bbs
setName(string), 22 #bbs
sell(uint256), 42 #bbs
exit(), 63 #bbs
buy(address), 71 #bbs , payable
reinvest(), 86 #bbs
```
git clone https://github.com/trailofbits/evm_cfg_builder
pip install .
```

`test_<name>.dot` files will be generated.

## Requirements

Expand Down
95 changes: 51 additions & 44 deletions evm_cfg_builder/__main__.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,65 @@
import binascii
import re
import sys

import argparse
import logging
import os
from pkg_resources import require
from pyevmasm import disassemble_all

from .cfg import CFG
from .cfg.function import Function
from .known_hashes import known_hashes
from .value_set_analysis import StackValueAnalysis

import time
logging.basicConfig()
logger = logging.getLogger("evm-cfg-builder")

def get_info(cfg):
cfg.add_function(Function(Function.DISPATCHER_ID, 0, cfg.basic_blocks[0]))
def output_to_dot(d, filename, functions):
    """Write one .dot file per function into the directory ``d``.

    Every output file name starts with the base name of ``filename``
    followed by an underscore; each function completes the name itself in
    its own ``output_to_dot`` method.

    Args:
        d: Destination directory. It is created (including parents) when
            it does not exist yet.
        filename: Path of the analysed contract; only its base name is used.
        functions: Iterable of objects exposing ``output_to_dot(prefix)``.

    Raises:
        OSError: If ``d`` cannot be created and is not already a directory.
    """
    # EAFP instead of "check then create": the directory may appear between
    # an existence check and makedirs, so tolerate "already there" and only
    # re-raise when the path is still not a usable directory.
    try:
        os.makedirs(d)
    except OSError:
        if not os.path.isdir(d):
            raise

    prefix = os.path.join(d, os.path.basename(filename) + '_')
    for function in functions:
        function.output_to_dot(prefix)

for function in cfg.functions:
if function.hash_id in known_hashes:
function.name = known_hashes[function.hash_id]
def parse_args():
parser = argparse.ArgumentParser(description='evm-cfg-builder',
usage="evm-cfg-builder contract.evm [flag]")

parser.add_argument('filename',
help='contract.evm')

parser.add_argument('--export-dot',
help='Export the functions to .dot files in the directory',
action='store',
dest='dot_directory',
default='')

parser.add_argument('--version',
help='displays the current version',
version=require('evm-cfg-builder')[0].version,
action='version')

for function in cfg.functions:
vsa = StackValueAnalysis(
cfg,
function.entry,
function.hash_id
)
print(function.name)
start = time.time()
bbs = vsa.analyze()
print(int(time.time() - start))

function.basic_blocks = [cfg.basic_blocks[bb] for bb in bbs]

function.check_payable()
function.check_view()
function.check_pure()

def output_to_dot(functions):
for function in functions:
function.output_to_dot('test_')

if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
args = parser.parse_args()
return args

def main():
filename = sys.argv[1]

with open(filename) as f:
bytecode = f.read().replace('\n','')
cfg = CFG(binascii.unhexlify(bytecode))
cfg.remove_metadata()
cfg.compute_basic_blocks()
cfg.compute_functions(cfg.basic_blocks[0], True)
get_info(cfg)
print('End of analysis')
for function in cfg.functions:
print(function)
output_to_dot(cfg.functions)

l = logging.getLogger('evm-cfg-builder')
l.setLevel(logging.INFO)
args = parse_args()

with open(args.filename) as f:
bytecode = f.read().replace('\n', '')

cfg = CFG(bytecode)

for function in cfg.functions:
logger.info(function)

if args.dot_directory:
output_to_dot(args.dot_directory, args.filename, cfg.functions)


if __name__ == '__main__':
Expand Down
60 changes: 55 additions & 5 deletions evm_cfg_builder/cfg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import binascii
from . import basic_block
from . import function
from .function import Function

from ..known_hashes import known_hashes
from ..value_set_analysis import StackValueAnalysis

import re
from pyevmasm import disassemble_all
Expand Down Expand Up @@ -29,12 +33,18 @@ def __update(self, new_dict):
raise NotImplementedError()

class CFG(object):
def __init__(self, bytecode=None, instructions=None, basic_blocks=None, functions=None):

def __init__(self, bytecode=None, instructions=None, basic_blocks=None, functions=None, remove_metadata=True, analyze=True):
self.__functions = list()
self.__basic_blocks = dict()
self.__instructions = dict()

if bytecode is not None:
if isinstance(bytecode, str):
if bytecode.startswith('0x'):
bytecode = bytecode[2:]
bytecode = bytecode.replace('\n', '')
bytecode = binascii.unhexlify(bytecode)
self.__bytecode = bytes(bytecode)
if instructions is not None:
self.__instructions = instructions
Expand All @@ -43,6 +53,37 @@ def __init__(self, bytecode=None, instructions=None, basic_blocks=None, function
if functions is not None:
self.__functions = functions

if remove_metadata:
self.remove_metadata()
if analyze:
self.analyze()

def analyze(self):
    """Run the complete analysis pipeline on this CFG.

    In order: compute the basic blocks, discover the functions starting
    from the first basic block, register the dispatcher pseudo-function,
    rename functions whose hash is listed in ``known_hashes``, then run a
    ``StackValueAnalysis`` per function to fill in its basic blocks.
    The payable/view/pure checks are skipped for the dispatcher.
    """
    self.compute_basic_blocks()
    self.compute_functions(self.basic_blocks[0], True)
    dispatcher = Function(Function.DISPATCHER_ID, 0, self.basic_blocks[0], self)
    self.add_function(dispatcher)

    # First pass: replace default names by known signatures where possible.
    for func in self.functions:
        if func.hash_id in known_hashes:
            func.name = known_hashes[func.hash_id]

    # Second pass: collect the basic blocks reached by each function.
    for func in self.functions:
        analysis = StackValueAnalysis(self, func.entry, func.hash_id)
        reached = analysis.analyze()

        func.basic_blocks = [self.basic_blocks[bb_key] for bb_key in reached]

        if func.hash_id == Function.DISPATCHER_ID:
            continue
        func.check_payable()
        func.check_view()
        func.check_pure()



@property
def bytecode(self):
return self.__bytecode
Expand All @@ -57,7 +98,7 @@ def clear(self):
self.__basic_blocks = dict()
self.__instructions = dict()
self.__bytecode = dict()

def remove_metadata(self):
'''
Init bytecode contains metadata that needs to be removed
Expand Down Expand Up @@ -125,7 +166,8 @@ def compute_functions(self, block, is_entry_block=False):
new_function = function.Function(
function_hash,
function_start,
self.__basic_blocks[function_start]
self.__basic_blocks[function_start],
self
)

self.__functions.append(new_function)
Expand All @@ -142,7 +184,7 @@ def compute_functions(self, block, is_entry_block=False):
self.compute_functions(false_branch)

def add_function(self, func):
assert isinstance(func, function.Function)
assert isinstance(func, Function)
self.__functions.append(func)

def compute_simple_edges(self, key):
Expand Down Expand Up @@ -184,6 +226,14 @@ def compute_reachability(self, entry_point, key):
if key in bb.fathers.keys():
del bb.fathers[key]

def export_basic_blocks(self):
    """Return a list holding the ``export()`` result of every basic block."""
    exported = []
    for block in self.basic_blocks.values():
        exported.append(block.export())
    return exported

def export_functions(self):
    """Return one ``{'name', 'basic_blocks'}`` dictionary per function.

    ``basic_blocks`` is whatever the function's ``export_bbs()`` returns.
    """
    exported = []
    for func in self.functions:
        exported.append({'name': func.name,
                         'basic_blocks': func.export_bbs()})
    return exported

def is_jump_to_function(block):
'''
Heuristic:
Expand Down
20 changes: 20 additions & 0 deletions evm_cfg_builder/cfg/basic_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,23 @@ def false_branch(self, key):
sons = [bb for bb in self.sons[key] if bb.start.pc == (self.end.pc+1)]
assert(len(sons) == 1)
return sons[key][0]

def export(self):
    """Describe this basic block as a plain dictionary.

    ``sons`` and ``fathers`` are flattened: the lists stored under every
    key are concatenated into a single list each.
    """
    all_sons = []
    for group in self.sons.values():
        all_sons.extend(group)

    all_fathers = []
    for group in self.fathers.values():
        all_fathers.extend(group)

    return dict(pc_start=self.start.pc,
                pc_end=self.end.pc,
                instructions=self.instructions,
                sons=all_sons,
                fathers=all_fathers)

def export_from_function(self, key):
    """Describe this basic block as seen under a single ``key``.

    Unlike a full export, only the sons and fathers stored under ``key``
    are reported; a missing key yields an empty list.
    """
    key_sons = self.sons.get(key, [])
    key_fathers = self.fathers.get(key, [])
    return dict(pc_start=self.start.pc,
                pc_end=self.end.pc,
                instructions=self.instructions,
                sons=key_sons,
                fathers=key_fathers)
49 changes: 45 additions & 4 deletions evm_cfg_builder/cfg/function.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import logging
logger = logging.getLogger("evm-cfg-builder")

class Function(object):

DISPATCHER_ID = -2
FALLBACK_ID = -1

def __init__(self, hash_id, start_addr, entry_basic_block):
def __init__(self, hash_id, start_addr, entry_basic_block, cfg):
self._hash_id = hash_id
self._start_addr = start_addr
self._entry = entry_basic_block
Expand All @@ -15,6 +18,11 @@ def __init__(self, hash_id, start_addr, entry_basic_block):
self._name = hex(hash_id)
self._basic_blocks = []
self._attributes = []
self._cfg = cfg

@property
def start_addr(self):
    """Read-only access to the stored ``_start_addr`` value."""
    return self._start_addr

@property
def hash_id(self):
Expand Down Expand Up @@ -124,13 +132,17 @@ def check_pure(self):
self.add_attributes('pure')

def __str__(self):
attrs = ''
attrs = ''
if self.attributes:
attrs = ", " + ",".join(self.attributes)
return '{}, {} #bbs {}'.format(self.name, len(self.basic_blocks), attrs)

def output_to_dot(self, base_filename):

if self.key == Function.DISPATCHER_ID:
self.output_dispatcher_to_dot(base_filename)
return

with open('{}{}.dot'.format(base_filename, self.name), 'w') as f:
f.write('digraph{\n')
for basic_block in self.basic_blocks:
Expand All @@ -145,7 +157,36 @@ def output_to_dot(self, base_filename):
f.write('{} -> {}\n'.format(basic_block.start.pc, son.start.pc))

elif basic_block.ends_with_jump_or_jumpi():
print('Missing branches {}:{}'.format(self.name,
hex(basic_block.end.pc)))
logger.error('Missing branches {}:{}'.format(self.name,
hex(basic_block.end.pc)))

f.write('\n}')

def output_dispatcher_to_dot(self, base_filename):
    """Write this function's graph to ``<base_filename><name>.dot``.

    Each basic block becomes a node labelled with its instructions
    (``<hex pc>:<instruction>``, one per line). Edges come from the sons
    recorded under this function's key. Every other function of the CFG
    is added as a ``Call <name>`` node identified by its start address.

    Args:
        base_filename: Prefix (directory and/or file stem) of the output
            file; the function name and ``.dot`` are appended to it.
    """
    path = '{}{}.dot'.format(base_filename, self.name)
    with open(path, 'w') as f:
        f.write('digraph{\n')
        for basic_block in self.basic_blocks:
            label = '\n'.join('{}:{}'.format(hex(ins.pc), str(ins))
                              for ins in basic_block.instructions)
            f.write('{}[label="{}"]\n'.format(basic_block.start.pc, label))

            if self.key in basic_block.sons:
                for son in basic_block.sons[self.key]:
                    f.write('{} -> {}\n'.format(basic_block.start.pc, son.start.pc))

            elif basic_block.ends_with_jump_or_jumpi():
                # No sons recorded under this key although the block
                # reports ending with a jump: report it, keep exporting.
                logger.error('Missing branches {}:{}'.format(self.name,
                                                             hex(basic_block.end.pc)))

        # Target nodes for the other functions of the CFG.
        for function in self._cfg.functions:
            if function != self:
                f.write('{}[label="Call {}"]\n'.format(function.start_addr, function.name))

        f.write('\n}')

def export_bbs(self):
    """Return each basic block exported via ``export_from_function``,
    called with this function's key."""
    exported = []
    for block in self.basic_blocks:
        exported.append(block.export_from_function(self.key))
    return exported
Loading

0 comments on commit f581588

Please sign in to comment.