Skip to content

Commit d79d232

Browse files
committed
basic translate works
1 parent 6d379aa commit d79d232

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

125 files changed

+8557
-0
lines changed

Language.ml structure.pdf

29.1 KB
Binary file not shown.

src/backend/ASM.ml

Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
module Links = Map.Make(struct type t = AST.label let compare = compare end)
2+
3+
type asm =
4+
| EVM_STOP
5+
| EVM_ADD
6+
| EVM_MUL
7+
| EVM_SUB
8+
| EVM_DIV
9+
| EVM_SDIV
10+
| EVM_MOD
11+
| EVM_SMOD
12+
| EVM_ADDMOD
13+
| EVM_MULMOD
14+
| EVM_EXP
15+
| EVM_SIGNEXTEND
16+
| EVM_LT
17+
| EVM_GT
18+
| EVM_SLT
19+
| EVM_SGT
20+
| EVM_EQ
21+
| EVM_ISZERO
22+
| EVM_AND
23+
| EVM_OR
24+
| EVM_XOR
25+
| EVM_NOT
26+
| EVM_BYTE
27+
| EVM_SHA3
28+
| EVM_ADDRESS
29+
| EVM_BALANCE
30+
| EVM_ORIGIN
31+
| EVM_CALLER
32+
| EVM_CALLVALUE
33+
| EVM_CALLDATALOAD
34+
| EVM_CALLDATASIZE
35+
| EVM_CODESIZE
36+
| EVM_GASPRICE
37+
| EVM_EXTCODESIZE
38+
| EVM_BLOCKHASH
39+
| EVM_COINBASE
40+
| EVM_TIMESTAMP
41+
| EVM_NUMBER
42+
| EVM_DIFFICULTY
43+
| EVM_GASLIMIT
44+
| EVM_GAS
45+
| EVM_CODECOPY
46+
| EVM_POP
47+
| EVM_MLOAD
48+
| EVM_MSTORE
49+
| EVM_MSTORE8
50+
| EVM_SLOAD
51+
| EVM_SSTORE
52+
| EVM_JUMP
53+
| EVM_JUMPI
54+
| EVM_JUMPDEST of string
55+
| EVM_PUSH of int * string
56+
| EVM_DUP of int
57+
| EVM_SWAP of int
58+
| EVM_LOG
59+
| EVM_CALL
60+
| EVM_REVERT
61+
| EVM_RETURN
62+
63+
type asm_program = asm list
64+
type evm_program = EVM.evm list
65+
66+
type intermediate = {
67+
constructor : evm_program ;
68+
body : evm_program
69+
}
70+
71+
(* we assume jump destinations can fit in 4 bytes *)
72+
let address_bytes = 4
73+
74+
75+
let pad len s =
76+
String.make (len - (String.length s)) '0' ^ s
77+
78+
(* compute number of bytes needed to represent a value *)
79+
let allocate =
80+
let allocate' p =
81+
let rec count_digits = function
82+
| BinNums.Coq_xI v -> 1 + count_digits v
83+
| BinNums.Coq_xO v -> 1 + count_digits v
84+
| BinNums.Coq_xH -> 1
85+
in (count_digits p + 7) / 8
86+
in function
87+
| BinNums.Z0 -> 1
88+
| BinNums.Zpos x -> allocate' x
89+
| BinNums.Zneg x -> raise (Failure "allocate is undefined on negative numbers")
90+
91+
(* construct a mapping from label -> PC *)
92+
let map_labels program =
93+
let rec map_labels' program counter =
94+
match program with
95+
| [] -> Links.empty
96+
| (x :: xs) ->
97+
match x with
98+
| EVM.Coq_evm_label l -> Links.add l counter (map_labels' xs (counter + 1))
99+
| EVM.Coq_evm_push_label l -> map_labels' xs (counter + 5)
100+
| EVM.Coq_evm_push v -> map_labels' xs (counter + 1 + allocate v)
101+
| _ -> map_labels' xs (counter + 1)
102+
in map_labels' program 0
103+
104+
let hex x = Printf.sprintf "%x" x
105+
106+
(* compute the bytecode for some push data *)
107+
let assemble_pushdata n data =
108+
let hex_of_coq_int =
109+
let rec hex_of_coq_pos partial count =
110+
let rec pow2 = function
111+
| 0 -> 1
112+
| n -> 2 * (pow2 (n-1)) in
113+
let lastchar = if count == 4 then Printf.sprintf "%x" partial else ""
114+
and partial = if count == 4 then 0 else partial
115+
and count = count mod 4
116+
in function
117+
| BinNums.Coq_xI rest -> (hex_of_coq_pos (partial + (pow2 count)) (count + 1) rest) ^ lastchar
118+
| BinNums.Coq_xO rest -> (hex_of_coq_pos partial (count + 1) rest) ^ lastchar
119+
| BinNums.Coq_xH -> (hex (partial + (pow2 count))) ^ lastchar
120+
in function
121+
| BinNums.Z0 -> "0"
122+
| BinNums.Zpos v -> hex_of_coq_pos 0 0 v
123+
| BinNums.Zneg v -> raise (Failure "undefined")
124+
in
125+
pad (n * 2) (hex_of_coq_int data)
126+
127+
(* compute the bytecode for an opcode *)
128+
let assemble_op x = pad 2 (hex x)
129+
130+
(* evm -> asm *)
131+
let transform_inst links = function
132+
| EVM.Coq_evm_stop -> EVM_STOP
133+
| EVM.Coq_evm_add -> EVM_ADD
134+
| EVM.Coq_evm_mul -> EVM_MUL
135+
| EVM.Coq_evm_sub -> EVM_SUB
136+
| EVM.Coq_evm_div -> EVM_DIV
137+
| EVM.Coq_evm_sdiv -> EVM_SDIV
138+
| EVM.Coq_evm_mod -> EVM_MOD
139+
| EVM.Coq_evm_smod -> EVM_SMOD
140+
| EVM.Coq_evm_addmod -> EVM_ADDMOD
141+
| EVM.Coq_evm_mulmod -> EVM_MULMOD
142+
| EVM.Coq_evm_exp -> EVM_EXP
143+
| EVM.Coq_evm_signextend -> EVM_SIGNEXTEND
144+
| EVM.Coq_evm_lt -> EVM_LT
145+
| EVM.Coq_evm_gt -> EVM_GT
146+
| EVM.Coq_evm_slt -> EVM_SLT
147+
| EVM.Coq_evm_sgt -> EVM_SGT
148+
| EVM.Coq_evm_eq -> EVM_EQ
149+
| EVM.Coq_evm_iszero -> EVM_ISZERO
150+
| EVM.Coq_evm_and -> EVM_AND
151+
| EVM.Coq_evm_or -> EVM_OR
152+
| EVM.Coq_evm_xor -> EVM_XOR
153+
| EVM.Coq_evm_not -> EVM_NOT
154+
| EVM.Coq_evm_byte -> EVM_BYTE
155+
| EVM.Coq_evm_sha3 -> EVM_SHA3
156+
| EVM.Coq_evm_address -> EVM_ADDRESS
157+
| EVM.Coq_evm_balance -> EVM_BALANCE
158+
| EVM.Coq_evm_origin -> EVM_ORIGIN
159+
| EVM.Coq_evm_caller -> EVM_CALLER
160+
| EVM.Coq_evm_callvalue -> EVM_CALLVALUE
161+
| EVM.Coq_evm_calldataload -> EVM_CALLDATALOAD
162+
| EVM.Coq_evm_calldatasize -> EVM_CALLDATASIZE
163+
| EVM.Coq_evm_codesize -> EVM_CODESIZE
164+
| EVM.Coq_evm_gasprice -> EVM_GASPRICE
165+
| EVM.Coq_evm_extcodesize -> EVM_EXTCODESIZE
166+
| EVM.Coq_evm_blockhash -> EVM_BLOCKHASH
167+
| EVM.Coq_evm_coinbase -> EVM_COINBASE
168+
| EVM.Coq_evm_timestamp -> EVM_TIMESTAMP
169+
| EVM.Coq_evm_number -> EVM_NUMBER
170+
| EVM.Coq_evm_difficulty -> EVM_DIFFICULTY
171+
| EVM.Coq_evm_gaslimit -> EVM_GASLIMIT
172+
| EVM.Coq_evm_gas -> EVM_GAS
173+
| EVM.Coq_evm_codecopy -> EVM_CODECOPY
174+
| EVM.Coq_evm_pop -> EVM_POP
175+
| EVM.Coq_evm_mload -> EVM_MLOAD
176+
| EVM.Coq_evm_mstore -> EVM_MSTORE
177+
| EVM.Coq_evm_mstore8 -> EVM_MSTORE8
178+
| EVM.Coq_evm_sload -> EVM_SLOAD
179+
| EVM.Coq_evm_sstore -> EVM_SSTORE
180+
| EVM.Coq_evm_jump -> EVM_JUMP
181+
| EVM.Coq_evm_jumpi -> EVM_JUMPI
182+
| EVM.Coq_evm_label l -> EVM_JUMPDEST (Printf.sprintf "%08x" (Links.find l links))
183+
| EVM.Coq_evm_push x ->
184+
EVM_PUSH (allocate x, assemble_pushdata (allocate x) x)
185+
| EVM.Coq_evm_push_label l ->
186+
EVM_PUSH (address_bytes, Printf.sprintf "%08x" (Links.find l links))
187+
| EVM.Coq_evm_dup n ->
188+
EVM_DUP (DatatypesExt.eval_nat n)
189+
| EVM.Coq_evm_swap n ->
190+
EVM_SWAP (DatatypesExt.eval_nat n)
191+
| EVM.Coq_evm_log -> EVM_LOG
192+
| EVM.Coq_evm_call -> EVM_CALL
193+
| EVM.Coq_evm_revert -> EVM_REVERT
194+
| EVM.Coq_evm_return -> EVM_RETURN
195+
196+
(* asm -> bytecode *)
197+
let assemble_inst = function
198+
| EVM_STOP -> "00"
199+
| EVM_ADD -> "01"
200+
| EVM_MUL -> "02"
201+
| EVM_SUB -> "03"
202+
| EVM_DIV -> "04"
203+
| EVM_SDIV -> "05"
204+
| EVM_MOD -> "06"
205+
| EVM_SMOD -> "07"
206+
| EVM_ADDMOD -> "08"
207+
| EVM_MULMOD -> "09"
208+
| EVM_EXP -> "0a"
209+
| EVM_SIGNEXTEND -> "0b"
210+
| EVM_LT -> "10"
211+
| EVM_GT -> "11"
212+
| EVM_SLT -> "12"
213+
| EVM_SGT -> "13"
214+
| EVM_EQ -> "14"
215+
| EVM_ISZERO -> "15"
216+
| EVM_AND -> "16"
217+
| EVM_OR -> "17"
218+
| EVM_XOR -> "18"
219+
| EVM_NOT -> "19"
220+
| EVM_BYTE -> "1a"
221+
| EVM_SHA3 -> "20"
222+
| EVM_ADDRESS -> "30"
223+
| EVM_BALANCE -> "31"
224+
| EVM_ORIGIN -> "32"
225+
| EVM_CALLER -> "33"
226+
| EVM_CALLVALUE -> "34"
227+
| EVM_CALLDATALOAD -> "35"
228+
| EVM_CALLDATASIZE -> "36"
229+
| EVM_CODESIZE -> "38"
230+
| EVM_GASPRICE -> "3a"
231+
| EVM_EXTCODESIZE -> "3b"
232+
| EVM_BLOCKHASH -> "40"
233+
| EVM_COINBASE -> "41"
234+
| EVM_TIMESTAMP -> "42"
235+
| EVM_NUMBER -> "43"
236+
| EVM_DIFFICULTY -> "44"
237+
| EVM_GASLIMIT -> "45"
238+
| EVM_GAS -> "5a"
239+
| EVM_CODECOPY -> "39"
240+
| EVM_POP -> "50"
241+
| EVM_MLOAD -> "51"
242+
| EVM_MSTORE -> "52"
243+
| EVM_MSTORE8 -> "53"
244+
| EVM_SLOAD -> "54"
245+
| EVM_SSTORE -> "55"
246+
| EVM_JUMP -> "56"
247+
| EVM_JUMPI -> "57"
248+
| EVM_JUMPDEST l -> "5b"
249+
| EVM_PUSH (n, data) ->
250+
(assemble_op (95 + n)) ^ data
251+
| EVM_DUP n -> assemble_op (127 + n)
252+
| EVM_SWAP n -> assemble_op (143 + n)
253+
| EVM_LOG -> "a0"
254+
| EVM_CALL -> "f1"
255+
| EVM_REVERT -> "fd"
256+
| EVM_RETURN -> "f3"
257+
258+
259+
let show_asm_inst = function
260+
| EVM_STOP -> "STOP"
261+
| EVM_ADD -> "ADD"
262+
| EVM_MUL -> "MUL"
263+
| EVM_SUB -> "SUB"
264+
| EVM_DIV -> "DIV"
265+
| EVM_SDIV -> "SDIV"
266+
| EVM_MOD -> "MOD"
267+
| EVM_SMOD -> "SMOD"
268+
| EVM_ADDMOD -> "ADDMOD"
269+
| EVM_MULMOD -> "MULMOD"
270+
| EVM_EXP -> "EXP"
271+
| EVM_SIGNEXTEND -> "SIGNEXTEND"
272+
| EVM_LT -> "LT"
273+
| EVM_GT -> "GT"
274+
| EVM_SLT -> "SLT"
275+
| EVM_SGT -> "SGT"
276+
| EVM_EQ -> "EQ"
277+
| EVM_ISZERO -> "ISZERO"
278+
| EVM_AND -> "AND"
279+
| EVM_OR -> "OR"
280+
| EVM_XOR -> "XOR"
281+
| EVM_NOT -> "NOT"
282+
| EVM_BYTE -> "BYTE"
283+
| EVM_SHA3 -> "SHA3"
284+
| EVM_ADDRESS -> "ADDRESS"
285+
| EVM_BALANCE -> "BALANCE"
286+
| EVM_ORIGIN -> "ORIGIN"
287+
| EVM_CALLER -> "CALLER"
288+
| EVM_CALLVALUE -> "CALLVALUE"
289+
| EVM_CALLDATALOAD -> "CALLDATALOAD"
290+
| EVM_CALLDATASIZE -> "CALLDATASIZE"
291+
| EVM_CODESIZE -> "CODESIZE"
292+
| EVM_GASPRICE -> "GASPRICE"
293+
| EVM_EXTCODESIZE -> "EXTCODESIZE"
294+
| EVM_BLOCKHASH -> "BLOCKHASH"
295+
| EVM_COINBASE -> "COINBASE"
296+
| EVM_TIMESTAMP -> "TIMESTAMP"
297+
| EVM_NUMBER -> "NUMBER"
298+
| EVM_DIFFICULTY -> "DIFFICULTY"
299+
| EVM_GASLIMIT -> "GASLIMIT"
300+
| EVM_GAS -> "GAS"
301+
| EVM_CODECOPY -> "CODECOPY"
302+
| EVM_POP -> "POP"
303+
| EVM_MLOAD -> "MLOAD"
304+
| EVM_MSTORE -> "MSTORE"
305+
| EVM_MSTORE8 -> "MSTORE8"
306+
| EVM_SLOAD -> "SLOAD"
307+
| EVM_SSTORE -> "SSTORE"
308+
| EVM_JUMP -> "JUMP"
309+
| EVM_JUMPI -> "JUMPI"
310+
| EVM_JUMPDEST l -> ("L"^ l ^ ": JUMPDEST")
311+
| EVM_PUSH (n, data) ->
312+
Printf.sprintf "%-16s %s" (Printf.sprintf "PUSH%d" n) data
313+
| EVM_DUP n -> Printf.sprintf "DUP%d" n
314+
| EVM_SWAP n -> Printf.sprintf "SWAP%d" n
315+
| EVM_CALL -> "CALL"
316+
| EVM_REVERT -> "REVERT"
317+
| EVM_RETURN -> "RETURN"
318+
| EVM_LOG -> "LOG"
319+
320+
let show_evm program =
321+
List.fold_left (fun acc x -> acc ^ "\n" ^ (EVMExt.show x)) "" program
322+
323+
let split program label =
324+
let rec constructor_helper target acc = function
325+
| x :: xs ->
326+
if (x = target) then
327+
acc
328+
else
329+
constructor_helper target (acc @ [x]) xs
330+
| [] -> []
331+
and body_helper target = function
332+
| x :: xs ->
333+
if (x = target) then
334+
x :: xs
335+
else
336+
body_helper target xs
337+
| [] -> []
338+
in
339+
{
340+
constructor = constructor_helper (EVM.Coq_evm_label label) [] program ;
341+
body = body_helper (EVM.Coq_evm_label label) program
342+
}
343+
344+
let assemble program =
345+
let bytecode_list = List.map assemble_inst program in
346+
List.fold_left (fun acc x -> acc ^ x) "" bytecode_list
347+
348+
let transform program entrypoint =
349+
let transform_intermediate program =
350+
let transform' links p =
351+
List.map (transform_inst links) p in
352+
let size_of_inst = function
353+
| EVM_PUSH (n, data) -> 1 + n
354+
| _ -> 1 in
355+
let size_of_program asm =
356+
List.fold_left (fun acc x -> acc + (size_of_inst x)) 0 asm in
357+
let chop_links n =
358+
Links.map (fun x -> x - n) in
359+
let links = map_labels (program.constructor @ program.body) in
360+
let constructor_asm = transform' links program.constructor in
361+
let constructor_size = size_of_program constructor_asm in
362+
let links' = chop_links constructor_size links in
363+
let body_asm = transform' links' program.body in
364+
constructor_asm @ body_asm
365+
in
366+
transform_intermediate (split program entrypoint)
367+
368+
let mnemonics program =
369+
List.fold_left (fun acc x -> acc ^ "\n" ^ (show_asm_inst x)) "" program

src/backend/ASM.mli

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
type asm (* representation corresponding directly to real EVM instructions *)
3+
type asm_program = asm list
4+
type evm_program = EVM.evm list
5+
type intermediate = {
6+
constructor : evm_program ;
7+
body : evm_program
8+
}
9+
10+
(* *)
11+
val transform : evm_program -> AST.label -> asm_program
12+
13+
(* final bytecode output *)
14+
val assemble : asm_program -> string
15+
16+
(* representation for humans *)
17+
val mnemonics : asm_program -> string

0 commit comments

Comments
 (0)