diff --git a/sledge/semantics/llairScript.sml b/sledge/semantics/llairScript.sml index a7d02aa96..5c6657ce4 100644 --- a/sledge/semantics/llairScript.sml +++ b/sledge/semantics/llairScript.sml @@ -97,7 +97,7 @@ Datatype ` func = <| params : var list; locals : var set; entry : label; - cfg : label |-> block; + cfg : (label, block) alist; freturn : var; fthrow : var |>`; @@ -106,7 +106,7 @@ Datatype ` global = <| var : var; init : (exp # int) option; typ: typ |>`; Datatype ` - llair = <| globals : global list; functions : label |-> func |>`; + llair = <| globals : global list; functions : (label, func) alist |>`; (* ----- Semantic states ----- *) diff --git a/sledge/semantics/llvmScript.sml b/sledge/semantics/llvmScript.sml index 6bbef3036..7ca53c278 100644 --- a/sledge/semantics/llvmScript.sml +++ b/sledge/semantics/llvmScript.sml @@ -75,7 +75,7 @@ Datatype ` | Gep reg targ (targ list) | Ptrtoint reg targ ty | Inttoptr reg targ ty - | Icmp cond ty arg arg + | Icmp reg cond ty arg arg | Call reg ty fun_name (targ list) (* C++ runtime functions *) | Cxa_allocate_exn reg arg @@ -85,7 +85,7 @@ Datatype ` | Cxa_get_exception_ptr reg arg`; Datatype ` - phi = Phi reg ty (label option |-> arg)`; + phi = Phi reg ty ((label option, arg) alist)`; Datatype ` clause = Catch targ`; @@ -106,9 +106,9 @@ Datatype ` <| r : ty; params : reg list; (* None -> entry block, and Some name -> non-entry block *) - blocks : label option |-> block |>`; + blocks : (label option, block) alist |>`; -type_abbrev ("prog", ``:fun_name |-> def``); +type_abbrev ("prog", ``:(fun_name, def) alist``); Definition terminator_def: (terminator (Ret _) ⇔ T) ∧ @@ -443,7 +443,7 @@ End Definition do_phi_def: do_phi from_l s (Phi id _ entries) = - option_map (λarg. (id, eval s arg)) (flookup entries from_l) + option_map (λarg. (id, eval s arg)) (alookup entries from_l) End Definition extract_value_def: @@ -507,8 +507,8 @@ Inductive step_instr: *) (eval s a = <| poison := p; value := W1V tf |> ∧ l = Some (if tf = 1w then l1 else l2) ∧ - flookup prog s.ip.f = Some d ∧ - flookup d.blocks l = Some <| h := Head phis None; body := b |> ∧ + alookup prog s.ip.f = Some d ∧ + alookup d.blocks l = Some <| h := Head phis None; body := b |> ∧ map (do_phi l s) phis = map Some updates ⇒ step_instr prog s @@ -598,10 +598,10 @@ Inductive step_instr: (inc_pc (update_result r <| poison := v1.poison; value := PtrV w |> s))) ∧ (step_instr prog s - (Icmp c t a1 a2) + (Icmp r c t a1 a2) (inc_pc (update_result r (do_icmp c (eval s a1) (eval s a2)) s))) ∧ - (flookup prog fname = Some d + (alookup prog fname = Some d ⇒ step_instr prog s (Call r t fname targs) @@ -629,8 +629,8 @@ Inductive step_instr: End Inductive next_instr: - flookup p s.ip.f = Some d ∧ - flookup d.blocks s.ip.b = Some b ∧ + alookup p s.ip.f = Some d ∧ + alookup d.blocks s.ip.b = Some b ∧ s.ip.i < length b.body ⇒ next_instr p s (el s.ip.i b.body) @@ -673,22 +673,22 @@ End (* Instruction pointer points to an instruction *) Definition ip_ok_def: ip_ok p ip ⇔ - ∃dec block. flookup p ip.f = Some dec ∧ flookup dec.blocks ip.b = Some block ∧ ip.i < length block.body + ∃dec block. alookup p ip.f = Some dec ∧ alookup dec.blocks ip.b = Some block ∧ ip.i < length block.body End Definition prog_ok_def: prog_ok p ⇔ ((* All blocks end with terminators *) ∀fname dec bname block. - flookup p fname = Some dec ∧ - flookup dec.blocks bname = Some block + alookup p fname = Some dec ∧ + alookup dec.blocks bname = Some block ⇒ block.body ≠ [] ∧ terminator (last block.body)) ∧ ((* All functions have an entry block *) ∀fname dec. - flookup p fname = Some dec ⇒ ∃block. flookup dec.blocks None = Some block) ∧ + alookup p fname = Some dec ⇒ ∃block. alookup dec.blocks None = Some block) ∧ (* There is a main function *) - ∃dec. flookup p (Fn "main") = Some dec + ∃dec. alookup p (Fn "main") = Some dec End (* All call frames have a good return address, and the stack allocations of the diff --git a/sledge/semantics/llvm_propScript.sml b/sledge/semantics/llvm_propScript.sml index 20b8db6c6..8d6f5cca0 100644 --- a/sledge/semantics/llvm_propScript.sml +++ b/sledge/semantics/llvm_propScript.sml @@ -715,7 +715,7 @@ Proof fs [state_invariant_def] >> rw [] >- ( rw [ip_ok_def] >> fs [prog_ok_def, NOT_NIL_EQ_LENGTH_NOT_0] >> - qpat_x_assum `flookup _ (Fn "main") = _` kall_tac >> + qpat_x_assum `alookup _ (Fn "main") = _` kall_tac >> last_x_assum drule >> disch_then drule >> fs []) >- (fs [allocations_ok_def] >> metis_tac []) >- (fs [heap_ok_def] >> metis_tac []) @@ -725,7 +725,7 @@ Proof fs [state_invariant_def] >> rw [] >- ( rw [ip_ok_def] >> fs [prog_ok_def, NOT_NIL_EQ_LENGTH_NOT_0] >> - qpat_x_assum `flookup _ (Fn "main") = _` kall_tac >> + qpat_x_assum `alookup _ (Fn "main") = _` kall_tac >> last_x_assum drule >> disch_then drule >> fs []) >- (fs [allocations_ok_def] >> metis_tac []) >- (fs [heap_ok_def] >> metis_tac []) diff --git a/sledge/semantics/llvm_to_llairScript.sml b/sledge/semantics/llvm_to_llairScript.sml new file mode 100644 index 000000000..5813cf6e5 --- /dev/null +++ b/sledge/semantics/llvm_to_llairScript.sml @@ -0,0 +1,349 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +(* Transformation from llvm to llair *) + +open HolKernel boolLib bossLib Parse; +open arithmeticTheory pred_setTheory; +open settingsTheory llvmTheory llairTheory; + +new_theory "llvm_to_llair"; + +numLib.prefer_num (); + +Definition the_def: + (the None x = x) ∧ + (the (Some x) _ = x) +End + +Definition find_name_def: + find_name used new suff = + let n = new ++ (toString suff) in + if n ∉ used ∨ ¬finite used then + n + else + find_name used new (suff + 1n) +Termination + WF_REL_TAC `measure (λ(u,new,s). card { str | ?n. str = new++toString n ∧ str ∈ u ∧ s ≤ n })` >> rw [] >> + qmatch_abbrev_tac `card s1 < card s2` >> + `s2 ⊆ used` by rw [Abbr `s1`, Abbr `s2`, SUBSET_DEF] >> + `s1 ⊆ s2` by (rw [Abbr `s1`, Abbr `s2`, SUBSET_DEF] >> qexists_tac `n` >> rw []) >> + `s1 ≠ s2` + by ( + rw [Abbr `s1`, Abbr `s2`, EXTENSION] >> qexists_tac `new ++ toString suff` >> rw []) >> + metis_tac [CARD_SUBSET, SUBSET_FINITE, SUBSET_EQ_CARD, LESS_OR_EQ] +End + +Definition gen_name_def: + gen_name used new = + if new ∈ used then + find_name used new 0 + else + new +End + +Definition gen_names_def: + (gen_names used [] = (used, [])) ∧ + (gen_names used (n::ns) = + let n = gen_name used n in + let (used, names) = gen_names ({n} ∪ used) ns in + (used, n::names)) +End + +Definition translate_size_def: + (translate_size llvm$W1 = 1) ∧ + (translate_size W8 = 8) ∧ + (translate_size W32 = 32) ∧ + (translate_size W64 = 64) +End + +(* TODO *) +Definition translate_ty_def: + translate_ty = ARB : ty -> typ +End + +Definition translate_glob_var_def: + translate_glob_var (Glob_var g) = Var_name g +End + +Definition translate_reg_def: + translate_reg (Reg r) = Var_name r +End + +Definition translate_label_def: + translate_label f (Lab l) = Lab_name f l +End + +Definition translate_const_def: + (translate_const (IntC s i) = Integer i (IntegerT (translate_size s))) ∧ + (translate_const (StrC tcs) = + Record (map (λ(ty, c). translate_const c) tcs)) ∧ + (translate_const (ArrC tcs) = + Record (map (λ(ty, c). translate_const c) tcs)) ∧ + (translate_const (GlobalC g) = Var (translate_glob_var g)) ∧ + (* TODO *) + (translate_const (GepC _ _ _ _) = ARB) ∧ + (translate_const UndefC = Nondet) +Termination + WF_REL_TAC `measure const_size` >> + Induct_on `tcs` >> rw [] >> rw [const_size_def] >> + first_x_assum drule >> decide_tac +End + +Definition translate_arg_def: + (translate_arg emap (Constant c) = translate_const c) ∧ + (translate_arg emap (Variable r) = + case flookup emap r of + | None => Var (translate_reg r) + | Some e => e) +End + +Definition translate_updatevalue_def: + (translate_updatevalue a v [] = v) ∧ + (translate_updatevalue a v (c::cs) = + let c' = translate_const c in + Update a c' (translate_updatevalue (Select a c') v cs)) +End + +(* TODO *) +Definition translate_instr_to_exp_def: + (translate_instr_to_exp emap (llvm$Sub _ _ _ ty a1 a2) = + llair$Sub (translate_ty ty) (translate_arg emap a1) (translate_arg emap a2)) ∧ + (translate_instr_to_exp emap (Extractvalue _ (_, a) cs) = + foldl (λe c. Select e (translate_const c)) (translate_arg emap a) cs) ∧ + (translate_instr_to_exp emap (Insertvalue _ (_, a1) (_, a2) cs) = + translate_updatevalue (translate_arg emap a1) (translate_arg emap a2) cs) +End + +(* This translation of insertvalue to update and select is quadratic in the + * number of indices, but we haven't observed clang-generated code with multiple + * indices. + * + * Insertvalue a v [c1; c2; c3] becomes + * + * Up a c1 (Up (Sel a c1) c2 (Up (Sel (Sel a c1) c2) c3 v)) + * + * We could store each of the selections and get a linear size list of + * instructions instead of a single expression. + * + * Examples: + * EVAL ``translate_instr_to_exp fempty (Extractvalue r (t,a) [c1; c2; c3; c4; c5])`` + ⊢ translate_instr_to_exp fempty (Extractvalue r (t,a) [c1; c2; c3; c4; c5]) = + Select + (Select + (Select + (Select (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2)) (translate_const c3)) + (translate_const c4)) (translate_const c5): thm + * + * EVAL ``translate_instr_to_exp fempty (Insertvalue r (t,a) (t,v) [c1; c2; c3; c4; c5])`` + ⊢ translate_instr_to_exp fempty (Insertvalue r (t,a) (t,v) [c1; c2; c3; c4; c5]) = + Update (translate_arg fempty a) (translate_const c1) + (Update (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2) + (Update + (Select (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2)) (translate_const c3) + (Update + (Select + (Select + (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2)) (translate_const c3)) + (translate_const c4) + (Update + (Select + (Select + (Select + (Select (translate_arg fempty a) + (translate_const c1)) (translate_const c2)) + (translate_const c3)) (translate_const c4)) + (translate_const c5) (translate_arg fempty v))))): thm + * + * *) + + +(* TODO *) +Definition translate_instr_to_inst_def: + (translate_instr_to_inst emap (llvm$Store (t1, a1) (t2, a2)) = + llair$Store (translate_arg emap a1) (translate_arg emap a2) (sizeof t2)) ∧ + (translate_instr_to_inst emap (Load r t (t1, a1)) = + Load (translate_reg r) (translate_arg emap a1) (sizeof t)) +End + +(* TODO *) +Definition translate_instr_to_term_def: + translate_instr_to_term f emap (Br a l1 l2) = + Iswitch (translate_arg emap a) [translate_label f l2; translate_label f l1] +End + +Datatype ` + instr_class = + | Exp reg + | Non_exp + | Term + | Call`; + +Definition classify_instr_def: + (classify_instr (Call _ _ _ _) = Call) ∧ + (classify_instr (Ret _) = Term) ∧ + (classify_instr (Br _ _ _) = Term) ∧ + (classify_instr (Invoke _ _ _ _ _ _) = Term) ∧ + (classify_instr Unreachable = Term) ∧ + (classify_instr (Load _ _ _) = Non_exp) ∧ + (classify_instr (Store _ _) = Non_exp) ∧ + (classify_instr (Cxa_throw _ _ _) = Non_exp) ∧ + (classify_instr Cxa_end_catch = Non_exp) ∧ + (classify_instr (Cxa_begin_catch _ _) = Non_exp) ∧ + (classify_instr (Sub r _ _ _ _ _) = Exp r) ∧ + (classify_instr (Extractvalue r _ _) = Exp r) ∧ + (classify_instr (Insertvalue r _ _ _) = Exp r) ∧ + (classify_instr (Alloca r _ _) = Exp r) ∧ + (classify_instr (Gep r _ _) = Exp r) ∧ + (classify_instr (Ptrtoint r _ _) = Exp r) ∧ + (classify_instr (Inttoptr r _ _) = Exp r) ∧ + (classify_instr (Icmp r _ _ _ _) = Exp r) ∧ + (classify_instr (Cxa_allocate_exn r _) = Exp r) ∧ + (classify_instr (Cxa_get_exception_ptr r _) = Exp r) +End + +(* Translate a list of instructions into an block. f is the name of the function + * that the instructions are in, live_out is the set of variables that are live + * on exit of the instructions, emap is a mapping of registers to expressions + * that compute the value that should have been in the expression. + * + * This tries to build large expressions, and omits intermediate assignments + * wherever possible. However, if a register is live on the exit to the block, + * we can't omit the assignment to it. + * For example: + * r2 = sub r0 r1 + * r3 = sub r2 r1 + * r4 = sub r3 r0 + * + * becomes + * r4 = sub (sub (sub r0 r1) r1) r0 + * + * if r4 is the only register live at the exit of the block + * + * TODO: make this more aggressive and build expressions across blocks + *) +Definition translate_instrs_def: + (translate_instrs f live_out emap [] = <| cmnd := []; term := Unreachable |>) ∧ + (translate_instrs f live_out emap (i :: is) = + case classify_instr i of + | Exp r => + let x = translate_reg r in + let e = translate_instr_to_exp emap i in + if r ∈ live_out then + let b = translate_instrs f live_out (emap |+ (r, Var x)) is in + b with cmnd := Move [(x, e)] :: b.cmnd + else + translate_instrs f live_out (emap |+ (r, e)) is + | Non_exp => + let b = translate_instrs f live_out emap is in + b with cmnd := translate_instr_to_inst emap i :: b.cmnd + | Term => + <| cmnd := []; term := translate_instr_to_term f emap i |> + (* TODO *) + | Call => ARB) +End + +Definition dest_label_def: + dest_label (Lab s) = s +End + +Definition dest_phi_def: + dest_phi (Phi r _ largs) = (r, largs) +End + +Definition translate_label_opt_def: + (translate_label_opt f entry None = Lab_name f entry) ∧ + (translate_label_opt f entry (Some l) = translate_label f l) +End + +Definition translate_header_def: + (translate_header f entry Entry = []) ∧ + (translate_header f entry (Head phis _) = + map + (λ(r, largs). + (translate_reg r, + map (λ(l, arg). (translate_label_opt f entry l, translate_arg fempty arg)) largs)) + (map dest_phi phis)) +End + +Definition translate_block_def: + translate_block f entry_n live_out (l, b) = + (Lab_name f (the (option_map dest_label l) entry_n), + (translate_header f entry_n b.h, translate_instrs f live_out fempty b.body)) +End + +(* Given a label and phi node, get the assignment for that incoming label. It's + * convenient to return a list, but we expect there to be exactly 1 element. *) +Definition build_move_for_lab_def: + build_move_for_lab l (r, les) = + let les = filter (λ(l', e). l = l') les in + map (λ(l', e). (r,e)) les +End + +(* Given a list of phis and a label, get the move corresponding to entering + * the block targeted by l_to from block l_from *) +Definition generate_move_block_def: + generate_move_block phis l_from l_to = + let t = Iswitch (Integer 0 (IntegerT 1)) [l_to] in + case alookup phis l_to of + | None => <| cmnd := [Move []]; term := t |> + | Some (phis, _) => + <| cmnd := [Move (flat (map (build_move_for_lab l_from) phis))]; + term := t |> +End + +Definition label_name_def: + label_to_name (Lab_name _ l) = l +End + +(* Given association list of labels and phi-block pairs, and a particular block, + * build the new move blocks for its terminator *) +Definition generate_move_blocks_def: + generate_move_blocks f used_names bs (l_from, (_, body)) = + case body.term of + | Iswitch e ls => + let (used_names, new_names) = gen_names used_names (map label_to_name ls) in + let mb = map2 (λl_to new. (Lab_name f new, generate_move_block bs l_from l_to)) ls new_names in + (used_names, (l_from, body with term := Iswitch e (map fst mb)) :: mb) +End + +Definition generate_move_blocks_list_def: + (generate_move_blocks_list f used_names bs [] = (used_names, [])) ∧ + (generate_move_blocks_list f used_names bs (b::bs') = + let (used_names, new_blocks) = generate_move_blocks f used_names bs b in + let (used_names, new_blocks2) = + generate_move_blocks_list f used_names bs bs' + in + (used_names, new_blocks :: new_blocks2)) +End + +(* Givel an association list of labels and phi-block pairs, remove the phi's, + * by generating an extra block along each control flow edge that does the move + * corresponding to the relevant phis. *) +Definition remove_phis_def: + remove_phis f used_names bs = flat (snd (generate_move_blocks_list f used_names bs bs)) +End + +Definition translate_def_def: + translate_def (Lab f) d = + let used_names = ARB in + let entry_name = gen_name used_names "entry" in + let bs = map (translate_block f entry_name UNIV) d.blocks in + <| params := map translate_reg d.params; + (* TODO: calculate these from the produced llair, and not the llvm *) + locals := ARB; + entry := Lab_name f entry_name; + cfg := remove_phis f used_names bs; + freturn := ARB; + fthrow := ARB |> +End + +export_theory (); diff --git a/sledge/semantics/settingsScript.sml b/sledge/semantics/settingsScript.sml index af03c3ef6..f5cb4095f 100644 --- a/sledge/semantics/settingsScript.sml +++ b/sledge/semantics/settingsScript.sml @@ -48,5 +48,10 @@ overload_on ("image", ``IMAGE``); overload_on ("bigunion", ``BIGUNION``); overload_on ("finite", ``FINITE``); overload_on ("card", ``CARD``); +overload_on ("map_keys", ``MAP_KEYS``); +overload_on ("alookup", ``ALOOKUP``); +overload_on ("filter", ``FILTER``); +overload_on ("map2", ``list$MAP2``); +overload_on ("foldl", ``FOLDL``); export_theory ();