From 84883127afa98810893c1819ecb5acd01e6a7316 Mon Sep 17 00:00:00 2001 From: Scott Owens Date: Fri, 23 Aug 2019 07:12:19 -0700 Subject: [PATCH] Add a skeleton of an approach to llvm->llair Summary: This sketches out how translation can be approached. It is partially based on the Sledge code. For basic blocks, isn't based on the Sledge code, but just my own thoughts as a starting point. Essentially, we are trying to build up larger expressions, and so not assigning to temporary registers that don't live past the end of the block. This does remove sharing, so a fancier approach could check for multiple uses of end-of-block dead registers, or look at the sizes of expressions. The approach should be flexible enough to accommodate such changes. Fix icmp syntax Using finite maps is elegant in the semantics, but awkward for writing the translation function. Refactor the mappings from labels to functions and from labels to blocks to use association lists instead. To remove phi nodes, the translation takes every edge in the control flow graph and makes a new basic block that contains a single parallel move instruction that corresponds to the action of the phi node of the target block. Reviewed By: jberdine Differential Revision: D16831051 fbshipit-source-id: 005663e26 --- sledge/semantics/llairScript.sml | 4 +- sledge/semantics/llvmScript.sml | 32 +-- sledge/semantics/llvm_propScript.sml | 4 +- sledge/semantics/llvm_to_llairScript.sml | 349 +++++++++++++++++++++++ sledge/semantics/settingsScript.sml | 5 + 5 files changed, 374 insertions(+), 20 deletions(-) create mode 100644 sledge/semantics/llvm_to_llairScript.sml diff --git a/sledge/semantics/llairScript.sml b/sledge/semantics/llairScript.sml index a7d02aa96..5c6657ce4 100644 --- a/sledge/semantics/llairScript.sml +++ b/sledge/semantics/llairScript.sml @@ -97,7 +97,7 @@ Datatype ` func = <| params : var list; locals : var set; entry : label; - cfg : label |-> block; + cfg : (label, block) alist; freturn : var; fthrow : var |>`; @@ -106,7 +106,7 @@ Datatype ` global = <| var : var; init : (exp # int) option; typ: typ |>`; Datatype ` - llair = <| globals : global list; functions : label |-> func |>`; + llair = <| globals : global list; functions : (label, func) alist |>`; (* ----- Semantic states ----- *) diff --git a/sledge/semantics/llvmScript.sml b/sledge/semantics/llvmScript.sml index 6bbef3036..7ca53c278 100644 --- a/sledge/semantics/llvmScript.sml +++ b/sledge/semantics/llvmScript.sml @@ -75,7 +75,7 @@ Datatype ` | Gep reg targ (targ list) | Ptrtoint reg targ ty | Inttoptr reg targ ty - | Icmp cond ty arg arg + | Icmp reg cond ty arg arg | Call reg ty fun_name (targ list) (* C++ runtime functions *) | Cxa_allocate_exn reg arg @@ -85,7 +85,7 @@ Datatype ` | Cxa_get_exception_ptr reg arg`; Datatype ` - phi = Phi reg ty (label option |-> arg)`; + phi = Phi reg ty ((label option, arg) alist)`; Datatype ` clause = Catch targ`; @@ -106,9 +106,9 @@ Datatype ` <| r : ty; params : reg list; (* None -> entry block, and Some name -> non-entry block *) - blocks : label option |-> block |>`; + blocks : (label option, block) alist |>`; -type_abbrev ("prog", ``:fun_name |-> def``); +type_abbrev ("prog", ``:(fun_name, def) alist``); Definition terminator_def: (terminator (Ret _) ⇔ T) ∧ @@ -443,7 +443,7 @@ End Definition do_phi_def: do_phi from_l s (Phi id _ entries) = - option_map (λarg. (id, eval s arg)) (flookup entries from_l) + option_map (λarg. (id, eval s arg)) (alookup entries from_l) End Definition extract_value_def: @@ -507,8 +507,8 @@ Inductive step_instr: *) (eval s a = <| poison := p; value := W1V tf |> ∧ l = Some (if tf = 1w then l1 else l2) ∧ - flookup prog s.ip.f = Some d ∧ - flookup d.blocks l = Some <| h := Head phis None; body := b |> ∧ + alookup prog s.ip.f = Some d ∧ + alookup d.blocks l = Some <| h := Head phis None; body := b |> ∧ map (do_phi l s) phis = map Some updates ⇒ step_instr prog s @@ -598,10 +598,10 @@ Inductive step_instr: (inc_pc (update_result r <| poison := v1.poison; value := PtrV w |> s))) ∧ (step_instr prog s - (Icmp c t a1 a2) + (Icmp r c t a1 a2) (inc_pc (update_result r (do_icmp c (eval s a1) (eval s a2)) s))) ∧ - (flookup prog fname = Some d + (alookup prog fname = Some d ⇒ step_instr prog s (Call r t fname targs) @@ -629,8 +629,8 @@ Inductive step_instr: End Inductive next_instr: - flookup p s.ip.f = Some d ∧ - flookup d.blocks s.ip.b = Some b ∧ + alookup p s.ip.f = Some d ∧ + alookup d.blocks s.ip.b = Some b ∧ s.ip.i < length b.body ⇒ next_instr p s (el s.ip.i b.body) @@ -673,22 +673,22 @@ End (* Instruction pointer points to an instruction *) Definition ip_ok_def: ip_ok p ip ⇔ - ∃dec block. flookup p ip.f = Some dec ∧ flookup dec.blocks ip.b = Some block ∧ ip.i < length block.body + ∃dec block. alookup p ip.f = Some dec ∧ alookup dec.blocks ip.b = Some block ∧ ip.i < length block.body End Definition prog_ok_def: prog_ok p ⇔ ((* All blocks end with terminators *) ∀fname dec bname block. - flookup p fname = Some dec ∧ - flookup dec.blocks bname = Some block + alookup p fname = Some dec ∧ + alookup dec.blocks bname = Some block ⇒ block.body ≠ [] ∧ terminator (last block.body)) ∧ ((* All functions have an entry block *) ∀fname dec. - flookup p fname = Some dec ⇒ ∃block. flookup dec.blocks None = Some block) ∧ + alookup p fname = Some dec ⇒ ∃block. alookup dec.blocks None = Some block) ∧ (* There is a main function *) - ∃dec. flookup p (Fn "main") = Some dec + ∃dec. alookup p (Fn "main") = Some dec End (* All call frames have a good return address, and the stack allocations of the diff --git a/sledge/semantics/llvm_propScript.sml b/sledge/semantics/llvm_propScript.sml index 20b8db6c6..8d6f5cca0 100644 --- a/sledge/semantics/llvm_propScript.sml +++ b/sledge/semantics/llvm_propScript.sml @@ -715,7 +715,7 @@ Proof fs [state_invariant_def] >> rw [] >- ( rw [ip_ok_def] >> fs [prog_ok_def, NOT_NIL_EQ_LENGTH_NOT_0] >> - qpat_x_assum `flookup _ (Fn "main") = _` kall_tac >> + qpat_x_assum `alookup _ (Fn "main") = _` kall_tac >> last_x_assum drule >> disch_then drule >> fs []) >- (fs [allocations_ok_def] >> metis_tac []) >- (fs [heap_ok_def] >> metis_tac []) @@ -725,7 +725,7 @@ Proof fs [state_invariant_def] >> rw [] >- ( rw [ip_ok_def] >> fs [prog_ok_def, NOT_NIL_EQ_LENGTH_NOT_0] >> - qpat_x_assum `flookup _ (Fn "main") = _` kall_tac >> + qpat_x_assum `alookup _ (Fn "main") = _` kall_tac >> last_x_assum drule >> disch_then drule >> fs []) >- (fs [allocations_ok_def] >> metis_tac []) >- (fs [heap_ok_def] >> metis_tac []) diff --git a/sledge/semantics/llvm_to_llairScript.sml b/sledge/semantics/llvm_to_llairScript.sml new file mode 100644 index 000000000..5813cf6e5 --- /dev/null +++ b/sledge/semantics/llvm_to_llairScript.sml @@ -0,0 +1,349 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +(* Transformation from llvm to llair *) + +open HolKernel boolLib bossLib Parse; +open arithmeticTheory pred_setTheory; +open settingsTheory llvmTheory llairTheory; + +new_theory "llvm_to_llair"; + +numLib.prefer_num (); + +Definition the_def: + (the None x = x) ∧ + (the (Some x) _ = x) +End + +Definition find_name_def: + find_name used new suff = + let n = new ++ (toString suff) in + if n ∉ used ∨ ¬finite used then + n + else + find_name used new (suff + 1n) +Termination + WF_REL_TAC `measure (λ(u,new,s). card { str | ?n. str = new++toString n ∧ str ∈ u ∧ s ≤ n })` >> rw [] >> + qmatch_abbrev_tac `card s1 < card s2` >> + `s2 ⊆ used` by rw [Abbr `s1`, Abbr `s2`, SUBSET_DEF] >> + `s1 ⊆ s2` by (rw [Abbr `s1`, Abbr `s2`, SUBSET_DEF] >> qexists_tac `n` >> rw []) >> + `s1 ≠ s2` + by ( + rw [Abbr `s1`, Abbr `s2`, EXTENSION] >> qexists_tac `new ++ toString suff` >> rw []) >> + metis_tac [CARD_SUBSET, SUBSET_FINITE, SUBSET_EQ_CARD, LESS_OR_EQ] +End + +Definition gen_name_def: + gen_name used new = + if new ∈ used then + find_name used new 0 + else + new +End + +Definition gen_names_def: + (gen_names used [] = (used, [])) ∧ + (gen_names used (n::ns) = + let n = gen_name used n in + let (used, names) = gen_names ({n} ∪ used) ns in + (used, n::names)) +End + +Definition translate_size_def: + (translate_size llvm$W1 = 1) ∧ + (translate_size W8 = 8) ∧ + (translate_size W32 = 32) ∧ + (translate_size W64 = 64) +End + +(* TODO *) +Definition translate_ty_def: + translate_ty = ARB : ty -> typ +End + +Definition translate_glob_var_def: + translate_glob_var (Glob_var g) = Var_name g +End + +Definition translate_reg_def: + translate_reg (Reg r) = Var_name r +End + +Definition translate_label_def: + translate_label f (Lab l) = Lab_name f l +End + +Definition translate_const_def: + (translate_const (IntC s i) = Integer i (IntegerT (translate_size s))) ∧ + (translate_const (StrC tcs) = + Record (map (λ(ty, c). translate_const c) tcs)) ∧ + (translate_const (ArrC tcs) = + Record (map (λ(ty, c). translate_const c) tcs)) ∧ + (translate_const (GlobalC g) = Var (translate_glob_var g)) ∧ + (* TODO *) + (translate_const (GepC _ _ _ _) = ARB) ∧ + (translate_const UndefC = Nondet) +Termination + WF_REL_TAC `measure const_size` >> + Induct_on `tcs` >> rw [] >> rw [const_size_def] >> + first_x_assum drule >> decide_tac +End + +Definition translate_arg_def: + (translate_arg emap (Constant c) = translate_const c) ∧ + (translate_arg emap (Variable r) = + case flookup emap r of + | None => Var (translate_reg r) + | Some e => e) +End + +Definition translate_updatevalue_def: + (translate_updatevalue a v [] = v) ∧ + (translate_updatevalue a v (c::cs) = + let c' = translate_const c in + Update a c' (translate_updatevalue (Select a c') v cs)) +End + +(* TODO *) +Definition translate_instr_to_exp_def: + (translate_instr_to_exp emap (llvm$Sub _ _ _ ty a1 a2) = + llair$Sub (translate_ty ty) (translate_arg emap a1) (translate_arg emap a2)) ∧ + (translate_instr_to_exp emap (Extractvalue _ (_, a) cs) = + foldl (λe c. Select e (translate_const c)) (translate_arg emap a) cs) ∧ + (translate_instr_to_exp emap (Insertvalue _ (_, a1) (_, a2) cs) = + translate_updatevalue (translate_arg emap a1) (translate_arg emap a2) cs) +End + +(* This translation of insertvalue to update and select is quadratic in the + * number of indices, but we haven't observed clang-generated code with multiple + * indices. + * + * Insertvalue a v [c1; c2; c3] becomes + * + * Up a c1 (Up (Sel a c1) c2 (Up (Sel (Sel a c1) c2) c3 v)) + * + * We could store each of the selections and get a linear size list of + * instructions instead of a single expression. + * + * Examples: + * EVAL ``translate_instr_to_exp fempty (Extractvalue r (t,a) [c1; c2; c3; c4; c5])`` + ⊢ translate_instr_to_exp fempty (Extractvalue r (t,a) [c1; c2; c3; c4; c5]) = + Select + (Select + (Select + (Select (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2)) (translate_const c3)) + (translate_const c4)) (translate_const c5): thm + * + * EVAL ``translate_instr_to_exp fempty (Insertvalue r (t,a) (t,v) [c1; c2; c3; c4; c5])`` + ⊢ translate_instr_to_exp fempty (Insertvalue r (t,a) (t,v) [c1; c2; c3; c4; c5]) = + Update (translate_arg fempty a) (translate_const c1) + (Update (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2) + (Update + (Select (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2)) (translate_const c3) + (Update + (Select + (Select + (Select (translate_arg fempty a) (translate_const c1)) + (translate_const c2)) (translate_const c3)) + (translate_const c4) + (Update + (Select + (Select + (Select + (Select (translate_arg fempty a) + (translate_const c1)) (translate_const c2)) + (translate_const c3)) (translate_const c4)) + (translate_const c5) (translate_arg fempty v))))): thm + * + * *) + + +(* TODO *) +Definition translate_instr_to_inst_def: + (translate_instr_to_inst emap (llvm$Store (t1, a1) (t2, a2)) = + llair$Store (translate_arg emap a1) (translate_arg emap a2) (sizeof t2)) ∧ + (translate_instr_to_inst emap (Load r t (t1, a1)) = + Load (translate_reg r) (translate_arg emap a1) (sizeof t)) +End + +(* TODO *) +Definition translate_instr_to_term_def: + translate_instr_to_term f emap (Br a l1 l2) = + Iswitch (translate_arg emap a) [translate_label f l2; translate_label f l1] +End + +Datatype ` + instr_class = + | Exp reg + | Non_exp + | Term + | Call`; + +Definition classify_instr_def: + (classify_instr (Call _ _ _ _) = Call) ∧ + (classify_instr (Ret _) = Term) ∧ + (classify_instr (Br _ _ _) = Term) ∧ + (classify_instr (Invoke _ _ _ _ _ _) = Term) ∧ + (classify_instr Unreachable = Term) ∧ + (classify_instr (Load _ _ _) = Non_exp) ∧ + (classify_instr (Store _ _) = Non_exp) ∧ + (classify_instr (Cxa_throw _ _ _) = Non_exp) ∧ + (classify_instr Cxa_end_catch = Non_exp) ∧ + (classify_instr (Cxa_begin_catch _ _) = Non_exp) ∧ + (classify_instr (Sub r _ _ _ _ _) = Exp r) ∧ + (classify_instr (Extractvalue r _ _) = Exp r) ∧ + (classify_instr (Insertvalue r _ _ _) = Exp r) ∧ + (classify_instr (Alloca r _ _) = Exp r) ∧ + (classify_instr (Gep r _ _) = Exp r) ∧ + (classify_instr (Ptrtoint r _ _) = Exp r) ∧ + (classify_instr (Inttoptr r _ _) = Exp r) ∧ + (classify_instr (Icmp r _ _ _ _) = Exp r) ∧ + (classify_instr (Cxa_allocate_exn r _) = Exp r) ∧ + (classify_instr (Cxa_get_exception_ptr r _) = Exp r) +End + +(* Translate a list of instructions into an block. f is the name of the function + * that the instructions are in, live_out is the set of variables that are live + * on exit of the instructions, emap is a mapping of registers to expressions + * that compute the value that should have been in the expression. + * + * This tries to build large expressions, and omits intermediate assignments + * wherever possible. However, if a register is live on the exit to the block, + * we can't omit the assignment to it. + * For example: + * r2 = sub r0 r1 + * r3 = sub r2 r1 + * r4 = sub r3 r0 + * + * becomes + * r4 = sub (sub (sub r0 r1) r1) r0 + * + * if r4 is the only register live at the exit of the block + * + * TODO: make this more aggressive and build expressions across blocks + *) +Definition translate_instrs_def: + (translate_instrs f live_out emap [] = <| cmnd := []; term := Unreachable |>) ∧ + (translate_instrs f live_out emap (i :: is) = + case classify_instr i of + | Exp r => + let x = translate_reg r in + let e = translate_instr_to_exp emap i in + if r ∈ live_out then + let b = translate_instrs f live_out (emap |+ (r, Var x)) is in + b with cmnd := Move [(x, e)] :: b.cmnd + else + translate_instrs f live_out (emap |+ (r, e)) is + | Non_exp => + let b = translate_instrs f live_out emap is in + b with cmnd := translate_instr_to_inst emap i :: b.cmnd + | Term => + <| cmnd := []; term := translate_instr_to_term f emap i |> + (* TODO *) + | Call => ARB) +End + +Definition dest_label_def: + dest_label (Lab s) = s +End + +Definition dest_phi_def: + dest_phi (Phi r _ largs) = (r, largs) +End + +Definition translate_label_opt_def: + (translate_label_opt f entry None = Lab_name f entry) ∧ + (translate_label_opt f entry (Some l) = translate_label f l) +End + +Definition translate_header_def: + (translate_header f entry Entry = []) ∧ + (translate_header f entry (Head phis _) = + map + (λ(r, largs). + (translate_reg r, + map (λ(l, arg). (translate_label_opt f entry l, translate_arg fempty arg)) largs)) + (map dest_phi phis)) +End + +Definition translate_block_def: + translate_block f entry_n live_out (l, b) = + (Lab_name f (the (option_map dest_label l) entry_n), + (translate_header f entry_n b.h, translate_instrs f live_out fempty b.body)) +End + +(* Given a label and phi node, get the assignment for that incoming label. It's + * convenient to return a list, but we expect there to be exactly 1 element. *) +Definition build_move_for_lab_def: + build_move_for_lab l (r, les) = + let les = filter (λ(l', e). l = l') les in + map (λ(l', e). (r,e)) les +End + +(* Given a list of phis and a label, get the move corresponding to entering + * the block targeted by l_to from block l_from *) +Definition generate_move_block_def: + generate_move_block phis l_from l_to = + let t = Iswitch (Integer 0 (IntegerT 1)) [l_to] in + case alookup phis l_to of + | None => <| cmnd := [Move []]; term := t |> + | Some (phis, _) => + <| cmnd := [Move (flat (map (build_move_for_lab l_from) phis))]; + term := t |> +End + +Definition label_name_def: + label_to_name (Lab_name _ l) = l +End + +(* Given association list of labels and phi-block pairs, and a particular block, + * build the new move blocks for its terminator *) +Definition generate_move_blocks_def: + generate_move_blocks f used_names bs (l_from, (_, body)) = + case body.term of + | Iswitch e ls => + let (used_names, new_names) = gen_names used_names (map label_to_name ls) in + let mb = map2 (λl_to new. (Lab_name f new, generate_move_block bs l_from l_to)) ls new_names in + (used_names, (l_from, body with term := Iswitch e (map fst mb)) :: mb) +End + +Definition generate_move_blocks_list_def: + (generate_move_blocks_list f used_names bs [] = (used_names, [])) ∧ + (generate_move_blocks_list f used_names bs (b::bs') = + let (used_names, new_blocks) = generate_move_blocks f used_names bs b in + let (used_names, new_blocks2) = + generate_move_blocks_list f used_names bs bs' + in + (used_names, new_blocks :: new_blocks2)) +End + +(* Givel an association list of labels and phi-block pairs, remove the phi's, + * by generating an extra block along each control flow edge that does the move + * corresponding to the relevant phis. *) +Definition remove_phis_def: + remove_phis f used_names bs = flat (snd (generate_move_blocks_list f used_names bs bs)) +End + +Definition translate_def_def: + translate_def (Lab f) d = + let used_names = ARB in + let entry_name = gen_name used_names "entry" in + let bs = map (translate_block f entry_name UNIV) d.blocks in + <| params := map translate_reg d.params; + (* TODO: calculate these from the produced llair, and not the llvm *) + locals := ARB; + entry := Lab_name f entry_name; + cfg := remove_phis f used_names bs; + freturn := ARB; + fthrow := ARB |> +End + +export_theory (); diff --git a/sledge/semantics/settingsScript.sml b/sledge/semantics/settingsScript.sml index af03c3ef6..f5cb4095f 100644 --- a/sledge/semantics/settingsScript.sml +++ b/sledge/semantics/settingsScript.sml @@ -48,5 +48,10 @@ overload_on ("image", ``IMAGE``); overload_on ("bigunion", ``BIGUNION``); overload_on ("finite", ``FINITE``); overload_on ("card", ``CARD``); +overload_on ("map_keys", ``MAP_KEYS``); +overload_on ("alookup", ``ALOOKUP``); +overload_on ("filter", ``FILTER``); +overload_on ("map2", ``list$MAP2``); +overload_on ("foldl", ``FOLDL``); export_theory ();