You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

777 lines
22 KiB

(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
(* A mini-LLVM model, focussing on the semantics of the parts of the IR that
* are relevant for the LLVM -> LLAIR translation, especially exceptions. *)
open HolKernel boolLib bossLib Parse;
open settingsTheory;
new_theory "llvm";
numLib.prefer_num ();
(* ----- Abstract syntax ----- *)
(* Only support 1, 8, 32, and 64 bit words for now *)
Datatype:
size = W1 | W8 | W32 | W64
End
Datatype:
ty =
| FunT ty (ty list)
| IntT size
| PtrT ty
| ArrT num ty
| StrT (ty list)
End
Datatype:
label = Lab string
End
Datatype:
reg = Reg string
End
Datatype:
glob_var = Glob_var string
End
Datatype:
fun_name = Fn string
End
Datatype:
const =
| IntC size int
| StrC ((ty # const) list)
| ArrC ((ty # const) list)
| GepC ty const (ty # const) ((ty # const) list)
| GlobalC glob_var
| UndefC
End
Datatype:
arg = Constant const | Variable reg
End
Type targ = ``:ty # arg``
Datatype:
cond = Eq | Ult | Slt
End
Datatype:
instr =
(* Terminators *)
| Ret targ
| Br arg label label
| Invoke reg ty arg (targ list) label label
| Unreachable
(* Non-terminators *)
| Sub reg bool bool ty arg arg
| Extractvalue reg targ (const list)
| Insertvalue reg targ targ (const list)
| Alloca reg ty targ
| Load reg ty targ
| Store targ targ
| Gep reg ty targ (targ list)
| Ptrtoint reg targ ty
| Inttoptr reg targ ty
| Icmp reg cond ty arg arg
| Call reg ty fun_name (targ list)
(* C++ runtime functions *)
| Cxa_allocate_exn reg arg
| Cxa_throw arg arg arg
| Cxa_begin_catch reg arg
| Cxa_end_catch
| Cxa_get_exception_ptr reg arg
End
Datatype:
phi = Phi reg ty ((label option, arg) alist)
End
Datatype:
clause = Catch targ
End
Datatype:
landingpad = Landingpad ty bool (clause list)
End
Datatype:
blockHeader =
| Entry
| Head (phi list) (landingpad option)
End
Datatype:
block = <| h : blockHeader; body : instr list |>
End
Datatype:
def =
<| r : ty;
params : (ty # reg) list;
(* None -> entry block, and Some name -> non-entry block *)
blocks : (label option, block) alist |>
End
Type prog = ``:(fun_name, def) alist``
Definition terminator_def:
(terminator (Ret _) T)
(terminator (Br _ _ _) T)
(terminator (Invoke _ _ _ _ _ _) T)
(terminator Unreachable T)
(terminator _ F)
End
(* ----- Semantic states ----- *)
Datatype:
addr = A num
End
Datatype:
v =
| W1V word1
| W8V word8
| W32V word32
| W64V word64
| AggV (v list)
| PtrV word64
| UndefV
End
Datatype:
pv = <| poison : bool; value : v |>
End
Datatype:
pc = <| f : fun_name; b : label option; i : num |>
End
Datatype:
frame = <| ret : pc; saved_locals : reg |-> pv; result_var : reg; stack_allocs : addr list |>
End
Datatype:
state =
<| ip : pc;
(* Keep the size of the global with its memory address *)
globals : glob_var |-> (num # word64);
locals : reg |-> pv;
stack : frame list;
(* The set of allocated ranges. The bool indicates whether the range is
* free-able or not *)
allocations : (bool # num # num) set;
(* A byte addressed heap, with a poison tag *)
heap : addr |-> bool # word8 |>
End
(* ----- Things about types ----- *)
(* How many bytes a value of the given type occupies *)
Definition sizeof_def:
(sizeof (IntT W1) = 1)
(sizeof (IntT W8) = 1)
(sizeof (IntT W32) = 4)
(sizeof (IntT W64) = 8)
(sizeof (PtrT _) = 8)
(sizeof (ArrT n t) = n * sizeof t)
(sizeof (StrT ts) = sum (map sizeof ts))
Termination
WF_REL_TAC `measure ty_size` >> simp [] >>
Induct >> rw [definition "ty_size_def"] >> simp [] >>
first_x_assum drule >> decide_tac
End
Definition first_class_type_def:
(first_class_type (IntT _) T)
(first_class_type (PtrT _) T)
(first_class_type (ArrT _ t) first_class_type t)
(first_class_type (StrT ts) every first_class_type ts)
(first_class_type _ F)
Termination
WF_REL_TAC `measure ty_size` >>
rw [] >>
Induct_on `ts` >> rw [definition "ty_size_def"] >>
res_tac >> decide_tac
End
Definition indices_ok_def:
(indices_ok _ [] T)
(indices_ok (ArrT n t) (i::indices)
i < n indices_ok t indices)
(indices_ok (StrT ts) (i::indices)
i < length ts indices_ok (el i ts) indices)
(indices_ok _ _ F)
End
Inductive value_type:
(value_type (IntT W1) (W1V w1))
(value_type (IntT W8) (W8V w8))
(value_type (IntT W32) (W32V w32))
(value_type (IntT W64) (W64V w64))
(value_type (PtrT _) (PtrV w64))
(every (value_type t) vs length vs = n first_class_type t
value_type (ArrT n t) (AggV vs))
(list_rel value_type ts vs
value_type (StrT ts) (AggV vs))
End
(* ----- Semantic transitions ----- *)
Definition w64_cast_def:
(w64_cast w (IntT W1) = Some (W1V (w2w w)))
(w64_cast w (IntT W8) = Some (W8V (w2w w)))
(w64_cast w (IntT W32) = Some (W32V (w2w w)))
(w64_cast w (IntT W64) = Some (W64V w))
(w64_cast _ _ = None)
End
Definition cast_w64_def:
(cast_w64 (W1V w) = Some (w2w w))
(cast_w64 (W8V w) = Some (w2w w))
(cast_w64 (W32V w) = Some (w2w w))
(cast_w64 (W64V w) = Some w)
(cast_w64 _ = None)
End
Definition cast_num_def:
cast_num v = option_map w2n (cast_w64 v)
End
Definition bool_to_v_def:
bool_to_v b = if b then W1V 1w else W1V 0w
End
(* Calculate the offset given by a list of indices *)
Definition get_offset_def:
(get_offset _ [] = Some 0)
(get_offset (ArrT _ t) (i::is) =
case get_offset t is of
| None => None
| Some off => Some (i * sizeof t + off))
(get_offset (StrT ts) (i::is) =
if i < length ts then
case get_offset (el i ts) is of
| None => None
| Some off => Some (sum (map sizeof (take i ts)) + off)
else
None)
(get_offset _ _ = Some 0)
End
Definition eval_const_def:
(eval_const g (IntC W1 i) = W1V (i2w i))
(eval_const g (IntC W8 i) = W8V (i2w i))
(eval_const g (IntC W32 i) = W32V (i2w i))
(eval_const g (IntC W64 i) = W64V (i2w i))
(eval_const g (StrC tconsts) = AggV (map (eval_const g) (map snd tconsts)))
(eval_const g (ArrC tconsts) = AggV (map (eval_const g) (map snd tconsts)))
(eval_const g (GepC ty ptr (t, idx) indices) =
case (eval_const g ptr, cast_num (eval_const g idx)) of
| (PtrV w, Some n) =>
let ns = map (λ(t,ci). case cast_num (eval_const g ci) of None => 0 | Some n => n) indices in
(case get_offset ty ns of
| None => UndefV
| Some off => PtrV (n2w (w2n w + sizeof ty * n + off)))
| _ => UndefV)
(eval_const g (GlobalC var) =
case flookup g var of
| None => PtrV 0w
| Some (s,w) => PtrV w)
(eval_const g UndefC = UndefV)
Termination
WF_REL_TAC `measure (const_size o snd)` >> rw [listTheory.MEM_MAP] >>
TRY
(TRY (PairCases_on `y`) >> simp [] >>
Induct_on `tconsts` >> rw [] >> rw [definition "const_size_def"] >>
res_tac >> fs [] >> NO_TAC) >>
Induct_on `indices` >> rw [] >> rw [definition "const_size_def"] >>
fs []
End
Definition eval_def:
(eval s (Variable v) =
case flookup s.locals v of
| None => <| poison := F; value := W1V 0w |>
| Some v => v)
(eval s (Constant c) = <| poison := F; value := eval_const s.globals c |>)
End
Definition v2n_def:
(v2n (W1V b) = Some (if T then 1 else 0))
(v2n (W8V w8) = Some (w2n w8))
(v2n (W32V w32) = Some (w2n w32))
(v2n (W64V w64) = Some (w2n w64))
(v2n _ = None)
End
Definition interval_to_set_def:
interval_to_set (_, start,stop) =
{ n | start n n < stop }
End
Definition interval_ok_def:
interval_ok ((_:bool), i1, i2)
i1 i2 i2 < 2 ** 64
End
Definition is_allocated_def:
is_allocated b1 allocs
interval_ok b1
∃b2. b2 allocs fst b1 = fst b2 interval_to_set b1 interval_to_set b2
End
Definition is_free_def:
is_free b1 allocs
interval_ok b1
∀b2. b2 allocs interval_to_set b1 interval_to_set b2 =
End
Definition get_bytes_def:
get_bytes h (_, start, stop) =
map
(λoff.
case flookup h (A (start + off)) of
| None => (F, 0w)
| Some w => w)
(count_list (stop - start))
End
Definition set_bytes_def:
(set_bytes p [] n h = h)
(set_bytes p (b::bs) n h =
set_bytes p bs (Suc n) (h |+ (A n, (p, b))))
End
(* Allocate a free chunk of memory, and write non-deterministic bytes into it *)
Inductive allocate:
v2n v.value = Some m
b = (T, w2n w, w2n w + m * len)
is_free b s.allocations
length bytes = m * len
allocate s v len
(<| poison := v.poison; value := PtrV w |>,
s with <| allocations := { b } s.allocations;
heap := set_bytes v.poison bytes (w2n w) s.heap |>)
End
Definition deallocate_def:
deallocate addrs allocs h =
let to_remove = { (T, n, stop) | A n set addrs (T, n, stop) allocs } in
(allocs DIFF to_remove, fdiff h (image A (bigunion (image interval_to_set to_remove))))
End
(* Read len bytes from the list of bytes, and convert it into a word value,
* little-endian encoding *)
Definition le_read_w_def:
le_read_w len (bs : word8 list) =
if length bs < len then
(l2w 256 (map w2n bs), [])
else
(l2w 256 (map w2n (take len bs)), drop len bs)
End
(* Return len bytes that are the little-endian encoding of the argument word *)
Definition le_write_w_def:
le_write_w len w =
let (l : word8 list) = map n2w (w2l 256 w) in
take len (l ++ replicate (len - length l) 0w)
End
Definition bytes_to_value_def:
(bytes_to_value (IntT W1) (b::bs) = (W1V (w2w b), bs))
(bytes_to_value (IntT W8) (b::bs) = (W8V b, bs))
(bytes_to_value (IntT W32) bs = (W32V ## I) (le_read_w 4 bs))
(bytes_to_value (IntT W64) bs = (W64V ## I) (le_read_w 8 bs))
(bytes_to_value (PtrT _) bs = (PtrV ## I) (le_read_w 8 bs))
(bytes_to_value (ArrT n t) bs = (AggV ## I) (read_array n t bs))
(bytes_to_value (StrT ts) bs = (AggV ## I) (read_str ts bs))
(read_array 0 t bs = ([], bs))
(read_array (Suc n) t bs =
let (v, bs) = bytes_to_value t bs in
let (rest, bs) = read_array n t bs in
(v::rest, bs))
(read_str [] bs = ([], bs))
(read_str (t::ts) bs =
let (v, bs) = bytes_to_value t bs in
let (rest, bs) = read_str ts bs in
(v::rest, bs))
Termination
WF_REL_TAC `measure (λx. case x of
| INL (t, bs) => ty_size t
| INR (INL (n, t, bs)) => n + ty_size t
| INR (INR (ts, bs)) => ty1_size ts)`
End
Definition value_to_bytes_def:
(value_to_bytes (W1V w) = [w2w w])
(value_to_bytes (W8V w) = [w])
(value_to_bytes (W32V w) = le_write_w 4 w)
(value_to_bytes (W64V w) = le_write_w 8 w)
(value_to_bytes (PtrV n) = le_write_w 8 n)
(value_to_bytes (AggV vs) = flat (map value_to_bytes vs))
Termination
WF_REL_TAC `measure v_size` >>
Induct >> rw [definition "v_size_def"] >>
TRY (first_x_assum drule) >>
decide_tac
End
Definition do_sub_def:
do_sub (nuw:bool) (nsw:bool) (v1:pv) (v2:pv) =
let (diff, u_overflow, s_overflow) =
case (v1.value, v2.value) of
| (W1V w1, W1V w2) => (W1V ## I) (add_with_carry (w1, ¬w2, T))
| (W8V w1, W8V w2) => (W8V ## I) (add_with_carry (w1, ¬w2, T))
| (W32V w1, W32V w2) => (W32V ## I) (add_with_carry (w1, ¬w2, T))
| (W64V w1, W64V w2) => (W64V ## I) (add_with_carry (w1, ¬w2, T))
in
let p = ((nuw u_overflow) (nsw s_overflow) v1.poison v2.poison) in
<| poison := p; value := diff |>
End
Definition get_comp_def:
(get_comp Eq = $=)
(get_comp Slt = $<)
(get_comp Ult = $<+)
End
Definition do_icmp_def:
do_icmp c v1 v2 =
<| poison := (v1.poison v2.poison);
value := bool_to_v (
case (v1.value, v2.value) of
| (W1V w1, W1V w2) => (get_comp c) w1 w2
| (W8V w1, W8V w2) => (get_comp c) w1 w2
| (W32V w1, W32V w2) => (get_comp c) w1 w2
| (W64V w1, W64V w2) => (get_comp c) w1 w2
| (PtrV w1, PtrV w2) => (get_comp c) w1 w2) |>
End
Definition do_phi_def:
do_phi from_l s (Phi id _ entries) =
option_map (λarg. (id, eval s arg)) (alookup entries from_l)
End
Definition extract_type_def:
(extract_type t [] = Some t)
(extract_type (ArrT n t) (i::idx) =
if i < n then
extract_type t idx
else
None)
(extract_type (StrT ts) (i::idx) =
if i < length ts then
extract_type (el i ts) idx
else
None)
(extract_type _ _ = None)
End
Definition extract_value_def:
(extract_value v [] = Some v)
(extract_value (AggV vs) (i::indices) =
if i < length vs then
extract_value (el i vs) indices
else
None)
(extract_value _ _ = None)
End
Definition insert_value_def:
(insert_value _ v [] = Some v)
(insert_value (AggV vs) v (i::indices) =
if i < length vs then
case insert_value (el i vs) v indices of
| None => None
| Some v => Some (AggV (list_update v i vs))
else
None)
(insert_value _ _ _ = None)
End
Definition update_result_def:
update_result x v s = s with locals := s.locals |+ (x, v)
End
Definition inc_pc_def:
inc_pc s = s with ip := (s.ip with i := s.ip.i + 1)
End
(* NB, the semantics tracks the poison values, but not much thought has been put
* into getting it exactly right, so we don't have much confidence that it is
* exactly right. We also are currently ignoring the undefined value. *)
Inductive step_instr:
(s.stack = fr::st
deallocate fr.stack_allocs s.allocations s.heap = (new_allocs, new_h)
step_instr prog s
(Ret (t, a))
(update_result fr.result_var (eval s a)
<| ip := fr.ret;
globals := s.globals;
locals := fr.saved_locals;
stack := st;
allocations := new_allocs;
heap := new_h |>))
(* Do the phi assignments in parallel. The manual says "For the purposes of the
* SSA form, the use of each incoming value is deemed to occur on the edge from
* the corresponding predecessor block to the current block (but after any
* definition of an 'invoke' instruction's return value on the same edge)".
* So treat these two as equivalent
* %r1 = phi [0, %l]
* %r2 = phi [%r1, %l]
* and
* %r2 = phi [%r1, %l]
* %r1 = phi [0, %l]
*)
(eval s a = <| poison := p; value := W1V tf |>
l = Some (if tf = 1w then l1 else l2)
alookup prog s.ip.f = Some d
alookup d.blocks l = Some <| h := Head phis None; body := b |>
map (do_phi l s) phis = map Some updates
step_instr prog s
(Br a l1 l2)
(s with <| ip := <| f := s.ip.f; b := l; i := 0 |>;
locals := s.locals |++ updates |>))
(* TODO *)
(step_instr prog s (Invoke r t a args l1 l2) s)
(step_instr prog s
(Sub r nuw nsw t a1 a2)
(inc_pc (update_result r (do_sub nuw nsw (eval s a1) (eval s a2)) s)))
(eval s a = v
map (λci. cast_num (eval_const s.globals ci)) const_indices = map Some ns
extract_value v.value ns = Some result
step_instr prog s
(Extractvalue r (t, a) const_indices)
(inc_pc (update_result r
<| poison := v.poison; value := result |> s)))
(eval s a1 = v1
eval s a2 = v2
map (λci. cast_num (eval_const s.globals ci)) const_indices = map Some ns
insert_value v1.value v2.value ns = Some result
step_instr prog s
(Insertvalue r (t1, a1) (t2, a2) const_indices)
(inc_pc (update_result r
<| poison := (v1.poison v2.poison); value := result |> s)))
(allocate s (eval s a1) (sizeof t) (v2, s2)
step_instr prog s
(Alloca r t (t1, a1))
(inc_pc (update_result r v2 s2)))
(eval s a1 = <| poison := p1; value := PtrV w |>
interval = (freeable, w2n w, w2n w + sizeof t)
is_allocated interval s.allocations
pbytes = get_bytes s.heap interval
step_instr prog s
(Load r t (t1, a1))
(inc_pc (update_result r <| poison := (T set (map fst pbytes));
value := fst (bytes_to_value t (map snd pbytes)) |>
s)))
(eval s a2 = <| poison := p2; value := PtrV w |>
interval = (freeable, w2n w, w2n w + sizeof t)
is_allocated interval s.allocations
bytes = value_to_bytes (eval s a1).value
length bytes = sizeof t
step_instr prog s
(Store (t1, a1) (t2, a2))
(inc_pc (s with heap := set_bytes p2 bytes (w2n w) s.heap)))
(map (eval s o snd) tindices = i1::indices
(eval s a1).value = PtrV w1
cast_num i1.value = Some n
map (λx. cast_num x.value) indices = map Some ns
get_offset t1 ns = Some off
step_instr prog s
(Gep r t ((PtrT t1), a1) tindices)
(inc_pc (update_result r
<| poison := (v1.poison i1.poison exists (λv. v.poison) indices);
value := PtrV (n2w (w2n w1 + sizeof t1 * n + off)) |>
s)))
(eval s a1 = v1
v1.value = PtrV w
w64_cast w t = Some int_v
step_instr prog s
(Ptrtoint r (t1, a1) t)
(inc_pc (update_result r <| poison := v1.poison; value := int_v |> s)))
(eval s a1 = v1
cast_w64 v1.value = Some w
step_instr prog s
(Inttoptr r (t1, a1) t)
(inc_pc (update_result r <| poison := v1.poison; value := PtrV w |> s)))
(step_instr prog s
(Icmp r c t a1 a2)
(inc_pc (update_result r (do_icmp c (eval s a1) (eval s a2)) s)))
(alookup prog fname = Some d
step_instr prog s
(Call r t fname targs)
<| ip := <| f := fname; b := None; i := 0 |>;
locals := alist_to_fmap (zip (map snd d.params, map (eval s o snd) targs));
globals := s.globals;
allocations:= s.allocations;
stack :=
<| ret := s.ip with i := s.ip.i + 1;
saved_locals := s.locals;
result_var := r;
stack_allocs := [] |> :: s.stack;
heap := s.heap |>)
(* TODO *)
(step_instr prog s (Cxa_allocate_exn r a) s)
(* TODO *)
(step_instr prog s (Cxa_throw a1 a2 a3) s)
(* TODO *)
(step_instr prog s (Cxa_begin_catch r a) s)
(* TODO *)
(step_instr prog s (Cxa_end_catch) s)
(* TODO *)
(step_instr prog s (Cxa_get_exception_ptr r a) s)
End
Inductive next_instr:
alookup p s.ip.f = Some d
alookup d.blocks s.ip.b = Some b
s.ip.i < length b.body
next_instr p s (el s.ip.i b.body)
End
Inductive step:
next_instr p s i
step_instr p s i s'
step p s s'
End
(* ----- Invariants on state ----- *)
(* The allocations are of intervals that don't overlap *)
Definition allocations_ok_def:
allocations_ok s
∀i1 i2.
i1 s.allocations i2 s.allocations
interval_ok i1 interval_ok i2
(interval_to_set i1 interval_to_set i2 i1 = i2)
End
(* The heap maps exactly the address in the allocations *)
Definition heap_ok_def:
heap_ok s
∀n. flookup s.heap (A n) None ∃i. i s.allocations n interval_to_set i
End
(* All global variables are allocated in non-freeable memory *)
Definition globals_ok_def:
globals_ok s
∀g n w.
flookup s.globals g = Some (n, w)
is_allocated (F, w2n w, w2n w + n) s.allocations
End
(* Instruction pointer points to an instruction *)
Definition ip_ok_def:
ip_ok p ip
∃dec block. alookup p ip.f = Some dec alookup dec.blocks ip.b = Some block ip.i < length block.body
End
Definition prog_ok_def:
prog_ok p
((* All blocks end with terminators *)
∀fname dec bname block.
alookup p fname = Some dec
alookup dec.blocks bname = Some block
block.body [] terminator (last block.body))
((* All functions have an entry block *)
∀fname dec.
alookup p fname = Some dec ∃block. alookup dec.blocks None = Some block)
(* There is a main function *)
∃dec. alookup p (Fn "main") = Some dec
End
(* All call frames have a good return address, and the stack allocations of the
* frame are all in freeable memory *)
Definition frame_ok_def:
frame_ok p s f
ip_ok p f.ret
every (λn. ∃start stop. n = A start (T, start, stop) s.allocations) f.stack_allocs
End
(* The frames are all of, and no two stack allocations have the same address *)
Definition stack_ok_def:
stack_ok p s
every (frame_ok p s) s.stack
all_distinct (flat (map (λf. f.stack_allocs) s.stack))
End
Definition state_invariant_def:
state_invariant p s
ip_ok p s.ip allocations_ok s heap_ok s globals_ok s stack_ok p s
End
(* ----- Initial state ----- *)
(* The initial state contains allocations for the initialised global variables *)
Definition is_init_state_def:
is_init_state s (global_init : glob_var |-> ty # v)
s.ip.f = Fn "main"
s.ip.b = None
s.ip.i = 0
s.locals = fempty
s.stack = []
allocations_ok s
globals_ok s
heap_ok s
fdom s.globals = fdom global_init
(* The initial allocations for globals are not freeable *)
s.allocations { (F, start, stop) | T }
(* The heap starts with the initial values of the globals written to their
* addresses *)
∀g w t v n.
flookup s.globals g = Some (n, w) flookup global_init g = Some (t,v)
∃bytes.
get_bytes s.heap (F, w2n w, w2n w + sizeof t) = map (λb. (F,b)) bytes
bytes_to_value t bytes = (v, [])
End
export_theory();