Add initial mini-LLVM semantics written in HOL4

Summary:
Start working on a simple model of LLVM with the ultimate goal of
handling relevant and/or tricky aspects of LLVM and LLAIR and then
formalising the translation from LLVM to LLAIR.

This is a complete initial model of everything that we are interested in
except for exceptions, which should be tricky. Also no thought has gone
into the treatment of poison and the undefined value, so the treatment
is naive, which is at least partially justified because we are
interested in the semantics of LLVM IR after the optimisation passes
have run.

Include some sanity checking theorems.

Reviewed By: jberdine

Differential Revision: D16731885

fbshipit-source-id: fd53949fe
master
Scott Owens 5 years ago committed by Facebook Github Bot
parent 89782dfff9
commit 97eb280cb5

@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
all: $(DEFAULT_TARGETS)
HOLHEAP = heap
EXTRA_CLEANS = heap
OBJNAMES = wordsLib.uo intLib.uo stringLib.uo integer_wordLib.uo finite_mapLib.uo alistLib.uo
DEPS = $(patsubst %,$(dprot $(SIGOBJ)/%),$(OBJNAMES))
$(HOLHEAP): $(DEPS)
$(protect $(HOLDIR)/bin/buildheap) -o $@ $(OBJNAMES)

@ -0,0 +1,626 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
(* A mini-LLVM model, focussing on the semantics of the parts of the IR that
* are relevant for the LLVM -> LLAIR translation, especially exceptions. *)
open HolKernel boolLib bossLib Parse;
open settingsTheory;
new_theory "llvm";
numLib.prefer_num ();
(* ----- Abstract syntax ----- *)
(* Only support 1, 8, 32, and 64 bit words for now *)
Datatype `
size = W1 | W8 | W32 | W64`;
Datatype `
ty =
| FunT ty (ty list)
| IntT size
| PtrT ty
| ArrT num ty
| StrT (ty list)`;
Datatype `
label = Lab string`;
Datatype `
loc_var = Loc string`;
Datatype `
glob_var = GlobName string`;
Datatype `
fun_name = Fn string`;
Datatype `
const =
| IntC size int
| StrC ((ty # const) list)
| ArrC ((ty # const) list)
| GepC ty const (ty # const) ((ty # const) list)
| GlobalC glob_var
| UndefC`;
Datatype `
arg = Constant const | Variable loc_var`;
type_abbrev ("targ", ``:ty # arg``);
Datatype `
cond = Eq | Ult | Slt`;
Datatype `
instr =
(* Terminators *)
| Ret targ
| Br arg label label
| Invoke loc_var ty arg (targ list) label label
| Unreachable
(* Non-terminators *)
| Sub loc_var bool bool ty arg arg
| Extractvalue loc_var targ (const list)
| Insertvalue loc_var targ targ (const list)
| Alloca loc_var ty targ
| Load loc_var ty targ
| Store targ targ
| Gep loc_var targ (targ list)
| Ptrtoint loc_var targ ty
| Inttoptr loc_var targ ty
| Icmp cond ty arg arg
| Call loc_var ty fun_name (targ list)
(* C++ runtime functions *)
| Cxa_allocate_exn loc_var arg
| Cxa_throw arg arg arg
| Cxa_begin_catch loc_var arg
| Cxa_end_catch
| Cxa_get_exception_ptr loc_var arg`;
Datatype `
phi = Phi loc_var ty (label option |-> arg)`;
Datatype `
clause = Catch targ`;
Datatype `
landingpad = Landingpad ty bool (clause list)`;
Datatype `
blockHeader =
| Entry
| Head (phi list) (landingpad option)`;
Datatype `
block = <| h : blockHeader; body : instr list |>`;
Datatype `
def =
<| r : ty;
params : loc_var list;
(* None -> entry block, and Some name -> non-entry block *)
blocks : label option |-> block |>`;
type_abbrev ("prog", ``:fun_name |-> def``);
(* ----- Semantic states ----- *)
Datatype `
addr = A num`;
Datatype `
v =
| W1V word1
| W8V word8
| W32V word32
| W64V word64
| AggV (v list)
| PtrV word64
| UndefV`;
Datatype `
pv = <| poison : bool; value : v |>`;
Datatype `
pc = <| f : fun_name; b : label option; i : num |>`;
Datatype `
frame = <| ret : pc; saved_locals : loc_var |-> pv; result_var : loc_var; stack_allocs : addr list |>`;
Datatype `
state =
<| ip : pc;
globals : glob_var |-> word64;
locals : loc_var |-> pv;
stack : frame list;
allocations : (num # num) set;
heap : addr |-> bool # word8 |>`;
(* ----- Things about types ----- *)
Definition sizeof_def:
(sizeof (IntT W1) = 1)
(sizeof (IntT W8) = 1)
(sizeof (IntT W32) = 4)
(sizeof (IntT W64) = 8)
(sizeof (PtrT _) = 8)
(sizeof (ArrT n t) = n * sizeof t)
(sizeof (StrT ts) = sum (map sizeof ts))
Termination
WF_REL_TAC `measure ty_size` >> simp [] >>
Induct >> rw [definition "ty_size_def"] >> simp [] >>
first_x_assum drule >> decide_tac
End
Definition first_class_type_def:
(first_class_type (IntT _) T)
(first_class_type (PtrT _) T)
(first_class_type (ArrT _ t) first_class_type t)
(first_class_type (StrT ts) every first_class_type ts)
(first_class_type _ F)
Termination
WF_REL_TAC `measure ty_size` >>
rw [] >>
Induct_on `ts` >> rw [definition "ty_size_def"] >>
res_tac >> decide_tac
End
Definition indices_ok_def:
(indices_ok _ [] T)
(indices_ok (ArrT n t) (i::indices)
i < n indices_ok t indices)
(indices_ok (StrT ts) (i::indices)
i < length ts indices_ok (el i ts) indices)
(indices_ok _ _ F)
End
Inductive value_type:
(value_type (IntT W1) (W1V w1))
(value_type (IntT W8) (W8V w8))
(value_type (IntT W32) (W32V w32))
(value_type (IntT W64) (W64V w64))
(value_type (PtrT _) (PtrV w64))
(every (value_type t) vs length vs = n first_class_type t
value_type (ArrT n t) (AggV vs))
(list_rel value_type ts vs
value_type (StrT ts) (AggV vs))
End
(* ----- Semantic transitions ----- *)
Definition w64_cast_def:
(w64_cast w (IntT W1) = Some (W1V (w2w w)))
(w64_cast w (IntT W8) = Some (W8V (w2w w)))
(w64_cast w (IntT W32) = Some (W32V (w2w w)))
(w64_cast w (IntT W64) = Some (W64V w))
(w64_cast _ _ = None)
End
Definition cast_w64_def:
(cast_w64 (W1V w) = Some (w2w w))
(cast_w64 (W8V w) = Some (w2w w))
(cast_w64 (W32V w) = Some (w2w w))
(cast_w64 (W64V w) = Some w)
(cast_w64 _ = None)
End
Definition cast_num_def:
cast_num v = option_map w2n (cast_w64 v)
End
Definition bool_to_v_def:
bool_to_v b = if b then W1V 1w else W1V 0w
End
Definition get_offset_def:
(get_offset _ [] = Some 0)
(get_offset (ArrT _ t) (i::is) =
case get_offset t is of
| None => None
| Some off => Some (i * sizeof t + off))
(get_offset (StrT ts) (i::is) =
if i < length ts then
case get_offset (el i ts) is of
| None => None
| Some off => Some (sum (map sizeof (take i ts)) + off)
else
None)
(get_offset _ _ = Some 0)
End
Definition eval_const_def:
(eval_const g (IntC W1 i) = W1V (i2w i))
(eval_const g (IntC W8 i) = W8V (i2w i))
(eval_const g (IntC W32 i) = W32V (i2w i))
(eval_const g (IntC W64 i) = W64V (i2w i))
(eval_const g (StrC tconsts) = AggV (map (eval_const g) (map snd tconsts)))
(eval_const g (ArrC tconsts) = AggV (map (eval_const g) (map snd tconsts)))
(eval_const g (GepC ty ptr (t, idx) indices) =
case (eval_const g ptr, cast_num (eval_const g idx)) of
| (PtrV w, Some n) =>
let ns = map (λ(t,ci). case cast_num (eval_const g ci) of None => 0 | Some n => n) indices in
(case get_offset ty ns of
| None => UndefV
| Some off => PtrV (n2w (w2n w + sizeof ty * n + off)))
| _ => UndefV)
(eval_const g (GlobalC var) =
case flookup g var of
| None => PtrV 0w
| Some w => PtrV w)
(eval_const g UndefC = UndefV)
Termination
WF_REL_TAC `measure (const_size o snd)` >> rw [listTheory.MEM_MAP] >>
TRY
(TRY (PairCases_on `y`) >> simp [] >>
Induct_on `tconsts` >> rw [] >> rw [definition "const_size_def"] >>
res_tac >> fs [] >> NO_TAC) >>
Induct_on `indices` >> rw [] >> rw [definition "const_size_def"] >>
fs []
End
Definition eval_def:
(eval s (Variable v) =
case flookup s.locals v of
| None => <| poison := F; value := W1V 0w |>
| Some v => v)
(eval s (Constant c) = <| poison := F; value := eval_const s.globals c |>)
End
Definition v2n_def:
(v2n (W1V b) = Some (if T then 1 else 0))
(v2n (W8V w8) = Some (w2n w8))
(v2n (W32V w32) = Some (w2n w32))
(v2n (W64V w64) = Some (w2n w64))
(v2n _ = None)
End
Definition update_result_def:
update_result x v s = s with locals := s.locals |+ (x, v)
End
Definition inc_pc_def:
inc_pc s = s with ip := (s.ip with i := s.ip.i + 1)
End
Definition interval_to_set_def:
interval_to_set (start,stop) =
{ n | start n n < stop }
End
Definition interval_ok_def:
interval_ok (i1, i2)
i1 i2 i2 < 2 ** 64
End
Definition is_allocated_def:
is_allocated b1 allocs
interval_ok b1
∃b2. b2 allocs interval_to_set b1 interval_to_set b2
End
Definition is_free_def:
is_free b1 allocs
interval_ok b1
∀b2. b2 allocs interval_to_set b1 interval_to_set b2 =
End
Inductive allocate:
(v2n v.value = Some m
b = (w2n w, w2n w + m * len)
is_free b s.allocations
allocate s v len
(<| poison := v.poison; value := PtrV w |>,
s with allocations := { b } s.allocations))
End
Definition deallocate_def:
(deallocate (A n) (Some allocs) =
if ∃m. (n,m) allocs then
Some { (start,stop) | (start,stop) allocs start n }
else
None)
(deallocate _ None = None)
End
Definition get_bytes_def:
get_bytes h (start, stop) =
map
(λoff.
case flookup h (A (start + off)) of
| None => (F, 0w)
| Some w => w)
(count_list (stop - start))
End
Definition le_read_w_def:
le_read_w len (bs : word8 list) =
if length bs < len then
(l2w 256 (map w2n bs), [])
else
(l2w 256 (map w2n (take len bs)), drop len bs)
End
Definition le_write_w_def:
le_write_w len w =
let (l : word8 list) = map n2w (w2l 256 w) in
take len (l ++ replicate (len - length l) 0w)
End
Definition bytes_to_value_def:
(bytes_to_value (IntT W1) (b::bs) = (W1V (w2w b), bs))
(bytes_to_value (IntT W8) (b::bs) = (W8V b, bs))
(bytes_to_value (IntT W32) bs = (W32V ## I) (le_read_w 4 bs))
(bytes_to_value (IntT W64) bs = (W64V ## I) (le_read_w 8 bs))
(bytes_to_value (PtrT _) bs = (PtrV ## I) (le_read_w 8 bs))
(bytes_to_value (ArrT n t) bs = (AggV ## I) (read_array n t bs))
(bytes_to_value (StrT ts) bs = (AggV ## I) (read_str ts bs))
(read_array 0 t bs = ([], bs))
(read_array (Suc n) t bs =
let (v, bs) = bytes_to_value t bs in
let (rest, bs) = read_array n t bs in
(v::rest, bs))
(read_str [] bs = ([], bs))
(read_str (t::ts) bs =
let (v, bs) = bytes_to_value t bs in
let (rest, bs) = read_str ts bs in
(v::rest, bs))
Termination
WF_REL_TAC `measure (λx. case x of
| INL (t, bs) => ty_size t
| INR (INL (n, t, bs)) => n + ty_size t
| INR (INR (ts, bs)) => ty1_size ts)`
End
Definition value_to_bytes_def:
(value_to_bytes (W1V w) = [w2w w])
(value_to_bytes (W8V w) = [w])
(value_to_bytes (W32V w) = le_write_w 4 w)
(value_to_bytes (W64V w) = le_write_w 8 w)
(value_to_bytes (PtrV n) = le_write_w 8 n)
(value_to_bytes (AggV vs) = flat (map value_to_bytes vs))
Termination
WF_REL_TAC `measure v_size` >>
Induct >> rw [definition "v_size_def"] >>
TRY (first_x_assum drule) >>
decide_tac
End
Definition set_bytes_def:
(set_bytes p [] n h = h)
(set_bytes p (b::bs) n h =
set_bytes p bs (Suc n) (h |+ (A n, (p, b))))
End
Definition do_sub_def:
do_sub (nuw:bool) (nsw:bool) (v1:pv) (v2:pv) =
let (diff, u_overflow, s_overflow) =
case (v1.value, v2.value) of
| (W1V w1, W1V w2) => (W1V ## I) (add_with_carry (w1, ¬w2, T))
| (W8V w1, W8V w2) => (W8V ## I) (add_with_carry (w1, ¬w2, T))
| (W32V w1, W32V w2) => (W32V ## I) (add_with_carry (w1, ¬w2, T))
| (W64V w1, W64V w2) => (W64V ## I) (add_with_carry (w1, ¬w2, T))
in
let p = ((nuw u_overflow) (nsw s_overflow) v1.poison v2.poison) in
<| poison := p; value := diff |>
End
Definition get_comp_def:
(get_comp Eq = $=)
(get_comp Slt = $<)
(get_comp Ult = $<+)
End
Definition do_icmp_def:
do_icmp c v1 v2 =
<| poison := (v1.poison v2.poison);
value := bool_to_v (
case (v1.value, v2.value) of
| (W1V w1, W1V w2) => (get_comp c) w1 w2
| (W8V w1, W8V w2) => (get_comp c) w1 w2
| (W32V w1, W32V w2) => (get_comp c) w1 w2
| (W64V w1, W64V w2) => (get_comp c) w1 w2
| (PtrV w1, PtrV w2) => (get_comp c) w1 w2) |>
End
Definition do_phi_def:
do_phi from_l s (Phi id _ entries) =
option_map (λarg. (id, eval s arg)) (flookup entries from_l)
End
Definition extract_value_def:
(extract_value v [] = Some v)
(extract_value (AggV vs) (i::indices) =
if i < length vs then
extract_value (el i vs) indices
else
None)
(extract_value _ _ = None)
End
Definition insert_value_def:
(insert_value _ v [] = Some v)
(insert_value (AggV vs) v (i::indices) =
if i < length vs then
case insert_value (el i vs) v indices of
| None => None
| Some v => Some (AggV (list_update v i vs))
else
None)
(insert_value _ _ _ = None)
End
(* NB, the semantics tracks the poison values, but not much thought has been put
* into getting it exactly right, so we don't have much confidence that it is
* exactly right. We also are currently ignoring the undefined value. *)
Inductive step_instr:
(s.stack = fr::st
FOLDR deallocate (Some s.allocations) fr.stack_allocs = Some new_allocs
step_instr prog s
(Ret (t, a))
(update_result fr.result_var (eval s a)
<| ip := fr.ret;
locals := fr.saved_locals;
stack := st;
allocations := new_allocs;
heap := heap |>))
(* Do the phi assignments in parallel. The manual says "For the purposes of the
* SSA form, the use of each incoming value is deemed to occur on the edge from
* the corresponding predecessor block to the current block (but after any
* definition of an 'invoke' instruction's return value on the same edge)".
* So treat these two as equivalent
* %r1 = phi [0, %l]
* %r2 = phi [%r1, %l]
* and
* %r2 = phi [%r1, %l]
* %r1 = phi [0, %l]
*)
(eval s a = <| poison := p; value := W1V tf |>
l = Some (if tf = 1w then l1 else l2)
flookup prog s.ip.f = Some d
flookup d.blocks l = Some <| h := Head phis None; body := b |>
map (do_phi l s) phis = map Some updates
step_instr prog s
(Br a l1 l2)
(s with <| ip := <| f := s.ip.f; b := l; i := 0 |>;
locals := s.locals |++ updates |>))
(* TODO *)
(step_instr prog s (Invoke r t a args l1 l2) s)
(step_instr prog s
(Sub r nuw nsw t a1 a2)
(inc_pc (update_result r (do_sub nuw nsw (eval s a1) (eval s a2)) s)))
(eval s a = v
map (λci. cast_num (eval_const s.globals ci)) const_indices = map Some ns
extract_value v.value ns = Some result
step_instr prog s
(Extractvalue r (t, a) const_indices)
(inc_pc (update_result r
<| poison := v.poison; value := result |> s)))
(eval s a1 = v1
eval s a2 = v2
map (λci. cast_num (eval_const s.globals ci)) const_indices = map Some ns
insert_value v1.value v2.value ns = Some result
step_instr prog s
(Insertvalue r (t1, a1) (t2, a2) const_indices)
(inc_pc (update_result r
<| poison := (v1.poison v2.poison); value := result |> s)))
(allocate s (eval s a1) (sizeof t) (v2, s2)
step_instr prog s
(Alloca r t (t1, a1))
(inc_pc (update_result r v2 s2)))
(eval s a1 = <| poison := p1; value := PtrV w |>
interval = (w2n w, w2n w + sizeof t)
is_allocated interval s.allocations
pbytes = get_bytes s.heap interval
step_instr prog s
(Load r t (t1, a1))
(inc_pc (update_result r <| poison := (T set (map fst pbytes));
value := fst (bytes_to_value t (map snd pbytes)) |>
s)))
(eval s a2 = <| poison := p2; value := PtrV w |>
interval = (w2n w, w2n w + sizeof t)
is_allocated interval s.allocations
bytes = value_to_bytes (eval s a1).value
length bytes = sizeof t
step_instr prog s
(Store (t1, a1) (t2, a2))
(inc_pc (s with heap := set_bytes p2 bytes (w2n w) s.heap)))
(map (eval s o snd) tindices = i1::indices
(eval s a1).value = PtrV w1
cast_num i1.value = Some n
map (λx. cast_num x.value) indices = map Some ns
get_offset t1 ns = Some off
step_instr prog s
(Gep r ((PtrT t1), a1) tindices)
(inc_pc (update_result r
<| poison := (v1.poison i1.poison exists (λv. v.poison) indices);
value := PtrV (n2w (w2n w1 + sizeof t1 * n + off)) |>
s)))
(eval s a1 = v1
v1.value = PtrV w
w64_cast w t = Some int_v
step_instr prog s
(Ptrtoint r (t1, a1) t)
(inc_pc (update_result r <| poison := v1.poison; value := int_v |> s)))
(eval s a1 = v1
cast_w64 v1.value = Some w
step_instr prog s
(Inttoptr r (t1, a1) t)
(inc_pc (update_result r <| poison := v1.poison; value := PtrV w |> s)))
(step_instr prog s
(Icmp c t a1 a2)
(inc_pc (update_result r (do_icmp c (eval s a1) (eval s a2)) s)))
(flookup prog fname = Some d
step_instr prog s
(Call r t fname targs)
<| ip := <| f := fname; b := None; i := 0 |>;
locals := alist_to_fmap (zip (d.params, map (eval s o snd) targs));
stack :=
<| ret := s.ip with i := s.ip.i + 1;
saved_locals := s.locals;
result_var := r;
stack_allocs := [] |> :: s.stack;
heap := heap |>)
(* TODO *)
(step_instr prog s (Cxa_allocate_exn r a) s)
(* TODO *)
(step_instr prog s (Cxa_throw a1 a2 a3) s)
(* TODO *)
(step_instr prog s (Cxa_begin_catch r a) s)
(* TODO *)
(step_instr prog s (Cxa_end_catch) s)
(* TODO *)
(step_instr prog s (Cxa_get_exception_ptr r a) s)
End
Inductive next_instr:
flookup p s.ip.f = Some d
flookup d.blocks s.ip.b = Some b
s.ip.i < length b.body
next_instr p (el s.ip.i b.body)
End
Inductive step:
next_instr p i
step_instr p s i s'
step p s s'
End
export_theory();

@ -0,0 +1,596 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
(* Properties of the mini-LLVM model *)
open HolKernel boolLib bossLib Parse;
open pairTheory listTheory rich_listTheory arithmeticTheory wordsTheory;
open logrootTheory numposrepTheory;
open settingsTheory llvmTheory;
new_theory "llvm_prop";
numLib.prefer_num();
(* ----- Theorems about list library functions ----- *)
(* Could be upstreamed to HOL *)
Theorem dropWhile_map:
∀P f l. dropWhile P (map f l) = map f (dropWhile (P o f) l)
Proof
Induct_on `l` >> rw []
QED
Theorem dropWhile_prop:
∀P l x. x < length l - length (dropWhile P l) P (el x l)
Proof
Induct_on `l` >> rw [] >>
Cases_on `x` >> fs []
QED
Theorem dropWhile_rev_take:
∀P n l x.
let len = length (dropWhile P (reverse (take n l))) in
x + len < n n length l P (el (x + len) l)
Proof
rw [] >>
`P (el ((n - 1 - x - length (dropWhile P (reverse (take n l))))) (reverse (take n l)))`
by (irule dropWhile_prop >> simp [LENGTH_REVERSE]) >>
rfs [EL_REVERSE, EL_TAKE, PRE_SUB1]
QED
Theorem take_replicate:
∀m n x. take m (replicate n x) = replicate (min m n) x
Proof
Induct_on `n` >> rw [TAKE_def, MIN_DEF] >> fs [] >>
Cases_on `m` >> rw []
QED
Theorem length_take_less_eq:
∀n l. length (take n l) n
Proof
Induct_on `l` >> rw [TAKE_def] >>
Cases_on `n` >> fs []
QED
Theorem flat_drop:
∀n m ls. flat (drop m ls) = drop (length (flat (take m ls))) (flat ls)
Proof
Induct_on `ls` >> rw [DROP_def, DROP_APPEND] >>
irule (GSYM DROP_LENGTH_TOO_LONG) >> simp []
QED
Theorem take_is_prefix:
∀n l. take n l l
Proof
Induct_on `l` >> rw [TAKE_def]
QED
Theorem sum_prefix:
∀l1 l2. l1 l2 sum l1 sum l2
Proof
Induct >> rw [] >> Cases_on `l2` >> fs []
QED
(* ----- Theorems about log ----- *)
(* Could be upstreamed to HOL *)
Theorem mul_div_bound:
∀m n. n 0 m - (n - 1) n * (m DIV n) n * (m DIV n) m
Proof
rw [] >>
`0 < n` by decide_tac >>
drule DIVISION >> disch_then (qspec_then `m` mp_tac) >>
decide_tac
QED
Theorem exp_log_bound:
∀b n. 1 < b n 0 n DIV b + 1 b ** (log b n) b ** (log b n) n
Proof
rw [] >> `0 < n` by decide_tac >>
drule LOG >> disch_then drule >> rw [] >>
fs [ADD1, EXP_ADD] >>
simp [DECIDE ``∀x y. x + 1 y x < y``] >>
`∃x. b = Suc x` by intLib.COOPER_TAC >>
`b * (n DIV b) < b * b ** log b n` suffices_by metis_tac [LESS_MULT_MONO] >>
pop_assum kall_tac >>
`b 0` by decide_tac >>
drule mul_div_bound >> disch_then (qspec_then `n` mp_tac) >>
decide_tac
QED
Theorem log_base_power:
∀n b. 1 < b log b (b ** n) = n
Proof
Induct >> rw [EXP, LOG_1] >>
Cases_on `n` >> rw [LOG_BASE] >>
first_x_assum drule >> rw [] >>
simp [Once EXP, LOG_MULT]
QED
Theorem log_change_base_power:
∀m n b. 1 < b m 0 n 0 log (b ** n) m = log b m DIV n
Proof
rw [] >> irule LOG_UNIQUE >>
rw [ADD1, EXP_MUL, LEFT_ADD_DISTRIB] >>
qmatch_goalsub_abbrev_tac `x DIV _` >>
drule mul_div_bound >> disch_then (qspec_then `x` mp_tac) >> rw []
>- (
irule LESS_LESS_EQ_TRANS >>
qexists_tac `b ** (x+1)` >> rw [] >>
unabbrev_all_tac >>
simp [EXP_ADD] >>
`b * (m DIV b + 1) b * b ** log b m`
by metis_tac [exp_log_bound, LESS_MONO_MULT, MULT_COMM] >>
`m < b * (m DIV b + 1)` suffices_by decide_tac >>
simp [LEFT_ADD_DISTRIB] >>
`b 0` by decide_tac >>
`m - (b - 1) b * (m DIV b)` by metis_tac [mul_div_bound] >>
fs [])
>- (
irule LESS_EQ_TRANS >>
qexists_tac `b ** (log b m)` >> rw [] >>
unabbrev_all_tac >>
metis_tac [exp_log_bound])
QED
(* ----- Theorems about word stuff ----- *)
Theorem l2n_padding:
∀ws n. l2n 256 (ws ++ map w2n (replicate n 0w)) = l2n 256 ws
Proof
Induct >> rw [l2n_def] >>
Induct_on `n` >> rw [l2n_def]
QED
Theorem l2n_0:
∀l b. b 0 every ($> b) l (l2n b l = 0 every ($= 0) l)
Proof
Induct >> rw [l2n_def] >>
eq_tac >> rw []
QED
Theorem mod_n2l:
∀d n. 0 < d map (\x. x MOD d) (n2l d n) = n2l d n
Proof
rw [] >> drule n2l_BOUND >> disch_then (qspec_then `n` mp_tac) >>
qspec_tac (`n2l d n`, `l`) >>
Induct >> rw []
QED
(* ----- Theorems about converting between values and byte lists ----- *)
Theorem le_write_w_length:
∀l x. length (le_write_w l w) = l
Proof
rw [le_write_w_def]
QED
Theorem v2b_size:
∀t v. value_type t v length (value_to_bytes v) = sizeof t
Proof
ho_match_mp_tac value_type_ind >>
rw [value_to_bytes_def, sizeof_def]
>- metis_tac [le_write_w_length]
>- metis_tac [le_write_w_length]
>- metis_tac [le_write_w_length]
>- (Induct_on `vs` >> rw [ADD1] >> fs [])
>- (
pop_assum mp_tac >>
qid_spec_tac `vs` >> qid_spec_tac `ts` >>
ho_match_mp_tac LIST_REL_ind >> rw [])
QED
Theorem b2v_size:
(∀t bs. first_class_type t sizeof t length bs
∃v. bytes_to_value t bs = (v, drop (sizeof t) bs))
(∀n t bs. first_class_type t n * sizeof t length bs
∃vs. read_array n t bs = (vs, drop (n * sizeof t) bs))
(∀ts bs. every first_class_type ts sum (map sizeof ts) length bs
∃vs. read_str ts bs = (vs, drop (sum (map sizeof ts)) bs))
Proof
ho_match_mp_tac bytes_to_value_ind >>
rw [sizeof_def, bytes_to_value_def, le_read_w_def] >>
fs [first_class_type_def]
>- (simp [PAIR_MAP] >> metis_tac [SND])
>- (
pairarg_tac >> rw [] >> pairarg_tac >> rw [] >>
fs [ADD1] >> rw [] >> fs [DROP_DROP_T, LEFT_ADD_DISTRIB])
>- fs [DROP_DROP_T, LEFT_ADD_DISTRIB]
QED
Theorem b2v_smaller:
∀t bs. first_class_type t sizeof t length bs
length (snd (bytes_to_value t bs)) = length bs - sizeof t
Proof
rw [] >> imp_res_tac b2v_size >>
Cases_on `bytes_to_value t bs` >> fs []
QED
Theorem b2v_append:
(∀t bs. first_class_type t sizeof t length bs
bytes_to_value t (bs ++ bs') = (I ## (λx. x ++ bs')) (bytes_to_value t bs))
(∀n t bs. first_class_type t n * sizeof t length bs
∃vs. read_array n t (bs ++ bs') = (I ## (λx. x ++ bs')) (read_array n t bs))
(∀ts bs. every first_class_type ts sum (map sizeof ts) length bs
∃vs. read_str ts (bs ++ bs') = (I ## (λx. x ++ bs')) (read_str ts bs))
Proof
ho_match_mp_tac bytes_to_value_ind >>
rw [sizeof_def, bytes_to_value_def, le_read_w_def] >>
fs [first_class_type_def, TAKE_APPEND, DROP_APPEND,
DECIDE ``!x y. x y x - y = 0n``, ETA_THM]
>- (simp [PAIR_MAP] >> metis_tac [SND])
>- (simp [PAIR_MAP] >> metis_tac [SND])
>- (
rpt (pairarg_tac >> simp []) >> fs [ADD1] >>
BasicProvers.VAR_EQ_TAC >> fs [LEFT_ADD_DISTRIB] >>
first_x_assum irule >>
`sizeof t length bs` by decide_tac >>
imp_res_tac b2v_smaller >> rfs [])
>- (
rpt (pairarg_tac >> simp []) >> fs [ADD1] >>
BasicProvers.VAR_EQ_TAC >> fs [LEFT_ADD_DISTRIB] >>
first_x_assum irule >>
`sizeof t length bs` by decide_tac >>
imp_res_tac b2v_smaller >> rfs [])
QED
Theorem le_read_write:
∀n w bs.
n 0 dimword (:'a) 256 ** n le_read_w n (le_write_w n (w :'a word) bs) = (w, bs)
Proof
rw [le_read_w_def, le_write_w_length]
>- (
`take n (le_write_w n w bs) = le_write_w n w`
by metis_tac [le_write_w_length, TAKE_LENGTH_APPEND] >>
simp [le_write_w_def, w2l_def, l2w_def] >>
Cases_on `w` >> simp [] >> fs [l2n_padding, TAKE_APPEND, take_replicate] >>
simp [MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF, mod_n2l] >>
rename1 `n2l 256 m` >>
`length (n2l 256 m) n`
by (
rw [LENGTH_n2l] >>
`256 = 2 ** 8` by EVAL_TAC >>
ASM_REWRITE_TAC [] >> simp [log_change_base_power, GSYM LESS_EQ] >>
`n2w m 0w` by simp [] >>
drule LOG2_w2n_lt >> rw [] >> fs [bitTheory.LOG2_def, dimword_def] >>
`8 * (log 2 m DIV 8) log 2 m` by metis_tac [mul_div_bound, EVAL ``8 0n``] >>
`LOG 2 (2 ** dimindex (:'a)) LOG 2 (256 ** n)` by simp [LOG_LE_MONO] >>
pop_assum mp_tac >>
`256 = 2 ** 8` by EVAL_TAC >>
ASM_REWRITE_TAC [EXP_MUL] >> simp [log_base_power]) >>
simp [mod_n2l, l2n_n2l, TAKE_LENGTH_TOO_LONG])
>- metis_tac [le_write_w_length, DROP_LENGTH_APPEND]
QED
Theorem le_write_read:
∀n w bs bs'.
256 ** n dimword (:'a)
n length bs
le_read_w n bs = (w:'a word, bs')
le_write_w n w ++ bs' = bs
Proof
rw [le_read_w_def] >>
qmatch_goalsub_abbrev_tac `l2w _ l` >>
`le_write_w n (l2w 256 l) = take n bs` suffices_by metis_tac [TAKE_DROP] >>
simp [le_write_w_def, w2l_l2w] >>
`l2n 256 l < 256 ** n`
by (
`n length bs` by decide_tac >>
metis_tac [l2n_lt, LENGTH_TAKE, LENGTH_MAP, EVAL ``0n < 256``]) >>
fs [] >>
`every ($> 256) l`
by (
simp [EVERY_MAP, Abbr `l`] >> irule EVERY_TAKE >> simp [] >>
rpt (pop_assum kall_tac) >>
Induct_on `bs` >> rw [] >>
Cases_on `h` >> fs []) >>
rw [n2l_l2n]
>- (
rw [TAKE_def, take_replicate] >>
Cases_on `n` >> fs [] >>
rfs [l2n_0] >> unabbrev_all_tac >> fs [EVERY_MAP] >>
ONCE_REWRITE_TAC [GSYM REPLICATE] >>
qmatch_goalsub_abbrev_tac `take n _` >>
qpat_assum `¬(_ < _)` mp_tac >>
qpat_assum `every (\x. 0 = w2n x) _` mp_tac >>
rpt (pop_assum kall_tac) >>
qid_spec_tac `bs` >>
Induct_on `n` >> rw [] >>
Cases_on `bs` >> rw [] >> fs [] >>
Cases_on `h` >> fs [] >>
first_x_assum irule >> rw [] >>
irule MONO_EVERY >>
qexists_tac `(λx. 0 = w2n x)` >> rw []) >>
fs [MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF] >>
`exists (\y. 0 y) l`
by (
fs [l2n_eq_0, combinTheory.o_DEF] >> fs [EXISTS_MEM, EVERY_MEM] >>
qexists_tac `x` >> rfs [MOD_MOD, GREATER_DEF]) >>
simp [LOG_l2n_dropWhile] >>
`length (dropWhile ($= 0) (reverse l)) 0`
by (
Cases_on `l` >> fs [dropWhile_eq_nil, combinTheory.o_DEF, EXISTS_REVERSE]) >>
`0 < length (dropWhile ($= 0) (reverse l))` by decide_tac >>
fs [SUC_PRE] >>
`map n2w l = take n bs`
by (simp [Abbr `l`, MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF, n2w_w2n]) >>
simp [TAKE_TAKE_MIN] >>
`length l = n` by simp [Abbr `l`] >>
`length (dropWhile ($= 0) (reverse l)) n`
by metis_tac [LESS_EQ_TRANS, LENGTH_dropWhile_LESS_EQ, LENGTH_REVERSE] >>
rw [MIN_DEF] >> fs []
>- (
simp [TAKE_APPEND, TAKE_TAKE_MIN, MIN_DEF, take_replicate] >>
`replicate (length l length (dropWhile ($= 0) (reverse l))) 0w =
take (length l (length (dropWhile ($= 0) (reverse l)))) (drop (length (dropWhile ($= 0) (reverse l))) bs)`
suffices_by (rw [] >> irule take_drop_partition >> simp []) >>
rw [LIST_EQ_REWRITE, EL_REPLICATE, EL_TAKE, EL_DROP] >>
`length (dropWhile ($= 0) (reverse l)) =
length (dropWhile (λx. 0 = w2n x) (reverse (take (length l) bs)))`
by (
`reverse l = reverse (take (length l) (map w2n bs))` by metis_tac [] >>
ONCE_ASM_REWRITE_TAC [] >>
qpat_x_assum `Abbrev (l = _)` kall_tac >>
simp [GSYM MAP_TAKE, GSYM MAP_REVERSE, dropWhile_map, combinTheory.o_DEF]) >>
fs [] >>
`x + length (dropWhile (λx. 0 = w2n x) (reverse (take (length l) bs))) < length l` by decide_tac >>
drule (SIMP_RULE std_ss [LET_THM] dropWhile_rev_take) >>
rw [] >>
REWRITE_TAC [GSYM w2n_11, word_0_n2w] >>
simp [])
>- rw [TAKE_APPEND, TAKE_TAKE]
QED
Theorem b2v_v2b:
∀v t bs. value_type t v bytes_to_value t (value_to_bytes v ++ bs) = (v, bs)
Proof
gen_tac >> completeInduct_on `v_size v` >>
rw [] >>
pop_assum mp_tac >> simp [value_type_cases] >>
rw [] >>
rw [bytes_to_value_def, value_to_bytes_def, le_read_write]
>- wordsLib.WORD_DECIDE_TAC
>- (
qmatch_abbrev_tac `_ x = _` >>
`x = (vs, bs)` suffices_by (simp [PAIR_MAP] >> metis_tac [PAIR_EQ, FST, SND]) >>
unabbrev_all_tac >>
qid_spec_tac `bs` >> Induct_on `vs` >> simp [bytes_to_value_def] >>
rw [] >> fs [v_size_def] >>
pairarg_tac >> fs [] >>
pairarg_tac >> fs [] >>
rename1 `value_type t v1` >>
first_x_assum (qspec_then `v_size v1` mp_tac) >> simp [] >>
disch_then (qspec_then `v1` mp_tac) >> simp [] >>
disch_then (qspec_then `t` mp_tac) >> simp [] >>
qmatch_assum_abbrev_tac `bytes_to_value _ (_ ++ bs1 ++ _) = _` >>
disch_then (qspec_then `bs1++bs` mp_tac) >> simp [] >>
unabbrev_all_tac >> strip_tac >> fs [] >>
first_x_assum (qspec_then `bs` mp_tac) >> rw [])
>- (
qmatch_abbrev_tac `_ x = _` >>
`x = (vs, bs)` suffices_by (simp [PAIR_MAP] >> metis_tac [PAIR_EQ, FST, SND]) >>
unabbrev_all_tac >>
pop_assum mp_tac >>
qid_spec_tac `bs` >> qid_spec_tac `ts` >> Induct_on `vs` >> simp [bytes_to_value_def] >>
rw [] >> fs [v_size_def, bytes_to_value_def] >>
pairarg_tac >> fs [] >>
pairarg_tac >> fs [] >>
rename1 `value_type t v1` >>
first_x_assum (qspec_then `v_size v1` mp_tac) >> simp [] >>
disch_then (qspec_then `v1` mp_tac) >> simp [] >>
disch_then (qspec_then `t` mp_tac) >> simp [] >>
qmatch_assum_abbrev_tac `bytes_to_value _ (_ ++ bs1 ++ _) = _` >>
disch_then (qspec_then `bs1++bs` mp_tac) >> simp [] >>
unabbrev_all_tac >> strip_tac >> fs [] >>
first_x_assum drule >> metis_tac [PAIR_EQ])
QED
(* ----- Theorems about insert/extract value and get_offset----- *)
Theorem can_extract:
∀v indices t.
indices_ok t indices value_type t v extract_value v indices None
Proof
recInduct extract_value_ind >> rw [extract_value_def]
>- (
pop_assum mp_tac >> rw [value_type_cases] >> fs [indices_ok_def] >>
metis_tac [LIST_REL_LENGTH])
>- (
pop_assum mp_tac >> rw [value_type_cases] >> fs [indices_ok_def] >>
metis_tac [EVERY_EL, LIST_REL_EL_EQN]) >>
Cases_on `t` >> fs [indices_ok_def] >> simp [value_type_cases]
QED
Theorem can_insert:
∀v v2 indices t.
indices_ok t indices value_type t v insert_value v v2 indices None
Proof
recInduct insert_value_ind >> rw [insert_value_def]
>- (
pop_assum mp_tac >> rw [value_type_cases] >> fs [indices_ok_def] >>
metis_tac [LIST_REL_LENGTH])
>- (
pop_assum mp_tac >> rw [value_type_cases] >> fs [indices_ok_def] >>
CASE_TAC >> fs [] >> rfs [] >>
metis_tac [EVERY_EL, LIST_REL_EL_EQN]) >>
Cases_on `t` >> fs [indices_ok_def] >> simp [value_type_cases]
QED
Theorem extract_insertvalue:
∀v1 v2 indices v3.
insert_value v1 v2 indices = Some v3
extract_value v3 indices = Some v2
Proof
recInduct insert_value_ind >> rw [insert_value_def, extract_value_def] >>
pop_assum mp_tac >> CASE_TAC >> fs [] >> rfs [] >>
rw [] >> simp [extract_value_def, EL_LUPDATE]
QED
Theorem extract_insertvalue_other:
∀v1 v2 indices1 indices2 v3.
insert_value v1 v2 indices1 = Some v3
¬(indices1 indices2) ¬(indices2 indices1)
extract_value v3 indices2 = extract_value v1 indices2
Proof
recInduct insert_value_ind >> rw [insert_value_def, extract_value_def] >>
qpat_x_assum `_ = SOME _` mp_tac >> CASE_TAC >> rw [] >> rfs [] >>
qpat_x_assum `¬case _ of [] => F | h::t => P h t` mp_tac >>
CASE_TAC >> fs [] >> rename1 `idx::is` >>
fs [extract_value_def] >> rw [EL_LUPDATE]
QED
Theorem insert_extractvalue:
∀v1 indices v2.
extract_value v1 indices = Some v2
insert_value v1 v2 indices = Some v1
Proof
recInduct extract_value_ind >> rw [insert_value_def, extract_value_def] >> fs [] >>
rw [LUPDATE_SAME]
QED
Definition indices_in_range_def:
(indices_in_range t [] T)
(indices_in_range (ArrT n t) (i::is)
i < n indices_in_range t is)
(indices_in_range (StrT ts) (i::is)
i < length ts indices_in_range (el i ts) is)
(indices_in_range _ _ F)
End
Definition extract_type_def:
(extract_type t [] = Some t)
(extract_type (ArrT n t) (i::idx) =
if i < n then
extract_type t idx
else
None)
(extract_type (StrT ts) (i::idx) =
if i < length ts then
extract_type (el i ts) idx
else
None)
(extract_type _ _ = None)
End
(* The strict inequality does not hold because of 0 length arrays *)
Theorem offset_size_leq:
∀t indices n.
indices_in_range t indices get_offset t indices = Some n
n sizeof t
Proof
recInduct get_offset_ind >> rw [get_offset_def, sizeof_def, indices_in_range_def] >>
BasicProvers.EVERY_CASE_TAC >> fs [] >> rw [] >> rfs []
>- (
`x + i * sizeof t (i + 1) * sizeof t` by decide_tac >>
`i + 1 v1` by decide_tac >>
metis_tac [LESS_MONO_MULT, LESS_EQ_TRANS]) >>
rw [MAP_TAKE, ETA_THM] >>
`take (Suc i) (map sizeof ts) = take i (map sizeof ts) ++ [sizeof (el i ts)]`
by rw [GSYM SNOC_EL_TAKE, EL_MAP] >>
`take (Suc i) (map sizeof ts) (map sizeof ts)` by rw [take_is_prefix] >>
drule sum_prefix >> rw [SUM_APPEND]
QED
Theorem value_type_is_fc:
∀t v. value_type t v first_class_type t
Proof
ho_match_mp_tac value_type_ind >> rw [first_class_type_def] >>
fs [LIST_REL_EL_EQN, EVERY_EL]
QED
Theorem extract_type_fc:
∀t is t'. extract_type t is = Some t' first_class_type t first_class_type t'
Proof
recInduct extract_type_ind >> rw [extract_type_def, first_class_type_def] >>
rw [] >> fs [] >> fs [EVERY_EL]
QED
Theorem extract_offset_size:
∀t indices n t'.
extract_type t indices = Some t'
get_offset t indices = Some n
sizeof t' sizeof t - n
Proof
recInduct get_offset_ind >> rw [get_offset_def, extract_type_def] >>
BasicProvers.EVERY_CASE_TAC >> fs [sizeof_def] >> rfs [] >> rw [ETA_THM]
>- (
`sizeof t (v1 i) * sizeof t` suffices_by decide_tac >>
`1 v1 - i` by decide_tac >>
rw []) >>
rw [MAP_TAKE] >>
`sizeof (el i ts) sum (map sizeof ts) (sum (take i (map sizeof ts)))`
suffices_by decide_tac >>
qpat_x_assum `_ < _` mp_tac >> rpt (pop_assum kall_tac) >> qid_spec_tac `i` >>
Induct_on `ts` >> rw [TAKE_def, EL_CONS, PRE_SUB1]
QED
Theorem read_from_offset_extract:
∀t indices n v t'.
indices_in_range t indices
get_offset t indices = Some n
value_type t v
extract_type t indices = Some t'
extract_value v indices = Some (fst (bytes_to_value t' (drop n (value_to_bytes v))))
Proof
recInduct get_offset_ind >>
rw [extract_value_def, get_offset_def, extract_type_def, indices_in_range_def] >>
simp [DROP_0]
>- metis_tac [APPEND_NIL, FST, b2v_v2b] >>
qpat_x_assum `value_type _ _` mp_tac >>
simp [Once value_type_cases] >> rw [] >> simp [extract_value_def] >>
qpat_x_assum `_ = Some n` mp_tac >> CASE_TAC >> rw [] >> rfs [] >>
simp [value_to_bytes_def]
>- (
`value_type t (el i vs)` by metis_tac [EVERY_EL] >>
first_x_assum drule >>
rw [] >> simp [GSYM DROP_DROP_T, ETA_THM] >>
`i * sizeof t = length (flat (take i (map value_to_bytes vs)))`
by (
simp [LENGTH_FLAT, MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF] >>
`map (λx. length (value_to_bytes x)) vs = replicate (length vs) (sizeof t)`
by (
qpat_x_assum `every _ _` mp_tac >> rpt (pop_assum kall_tac) >>
Induct_on `vs` >> rw [v2b_size]) >>
rw [take_replicate, MIN_DEF]) >>
rw [GSYM flat_drop, GSYM MAP_DROP] >>
drule DROP_CONS_EL >> simp [DROP_APPEND] >> disch_then kall_tac >>
`first_class_type t'` by metis_tac [value_type_is_fc, extract_type_fc] >>
`sizeof t' length (drop x (value_to_bytes (el i vs)))`
by (simp [LENGTH_DROP] >> drule v2b_size >> rw [] >> metis_tac [extract_offset_size]) >>
simp [b2v_append])
>- metis_tac [LIST_REL_LENGTH]
>- (
`value_type (el i ts) (el i vs)` by metis_tac [LIST_REL_EL_EQN] >>
first_x_assum drule >>
rw [] >> simp [GSYM DROP_DROP_T, ETA_THM] >>
`sum (map sizeof (take i ts)) = length (flat (take i (map value_to_bytes vs)))`
by (
simp [LENGTH_FLAT, MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF] >>
`map sizeof ts = map (\x. length (value_to_bytes x)) vs`
by (
qpat_x_assum `list_rel _ _ _` mp_tac >> rpt (pop_assum kall_tac) >>
qid_spec_tac `ts` >>
Induct_on `vs` >> rw [] >> rw [v2b_size]) >>
rw []) >>
rw [GSYM flat_drop, GSYM MAP_DROP] >>
`i < length vs` by metis_tac [LIST_REL_LENGTH] >>
drule DROP_CONS_EL >> simp [DROP_APPEND] >> disch_then kall_tac >>
`first_class_type t'` by metis_tac [value_type_is_fc, extract_type_fc] >>
`sizeof t' length (drop x (value_to_bytes (el i vs)))`
by (simp [LENGTH_DROP] >> drule v2b_size >> rw [] >> metis_tac [extract_offset_size]) >>
simp [b2v_append])
QED
export_theory ();

@ -0,0 +1,43 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
(* Basic HOL initialisation *)
open HolKernel boolLib bossLib Parse;
local open wordsLib intLib stringLib integer_wordLib finite_mapLib alistLib
in end;
new_theory "settings";
(* Reduce HOL's ancient obsession with ALL_CAPS *)
overload_on ("map", ``MAP``);
overload_on ("sum", ``SUM : num list -> num``);
overload_on ("flookup", ``FLOOKUP``);
overload_on ("length", ``LENGTH``);
overload_on ("Some", ``SOME``);
overload_on ("None", ``NONE``);
overload_on ("snd", ``SND``);
overload_on ("fst", ``FST``);
overload_on ("zip", ``ZIP``);
overload_on ("el", ``EL``);
overload_on ("count_list", ``COUNT_LIST``);
overload_on ("Suc", ``SUC``);
overload_on ("flat", ``FLAT``);
overload_on ("take", ``TAKE``);
overload_on ("drop", ``DROP``);
overload_on ("replicate", ``REPLICATE``);
overload_on ("every", ``EVERY``);
overload_on ("exists", ``EXISTS``);
overload_on ("list_rel", ``LIST_REL``);
overload_on ("reverse", ``REVERSE``);
overload_on ("log", ``LOG``);
overload_on ("option_map", ``OPTION_MAP``);
overload_on ("option_join", ``OPTION_JOIN``);
overload_on ("min", ``MIN``);
overload_on ("list_update", ``LUPDATE``);
export_theory ();
Loading…
Cancel
Save