You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

570 lines
19 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
(* A simple, concrete memory model where the heap is an array of bytes. This is
* how llair views memory, and how we give semantics to llvm. Although LLVM's
* real memory model is still a subject of research, this is mostly to do with
* enabling the right collection of compiler optimisations. Since we only want to
* think about LLVM semantics after optimisation, the concrete model is likely to
* be sufficient. *)
open HolKernel boolLib bossLib Parse;
open arithmeticTheory listTheory rich_listTheory pairTheory;
open logrootTheory numposrepTheory wordsTheory pred_setTheory finite_mapTheory;
open settingsTheory miscTheory;
new_theory "memory_model";
numLib.prefer_num ();
(* Heap addresses *)
Datatype:
addr = A num
End
(* Values that fit in registers *)
Datatype:
reg_v =
| FlatV 'a
| AggV (reg_v list)
End
(* An interval of allocated memory.
* Args: whether it's freeable, the starting address (inclusive), and the ending address
* (exclusive)
*)
Datatype:
interval = Interval bool num num
End
Datatype:
heap = <| memory : addr |-> 'a # word8;
allocations : interval set;
valid_addresses : addr set|>
End
(* shapes statically describe the shape of a value in a register. Flat shapes
* take a given number of bytes, Array shapes are replicated a certain number of
* times
*)
Datatype:
shape =
| Flat num 'a
| Array shape num
| Tuple (shape list)
End
(* Does a value have a given shape. The function argument answers the question
* for flat values/shapes.
* We use mutual recursion instead of list_rel to work around a HOL bug. *)
Definition value_shape_def:
(value_shape f (Flat n t) (FlatV x) f n t x)
(value_shape f (Array s n) (AggV vs)
every (value_shape f s) vs length vs = n)
(value_shape f (Tuple ss) (AggV vs)
value_shapes f ss vs)
(value_shape _ _ _ F)
(value_shapes f [] [] T)
(value_shapes f (s::ss) (v::vs)
value_shape f s v value_shapes f ss vs)
(value_shapes _ _ _ F)
Termination
WF_REL_TAC `measure (\x. case x of | INL (_, x, _) => shape_size (\x.0) x | INR (_, y, _) => shape1_size (\x.0) y)`
End
Theorem value_shapes_list_rel:
∀f ss vs. value_shapes f ss vs list_rel (value_shape f) ss vs
Proof
Induct_on `ss` >> Cases_on `vs` >> rw [value_shape_def]
QED
Theorem value_shape_cases:
∀f s v.
value_shape f s v
(∃n t x. s = Flat n t v = FlatV x f n t x)
(∃s2 n vs. s = Array s2 n v = AggV vs
every (value_shape f s2) vs length vs = n)
(∃ss vs. s = Tuple ss v = AggV vs value_shapes f ss vs)
Proof
rw [] >>
Cases_on `s` >> Cases_on `v` >> rw [value_shape_def] >> metis_tac []
QED
Definition sizeof_def:
(sizeof (Flat n t) = n)
(sizeof (Array s n) = n * sizeof s)
(sizeof (Tuple ss) = sum (map sizeof ss))
Termination
WF_REL_TAC `measure (shape_size (\x.0))` >> rw [] >>
Induct_on `ss` >> rw [definition "shape_size_def"] >>
res_tac >> decide_tac
End
Definition interval_to_set_def:
interval_to_set (Interval _ start stop) =
{ n | start n n < stop }
End
Definition interval_ok_def:
interval_ok (Interval b i1 i2) valid_addresses
i1 i2 image A (interval_to_set (Interval b i1 i2)) valid_addresses
End
Definition interval_freeable_def:
interval_freeable (Interval b _ _) b
End
Definition is_allocated_def:
is_allocated b1 h
interval_ok b1 h.valid_addresses
∃b2. b2 h.allocations (interval_freeable b1 interval_freeable b2)
interval_to_set b1 interval_to_set b2
End
Definition is_free_def:
is_free b1 h
interval_ok b1 h.valid_addresses
∀b2. b2 h.allocations interval_to_set b1 interval_to_set b2 =
End
(* The allocations are of intervals that don't overlap *)
Definition allocations_ok_def:
allocations_ok h
∀i1 i2.
i1 h.allocations i2 h.allocations
interval_ok i1 h.valid_addresses interval_ok i2 h.valid_addresses
(interval_to_set i1 interval_to_set i2 i1 = i2)
End
(* The heap maps exactly the address in the allocations *)
Definition heap_ok_def:
heap_ok h
allocations_ok h
∀n. flookup h.memory (A n) None ∃i. i h.allocations n interval_to_set i
End
Definition get_bytes_def:
get_bytes h (Interval _ start stop) =
map
(λoff.
case flookup h.memory (A (start + off)) of
| Some w => w)
(count_list (stop - start))
End
Definition set_bytes_def:
(set_bytes p [] n h = h)
(set_bytes p (b::bs) n h =
set_bytes p bs (Suc n) (h with memory := h.memory |+ (A n, (p, b))))
End
(* Allocate a free chunk of memory, and write non-deterministic bytes into it *)
Inductive allocate:
b = Interval T p (p + size)
is_free b h
length bytes = size
allocate h size tag (p, set_bytes tag bytes p
<| memory := h.memory;
allocations := { b } h.allocations;
valid_addresses := h.valid_addresses |>)
End
Definition deallocate_def:
deallocate addrs h =
let to_remove = { Interval T n stop | A n set addrs Interval T n stop h.allocations } in
<| memory := fdiff h.memory (image A (bigunion (image interval_to_set to_remove)));
allocations := h.allocations DIFF to_remove;
valid_addresses := h.valid_addresses |>
End
(* Read len bytes from the list of bytes, and convert it into a word value,
* little-endian encoding *)
Definition le_read_w_def:
le_read_w len (bs : word8 list) =
if length bs < len then
(l2w 256 (map w2n bs), [])
else
(l2w 256 (map w2n (take len bs)), drop len bs)
End
(* Return len bytes that are the little-endian encoding of the argument word *)
Definition le_write_w_def:
le_write_w len w =
let (l : word8 list) = map n2w (w2l 256 w) in
take len (l ++ replicate (len - length l) 0w)
End
(* Read a from a list of bytes bs to build a register value described by a
* shape. The function f is applied to the word read from a flat list of bytes
* in little-endian order. *)
Definition bytes_to_value_def:
(bytes_to_value f (Flat n t) bs = (FlatV o f n t ## I) (le_read_w n bs))
(bytes_to_value f (Array s n) bs = (AggV ## I) (read_array f n s bs))
(bytes_to_value f (Tuple ts) bs = (AggV ## I) (read_str f ts bs))
(read_array f 0 s bs = ([], bs))
(read_array f (Suc n) s bs =
let (v, bs) = bytes_to_value f s bs in
let (rest, bs) = read_array f n s bs in
(v::rest, bs))
(read_str f [] bs = ([], bs))
(read_str f (s::ss) bs =
let (v, bs) = bytes_to_value f s bs in
let (rest, bs) = read_str f ss bs in
(v::rest, bs))
Termination
WF_REL_TAC `measure (λx. case x of
| INL (_, t, bs) => shape_size (\x.0) t
| INR (INL (_, n, t, bs)) => n + shape_size (\x.0) t
| INR (INR (_, ts, bs)) => shape1_size (\x.0) ts)`
End
(* Convert the given value to a list of bytes, the function gives the size, and
* a machine word for a flat value *)
Definition value_to_bytes_def:
(value_to_bytes f (FlatV x) =
let (size, word) = f x in
le_write_w size word)
(value_to_bytes f (AggV vs) = flat (map (value_to_bytes f) vs))
Termination
WF_REL_TAC `measure (reg_v_size (\x. 0) o snd)` >>
Induct >> rw [definition "reg_v_size_def"] >>
TRY (first_x_assum drule) >>
decide_tac
End
(* ----- Theorems about converting between values and byte lists ----- *)
Theorem le_write_w_length:
∀l x. length (le_write_w l w) = l
Proof
rw [le_write_w_def]
QED
Theorem v2b_size_lem:
(∀(f:num->'a->'b->bool) s v. (∀n t x. f n t x fst (g x) = n) value_shape f s v length (value_to_bytes g v) = sizeof s)
(∀(f:num->'a->'b->bool) ss vs. (∀n t x. f n t x fst (g x) = n) value_shapes f ss vs sum (map (length o value_to_bytes g) vs) = sum (map sizeof ss))
Proof
ho_match_mp_tac value_shape_ind >>
rw [value_to_bytes_def, sizeof_def, value_shape_def]
>- (
pairarg_tac >> simp [] >>
metis_tac [FST, le_write_w_length])
>- (
simp [LENGTH_FLAT, MAP_MAP_o, combinTheory.o_DEF] >>
Induct_on `vs` >> rw [ADD1] >> fs [LEFT_ADD_DISTRIB] >>
metis_tac [])
>- (fs [LENGTH_FLAT, ETA_THM, MAP_MAP_o, combinTheory.o_DEF] >> metis_tac [])
>- metis_tac [ADD_COMM]
QED
Theorem v2b_size:
∀f s v g. (∀n t x. f n t x fst (g x) = n) value_shape f s v length (value_to_bytes g v) = sizeof s
Proof
metis_tac [v2b_size_lem]
QED
Theorem b2v_size_lem:
(∀(f:num->'c ->'a word -> 'b) s bs. sizeof s length bs
∃v. bytes_to_value f s bs = (v, drop (sizeof s) bs))
(∀(f:num->'c ->'a word -> 'b) n s bs. n * sizeof s length bs
∃vs. read_array f n s bs = (vs, drop (n * sizeof s) bs))
(∀(f:num->'c ->'a word -> 'b) ss bs. sum (map sizeof ss) length bs
∃vs. read_str f ss bs = (vs, drop (sum (map sizeof ss)) bs))
Proof
ho_match_mp_tac bytes_to_value_ind >>
rw [sizeof_def, bytes_to_value_def, le_read_w_def] >>
fs []
>- (simp [PAIR_MAP] >> metis_tac [SND])
>- (
pairarg_tac >> rw [] >> pairarg_tac >> rw [] >>
fs [ADD1] >> rw [] >> fs [DROP_DROP_T, LEFT_ADD_DISTRIB])
>- fs [DROP_DROP_T, LEFT_ADD_DISTRIB]
QED
Theorem b2v_size:
∀f s bs. sizeof s length bs
∃v. bytes_to_value f s bs = (v, drop (sizeof s) bs)
Proof
metis_tac [b2v_size_lem]
QED
Theorem b2v_smaller:
∀f s bs. sizeof s length bs
length (snd (bytes_to_value f s bs)) = length bs - sizeof s
Proof
rw [] >> imp_res_tac b2v_size >>
Cases_on `bytes_to_value f s bs` >> fs [] >>
first_x_assum (qspec_then `f` mp_tac) >> rw []
QED
Theorem b2v_append_lem:
(∀(f:num->'c->'a word -> 'b) s bs. sizeof s length bs
bytes_to_value f s (bs ++ bs') = (I ## (λx. x ++ bs')) (bytes_to_value f s bs))
(∀(f:num->'c->'a word -> 'b) n s bs. n * sizeof s length bs
∃vs. read_array f n s (bs ++ bs') = (I ## (λx. x ++ bs')) (read_array f n s bs))
(∀(f:num->'c->'a word -> 'b) ss bs. sum (map sizeof ss) length bs
∃vs. read_str f ss (bs ++ bs') = (I ## (λx. x ++ bs')) (read_str f ss bs))
Proof
ho_match_mp_tac bytes_to_value_ind >>
rw [sizeof_def, bytes_to_value_def, le_read_w_def] >>
fs [TAKE_APPEND, DROP_APPEND,
DECIDE ``!x y. x y x - y = 0n``, ETA_THM]
>- (simp [PAIR_MAP] >> metis_tac [SND])
>- (simp [PAIR_MAP] >> metis_tac [SND])
>- (
rpt (pairarg_tac >> simp []) >> fs [ADD1] >>
BasicProvers.VAR_EQ_TAC >> fs [LEFT_ADD_DISTRIB] >>
first_x_assum irule >>
`sizeof s length bs` by decide_tac >>
qspec_then `f` drule b2v_smaller >>
disch_then (qspec_then `f` mp_tac) >>
rw [])
>- (
rpt (pairarg_tac >> simp []) >> fs [ADD1] >>
BasicProvers.VAR_EQ_TAC >> fs [LEFT_ADD_DISTRIB] >>
first_x_assum irule >>
`sizeof s length bs` by decide_tac >>
qspec_then `f` drule b2v_smaller >>
disch_then (qspec_then `f` mp_tac) >>
rw [])
QED
Theorem b2v_append:
∀f s bs bs'. sizeof s length bs
bytes_to_value f s (bs ++ bs') = (I ## (λx. x ++ bs')) (bytes_to_value f s bs)
Proof
metis_tac [b2v_append_lem]
QED
Theorem le_read_write:
∀n w bs.
w2n w < 256 ** n le_read_w n (le_write_w n w bs) = (w, bs)
Proof
rw [le_read_w_def, le_write_w_length]
>- (
`take n (le_write_w n w bs) = le_write_w n w`
by metis_tac [le_write_w_length, TAKE_LENGTH_APPEND] >>
simp [le_write_w_def, w2l_def, l2w_def] >>
Cases_on `w` >> simp [] >> fs [l2n_padding, TAKE_APPEND, take_replicate] >>
simp [MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF, mod_n2l] >>
rename1 `n2l 256 m` >>
Cases_on `n = 0` >> fs [] >>
`length (n2l 256 m) n`
by (
rw [LENGTH_n2l] >>
`256 = 2 ** 8` by EVAL_TAC >>
ASM_REWRITE_TAC [] >> simp [log_change_base_power, GSYM LESS_EQ] >>
`n2w m 0w` by simp [] >>
rw [] >> fs [bitTheory.LOG2_def, dimword_def] >>
`8 * (log 2 m DIV 8) log 2 m` by metis_tac [mul_div_bound, EVAL ``8 0n``] >>
`LOG 2 m LOG 2 (256 ** n)` by simp [LOG_LE_MONO] >>
pop_assum mp_tac >>
`256 = 2 ** 8` by EVAL_TAC >>
ASM_REWRITE_TAC [EXP_MUL] >> simp [log_base_power] >>
Cases_on `log 2 m DIV 8 = n` >> rw [] >>
CCONTR_TAC >> fs [] >>
`log 2 m = (8 * (log 2 m DIV 8))` by intLib.COOPER_TAC >> fs [] >>
`2 ** log 2 m = 2 ** (8 * (log 2 m DIV 8))` by rw [] >>
fs [EXP_EXP_MULT] >>
`2 ** log 2 m m` by rw [exp_log_bound] >>
decide_tac) >>
simp [mod_n2l, l2n_n2l, TAKE_LENGTH_TOO_LONG])
>- metis_tac [le_write_w_length, DROP_LENGTH_APPEND]
QED
Theorem le_write_read:
∀n w bs bs'.
256 ** n dimword (:'a)
n length bs
le_read_w n bs = (w:'a word, bs')
le_write_w n w ++ bs' = bs
Proof
rw [le_read_w_def] >>
qmatch_goalsub_abbrev_tac `l2w _ l` >>
`le_write_w n (l2w 256 l) = take n bs` suffices_by metis_tac [TAKE_DROP] >>
simp [le_write_w_def, w2l_l2w] >>
`l2n 256 l < 256 ** n`
by (
`n length bs` by decide_tac >>
metis_tac [l2n_lt, LENGTH_TAKE, LENGTH_MAP, EVAL ``0n < 256``]) >>
fs [] >>
`every ($> 256) l`
by (
simp [EVERY_MAP, Abbr `l`] >> irule EVERY_TAKE >> simp [] >>
rpt (pop_assum kall_tac) >>
Induct_on `bs` >> rw [] >>
Cases_on `h` >> fs []) >>
rw [n2l_l2n]
>- (
rw [TAKE_def, take_replicate] >>
Cases_on `n` >> fs [] >>
rfs [l2n_0] >> unabbrev_all_tac >> fs [EVERY_MAP] >>
ONCE_REWRITE_TAC [GSYM REPLICATE] >>
qmatch_goalsub_abbrev_tac `take n _` >>
qpat_assum `¬(_ < _)` mp_tac >>
qpat_assum `every (\x. 0 = w2n x) _` mp_tac >>
rpt (pop_assum kall_tac) >>
qid_spec_tac `bs` >>
Induct_on `n` >> rw [] >>
Cases_on `bs` >> rw [] >> fs [] >>
Cases_on `h` >> fs [] >>
first_x_assum irule >> rw [] >>
irule MONO_EVERY >>
qexists_tac `(λx. 0 = w2n x)` >> rw []) >>
fs [MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF] >>
`exists (\y. 0 y) l`
by (
fs [l2n_eq_0, combinTheory.o_DEF] >> fs [EXISTS_MEM, EVERY_MEM] >>
qexists_tac `x` >> rfs [MOD_MOD, GREATER_DEF]) >>
simp [LOG_l2n_dropWhile] >>
`length (dropWhile ($= 0) (reverse l)) 0`
by (
Cases_on `l` >> fs [dropWhile_eq_nil, combinTheory.o_DEF, EXISTS_REVERSE]) >>
`0 < length (dropWhile ($= 0) (reverse l))` by decide_tac >>
fs [SUC_PRE] >>
`map n2w l = take n bs`
by (simp [Abbr `l`, MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF, n2w_w2n]) >>
simp [TAKE_TAKE_MIN] >>
`length l = n` by simp [Abbr `l`] >>
`length (dropWhile ($= 0) (reverse l)) n`
by metis_tac [LESS_EQ_TRANS, LENGTH_dropWhile_LESS_EQ, LENGTH_REVERSE] >>
rw [MIN_DEF] >> fs []
>- (
simp [TAKE_APPEND, TAKE_TAKE_MIN, MIN_DEF, take_replicate] >>
`replicate (length l length (dropWhile ($= 0) (reverse l))) 0w =
take (length l (length (dropWhile ($= 0) (reverse l)))) (drop (length (dropWhile ($= 0) (reverse l))) bs)`
suffices_by (rw [] >> irule take_drop_partition >> simp []) >>
rw [LIST_EQ_REWRITE, EL_REPLICATE, EL_TAKE, EL_DROP] >>
`length (dropWhile ($= 0) (reverse l)) =
length (dropWhile (λx. 0 = w2n x) (reverse (take (length l) bs)))`
by (
`reverse l = reverse (take (length l) (map w2n bs))` by metis_tac [] >>
ONCE_ASM_REWRITE_TAC [] >>
qpat_x_assum `Abbrev (l = _)` kall_tac >>
simp [GSYM MAP_TAKE, GSYM MAP_REVERSE, dropWhile_map, combinTheory.o_DEF]) >>
fs [] >>
`x + length (dropWhile (λx. 0 = w2n x) (reverse (take (length l) bs))) < length l` by decide_tac >>
drule (SIMP_RULE std_ss [LET_THM] dropWhile_rev_take) >>
rw [] >>
REWRITE_TAC [GSYM w2n_11, word_0_n2w] >>
simp [])
>- rw [TAKE_APPEND, TAKE_TAKE]
QED
Theorem b2v_v2b_lem:
(∀f s v.
value_shape f s v
(∀x t n. f n t x fst (h x) = n g n t (snd (h x)) = x w2n (snd (h x)) < 256 ** n)
∀bs.
bytes_to_value (g:num -> 'c -> 'a word -> 'b) s (value_to_bytes h v ++ bs) = (v, bs))
(∀f ss vs.
value_shapes f ss vs
(∀x t n. f n t x fst (h x) = n g n t (snd (h x)) = x w2n (snd (h x)) < 256 ** n)
∀bs.
read_str (g:num -> 'c -> 'a word -> 'b) ss (flat (map (value_to_bytes h) vs) ++ bs) = (vs, bs))
Proof
ho_match_mp_tac value_shape_ind >> rw [value_shape_def] >>
fs [bytes_to_value_def, value_to_bytes_def] >> rw []
>- (
pairarg_tac >> fs [] >>
first_x_assum drule >> simp [] >> rw [] >>
simp [le_read_write])
>- (
qmatch_abbrev_tac `_ x = _` >>
`x = (vs, bs)` suffices_by (simp [PAIR_MAP] >> metis_tac [PAIR_EQ, FST, SND]) >>
unabbrev_all_tac >>
qid_spec_tac `bs` >> Induct_on `vs` >> simp [bytes_to_value_def] >>
rw [] >> fs [definition "reg_v_size_def"] >>
pairarg_tac >> fs [] >>
pairarg_tac >> fs [] >>
rename1 `value_shape _ s v1` >>
qpat_x_assum `_ !bs. P bs` mp_tac >> impl_tac >> simp [] >>
metis_tac [APPEND_ASSOC, PAIR_EQ])
>- (
qmatch_abbrev_tac `_ x = _` >>
`x = (vs, bs)` suffices_by (simp [PAIR_MAP] >> metis_tac [PAIR_EQ, FST, SND]) >>
metis_tac [])
>- (
pairarg_tac >> fs [] >>
pairarg_tac >> fs [] >>
metis_tac [APPEND_ASSOC, PAIR_EQ])
QED
Theorem b2v_v2b:
∀f g h s v bs.
value_shape f s v
(∀x n t. f n t x fst (h x) = n g n t (snd (h x)) = x w2n (snd (h x)) < 256 ** n)
bytes_to_value g s (value_to_bytes h v ++ bs) = (v, bs)
Proof
metis_tac [b2v_v2b_lem]
QED
Theorem flookup_set_bytes:
∀tag bytes n h n'.
flookup ((set_bytes tag bytes n h).memory) (A n') =
if n n' n' < n + length bytes then
Some (tag, el (n' - n) bytes)
else
flookup h.memory (A n')
Proof
Induct_on `bytes` >> rw [set_bytes_def, EL_CONS, PRE_SUB1] >>
fs [ADD1, FLOOKUP_UPDATE] >>
`n = n'` by decide_tac >>
rw []
QED
Theorem set_bytes_unchanged:
∀t bs p h. (set_bytes t bs p h).valid_addresses = h.valid_addresses
(set_bytes t bs p h).allocations = h.allocations
Proof
Induct_on `bs` >> rw [set_bytes_def]
QED
Theorem allocate_unchanged:
∀h1 v1 t h2 v2.
allocate h1 v1 t (v2, h2)
h1.valid_addresses = h2.valid_addresses
h1.allocations h2.allocations
Proof
rw [allocate_cases] >> rw [set_bytes_unchanged]
QED
Theorem allocate_heap_ok:
∀h1 v1 t v2 h2. heap_ok h1 allocate h1 v1 t (v2,h2) heap_ok h2
Proof
rw [allocate_cases, heap_ok_def]
>- (
fs [allocations_ok_def] >> rpt gen_tac >> disch_tac >> fs [is_free_def] >> rw [] >>
fs [set_bytes_unchanged] >> metis_tac [INTER_COMM])
>- (
rw [flookup_set_bytes, set_bytes_unchanged]
>- rw [RIGHT_AND_OVER_OR, EXISTS_OR_THM, interval_to_set_def] >>
eq_tac >> rw [] >> fs [interval_to_set_def] >>
metis_tac [])
QED
Theorem set_bytes_heap_ok:
∀h tag bytes n b.
heap_ok h is_allocated (Interval b n (n + length bytes)) h
heap_ok (set_bytes tag bytes n h)
Proof
rw [heap_ok_def]
>- (fs [allocations_ok_def] >> rw [set_bytes_unchanged] >> metis_tac [])
>- (
fs [flookup_set_bytes, set_bytes_unchanged] >> rw [] >>
fs [is_allocated_def, interval_to_set_def, SUBSET_DEF] >>
metis_tac [LESS_EQ_REFL, DECIDE ``!x y. x < x + SUC y``])
QED
export_theory ();