From f298d728c55364da442f712b51cf042b0297910f Mon Sep 17 00:00:00 2001 From: Scott Owens Date: Thu, 5 Sep 2019 09:39:26 -0700 Subject: [PATCH] [sledge sem] Start sketching translation correctness Summary: This includes a few changes and corrections to the semantics, to support the translation. This initial attempt to reason about LLVM -> llair showed three things that needed repair in the semantics, in addition to various bugs. We address them as follows. Refactor llair semantics to have only a single kind of flat value: integers that fit into specified bit widths. Operations on size values (e.g., offsets, indices and the like) can just take an integer and ignore its number of bits. Pointers can just be considered integers that fit into a certain size given by the constant pointer_size. Later on we can consider making this a parameter to the model. Change the generic memory model interface to use numbers rather than words as the generic encoding of a large value. This makes it more useful for llair where words are not used. Pay more careful attention to signed/unsigned issues. Neither LLVM nor llair have a concept of signed vs unsigned value. Instead individual operations interpret bit patterns in various ways, some of which are ambiguous in the LLVM manual. For example, since getelementpointer's indices are explicitly said to be interpreted as signed 2's complement, we should probably do the same for insertvalue and extractvalue. However it is not clear how the argument to alloca is to be interpreted. For now we assume signed. Reviewed By: jberdine Differential Revision: D17164133 fbshipit-source-id: 31a8af635 --- sledge/semantics/llairScript.sml | 228 +++++++------- sledge/semantics/llair_propScript.sml | 141 +++++++++ sledge/semantics/llvmScript.sml | 235 +++++++++------ sledge/semantics/llvm_propScript.sml | 2 +- sledge/semantics/llvm_to_llairScript.sml | 51 +++- sledge/semantics/llvm_to_llair_propScript.sml | 278 ++++++++++++++++++ sledge/semantics/memory_modelScript.sml | 123 ++++---- sledge/semantics/miscScript.sml | 74 ++++- sledge/semantics/settingsScript.sml | 2 + 9 files changed, 865 insertions(+), 269 deletions(-) create mode 100644 sledge/semantics/llair_propScript.sml create mode 100644 sledge/semantics/llvm_to_llair_propScript.sml diff --git a/sledge/semantics/llairScript.sml b/sledge/semantics/llairScript.sml index e75a92971..090c75056 100644 --- a/sledge/semantics/llairScript.sml +++ b/sledge/semantics/llairScript.sml @@ -19,6 +19,7 @@ numLib.prefer_num (); Datatype: typ = | FunctionT typ (typ list) + (* How many bits the integer occupies *) | IntegerT num | PointerT typ | ArrayT typ num @@ -121,13 +122,11 @@ End (* ----- Semantic states ----- *) (* These are the values that can be stored in registers. The implementation uses - * integers with a bit-width to represent numbers, and keeps locations and sizes - * separate. + * integers with a bit-width to represent numbers and pointers. Here we + * interpret the bit width b as meaning the int should be in the range [-2^(b-1),2^(b-1)) *) Datatype: flat_v = - | LocV num - | SizeV num | IntV int num End @@ -143,34 +142,28 @@ Datatype: globals : var |-> word64; locals : var |-> v; stack : frame list; - pointer_size : 'a itself; heap : unit heap |> End +(* Assume that all pointers can fit in 64 bits *) +Definition pointer_size_def: + pointer_size = 64 +End + (* ----- Semantic transitions ----- *) +(* The size of a type in bytes, rounded up *) Definition sizeof_def: - (sizeof (:'a) (IntegerT n) = n) ∧ - (sizeof (:'a) (PointerT t) = dimindex (:'a)) ∧ - (sizeof (:'a) (ArrayT t n) = n * sizeof (:'a) t) ∧ - (sizeof (:'a) (TupleT ts) = sum (map (sizeof (:'a)) ts)) + (sizeof (IntegerT n) = (n+7) DIV 8) ∧ + (sizeof (PointerT t) = (pointer_size+7) DIV 8) ∧ + (sizeof (ArrayT t n) = n * sizeof t) ∧ + (sizeof (TupleT ts) = sum (map sizeof ts)) Termination - WF_REL_TAC `measure (typ_size o snd)` >> simp [] >> + WF_REL_TAC `measure typ_size` >> simp [] >> Induct >> rw [definition "typ_size_def"] >> simp [] >> first_x_assum drule >> decide_tac End -Definition type_to_shape_def: - (type_to_shape (:'a) (IntegerT n) = Flat (sizeof (:'a) (IntegerT n)) (IntegerT n)) ∧ - (type_to_shape (:'a) (ArrayT t n) = Array (type_to_shape (:'a) t) n) ∧ - (type_to_shape (:'a) (TupleT ts) = Tuple (map (type_to_shape (:'a)) ts)) -Termination - WF_REL_TAC `measure (typ_size o snd)` >> - rw [] >> - Induct_on `ts` >> rw [definition "typ_size_def"] >> - res_tac >> decide_tac -End - Definition first_class_type_def: (first_class_type (IntegerT _) ⇔ T) ∧ (first_class_type (PointerT _) ⇔ T) ∧ @@ -185,34 +178,53 @@ Termination End Inductive value_type: - (value_type (IntegerT n) (FlatV (IntV i n))) ∧ - (value_type (PointerT _) (FlatV (LocV n))) ∧ - (every (value_type t) vs ∧ length vs = n ∧ first_class_type t + (∀n i. value_type (IntegerT n) (FlatV (IntV i n))) ∧ + (∀t vs n. + every (value_type t) vs ∧ length vs = n ∧ first_class_type t ⇒ value_type (ArrayT t n) (AggV vs)) ∧ - (list_rel value_type ts vs + (∀ts vs. + list_rel value_type ts vs ⇒ - value_type (StrT ts) (AggV vs)) + value_type (TupleT ts) (AggV vs)) End Definition bool2v_def: bool2v b = FlatV (IntV (if b then 1 else 0) 1) End -Definition int2unsigned_def: - int2unsigned (i:int) size = +(* The integer, interpreted as 2's complement, fits in the given number of bits *) +Definition ifits_def: + ifits (i:int) size ⇔ + 0 < size ∧ -(2 ** (size - 1)) ≤ i ∧ i < 2 ** (size - 1) +End + +(* The natural number, interpreted as unsigned, fits in the given number of bits *) +Definition nfits_def: + nfits (n:num) size ⇔ + 0 < size ∧ n < 2 ** size +End + +(* Convert an integer to an unsigned number, following the 2's complement, + * assuming (ifits i size) *) +Definition i2n_def: + i2n (IntV i size) : num = if i < 0 then - 256 ** size + i + Num (2 ** size + i) else - i + Num i End -Definition fits_def: - fits (i:int) size ⇔ - 0 < size ∧ 0 - (256 ** (size - 1)) ≤ i ∧ i < 256 ** (size - 1) +(* Convert an unsigned number into the integer that it would be in 2's + * compliment with the given size, assuming (nfits n size) *) +Definition n2i_def: + n2i n size = + if 2 ** (size - 1) ≤ n then + (IntV (&n - &(2 ** size)) size) + else + (IntV (&n) size) End -(* TODO: We never create anything that is a SizeV *) Inductive eval_exp: (∀s v r. flookup s.locals v = Some r @@ -222,17 +234,19 @@ Inductive eval_exp: (* TODO: Nondet I guess we need to know the type here *) (* TODO: Label *) - (∀s e1 e2 size byte. - eval_exp s e1 (FlatV (IntV byte 1)) ∧ - eval_exp s e2 (FlatV (SizeV size)) + (∀s e1 e2 n byte n_size. + eval_exp s e1 (FlatV (IntV byte 8)) ∧ + (* This idiom means that e2 evaluates to a non-negative integer n, and is + * used throughout *) + eval_exp s e2 (FlatV (IntV (&n) n_size)) ⇒ - eval_exp s (Splat e1 e2) (AggV (replicate size (FlatV (IntV byte 1))))) ∧ + eval_exp s (Splat e1 e2) (AggV (replicate n (FlatV (IntV byte 8))))) ∧ (* TODO Question: What if size <> vals? *) - (∀s e1 e2 size vals. + (∀s e1 e2 l vals n_size. eval_exp s e1 (AggV vals) ∧ - eval_exp s e2 (FlatV (SizeV size)) ∧ - size = length vals + eval_exp s e2 (FlatV (IntV (&l) n_size)) ∧ + l = length vals ⇒ eval_exp s (Memory e1 e2) (AggV vals)) ∧ @@ -242,7 +256,7 @@ Inductive eval_exp: eval_exp s (Concat es) (AggV (flat vals))) ∧ (∀s i size. - eval_exp s (Integer i (IntegerT size)) (FlatV (IntV i size))) ∧ + eval_exp s (Integer i (IntegerT size)) (FlatV (IntV (truncate_2comp i size) size))) ∧ (* TODO Question: Should the same integer with two different sizes be equal *) (∀s e1 e2 r1 r2. @@ -254,136 +268,146 @@ Inductive eval_exp: (∀s e1 e2 i1 size1 i2 size2. eval_exp s e1 (FlatV (IntV i1 size1)) ∧ eval_exp s e2 (FlatV (IntV i2 size2)) ∧ - fits i1 size1 ∧ - fits i2 size2 + ifits i1 size1 ∧ + ifits i2 size2 ⇒ eval_exp s (Lt e1 e2) (bool2v (i1 < i2))) ∧ - (∀s e1 e2 i1 size1 i2 size2. + (∀s e1 e2 i1 i2 size1 size2. eval_exp s e1 (FlatV (IntV i1 size1)) ∧ eval_exp s e2 (FlatV (IntV i2 size2)) ∧ - fits i1 size1 ∧ - fits i2 size2 + ifits i1 size1 ∧ + ifits i2 size2 ⇒ - eval_exp s (Ult e1 e2) (bool2v (int2unsigned i1 size1 < int2unsigned i2 size2))) ∧ + eval_exp s (Ult e1 e2) (bool2v (i2n (IntV i1 size1) < i2n (IntV i2 size2)))) ∧ (∀s size e1 e2 i1 i2. eval_exp s e1 (FlatV (IntV i1 size)) ∧ - eval_exp s e2 (FlatV (IntV i2 size)) ∧ - fits i1 size ∧ - fits i2 size + eval_exp s e2 (FlatV (IntV i2 size)) ⇒ - eval_exp s (Sub (IntegerT size) e1 e2) (FlatV (IntV (i1 - i2) size))) ∧ + eval_exp s (Sub (IntegerT size) e1 e2) (FlatV (IntV (truncate_2comp (i1 - i2) size) size))) ∧ (∀s es vals. list_rel (eval_exp s) es vals ⇒ eval_exp s (Record es) (AggV vals)) ∧ - (∀s e1 e2 vals size. + (∀s e1 e2 vals idx idx_size. eval_exp s e1 (AggV vals) ∧ - eval_exp s e2 (FlatV (SizeV size)) ∧ - size < length vals + eval_exp s e2 (FlatV (IntV (&idx) idx_size)) ∧ + idx < length vals ⇒ - eval_exp s (Select e1 e2) (el size vals)) ∧ + eval_exp s (Select e1 e2) (el idx vals)) ∧ - (∀s e1 e2 e3 vals size r. + (∀s e1 e2 e3 vals r idx idx_size. eval_exp s e1 (AggV vals) ∧ - eval_exp s e2 (FlatV (SizeV size)) ∧ + eval_exp s e2 (FlatV (IntV (&idx) idx_size)) ∧ eval_exp s e3 r ∧ - size < length vals + idx < length vals ⇒ - eval_exp s (Update e1 e2 e3) (AggV (list_update r size vals))) + eval_exp s (Update e1 e2 e3) (AggV (list_update r idx vals))) End +(* BEGIN Functions to interface to the generic memory model *) +Definition type_to_shape_def: + (type_to_shape (IntegerT n) = Flat (sizeof (IntegerT n)) (IntegerT n)) ∧ + (type_to_shape (ArrayT t n) = Array (type_to_shape t) n) ∧ + (type_to_shape (TupleT ts) = Tuple (map type_to_shape ts)) +Termination + WF_REL_TAC `measure typ_size` >> + rw [] >> + Induct_on `ts` >> rw [definition "typ_size_def"] >> + res_tac >> decide_tac +End + Definition convert_value_def: - (convert_value (IntegerT n) w = IntV (w2i w) n) ∧ - (convert_value (PointerT t) w = LocV (w2n w)) + convert_value (IntegerT size) n = IntV (&n) size End Definition bytes_to_llair_value_def: - bytes_to_llair_value (:'a) t bs = - (bytes_to_value (λn t (w:'a word). convert_value t w) (type_to_shape (:'a) t) bs) + bytes_to_llair_value t bs = + (bytes_to_value (λn t w. convert_value t w) (type_to_shape t) bs) End Definition unconvert_value_def: - (unconvert_value (:'a) (IntV i size) = (size, i2w i)) ∧ - (unconvert_value (:'a) (LocV n) = (dimindex (:'a), n2w n)) + unconvert_value (IntV i size) = ((size + 7) DIV 8, i2n (IntV i size)) End Definition llair_value_to_bytes_def: - llair_value_to_bytes (:'a) v = - value_to_bytes (unconvert_value (:'a) : flat_v -> num # 'a word) v + llair_value_to_bytes v = + value_to_bytes unconvert_value v End +(* END Functions to interface to the generic memory model *) Definition update_results_def: update_results xvs s = s with locals := s.locals |++ xvs End Inductive step_inst: - (∀s ves r. - eval_exp s e r + (∀s ves rs. + list_rel (eval_exp s) (map snd ves) rs ⇒ step_inst s (Move ves) - (update_results (map (λ(v,e). (v, r)) ves) s)) ∧ + (update_results (map (λ(v,r). (v, r)) (zip (map fst ves, rs))) s)) ∧ - (∀(s:'a state) x t e size ptr freeable interval bytes. - eval_exp s e (FlatV (LocV ptr)) ∧ - interval = Interval freeable ptr (ptr + size) ∧ + (∀s x t e size ptr freeable interval bytes. + eval_exp s e (FlatV ptr) ∧ + interval = Interval freeable (i2n ptr) (i2n ptr + size) ∧ is_allocated interval s.heap ∧ bytes = map snd (get_bytes s.heap interval) ⇒ step_inst s (Load (Var_name x t) e size) - (update_results [(Var_name x t, fst (bytes_to_llair_value (:'a) t bytes))] s)) ∧ + (update_results [(Var_name x t, fst (bytes_to_llair_value t bytes))] s)) ∧ (∀s e1 e2 size ptr bytes freeable interval r. - eval_exp s e1 (FlatV (LocV ptr)) ∧ + eval_exp s e1 (FlatV ptr) ∧ eval_exp s e2 r ∧ - interval = Interval freeable ptr (ptr + size) ∧ + interval = Interval freeable (i2n ptr) (i2n ptr + size) ∧ is_allocated interval s.heap ∧ - bytes = llair_value_to_bytes (:'a) r ∧ + bytes = llair_value_to_bytes r ∧ length bytes = size ⇒ step_inst s (Store e1 e2 size) - (s with heap := set_bytes () bytes ptr s.heap)) ∧ + (s with heap := set_bytes () bytes (i2n ptr) s.heap)) ∧ (* TODO memset *) (∀s e1 e2 e3 dest_ptr src_ptr size src_interval freeable1 freeable2 bytes. - eval_exp s e1 (FlatV (LocV dest_ptr)) ∧ - eval_exp s e2 (FlatV (LocV src_ptr)) ∧ - eval_exp s e3 (FlatV (SizeV size)) ∧ - src_interval = Interval freeable1 src_ptr (src_ptr + size) ∧ + eval_exp s e1 (FlatV dest_ptr) ∧ + eval_exp s e2 (FlatV src_ptr) ∧ + eval_exp s e3 (FlatV size) ∧ + src_interval = Interval freeable1 (i2n src_ptr) (i2n src_ptr + i2n size) ∧ is_allocated src_interval s.heap ∧ - is_allocated (Interval freeable2 dest_ptr (dest_ptr + size)) s.heap ∧ + is_allocated (Interval freeable2 (i2n dest_ptr) (i2n dest_ptr + i2n size)) s.heap ∧ (* TODO Question: should we allow overlap? *) bytes = map snd (get_bytes s.heap src_interval) ⇒ step_inst s (Memcpy e1 e2 e3) - (s with heap := set_bytes () bytes dest_ptr s.heap)) ∧ + (s with heap := set_bytes () bytes (i2n dest_ptr) s.heap)) ∧ (* TODO memmove *) - (∀s v e1 e2 n size ptr new_h. - eval_exp s e1 (FlatV (SizeV n)) ∧ - eval_exp s e2 (FlatV (SizeV size)) ∧ - allocate s.heap (n * size) () (ptr, new_h) + (∀s v e1 e2 n size ptr new_h size_size. + eval_exp s e1 (FlatV n) ∧ + eval_exp s e2 (FlatV (IntV (&size) size_size)) ∧ + allocate s.heap (i2n n * size) () (ptr, new_h) ∧ + nfits ptr pointer_size ⇒ step_inst s (Alloc v e1 e2) - (update_results [(v, FlatV (LocV ptr))] (s with heap := new_h))) ∧ + (update_results [(v, FlatV (n2i ptr pointer_size))] (s with heap := new_h))) ∧ (∀s e ptr. - eval_exp s e (FlatV (LocV ptr)) + eval_exp s e (FlatV ptr) ⇒ step_inst s (Free e) - (s with heap := deallocate [A ptr] s.heap)) ∧ + (s with heap := deallocate [A (i2n ptr)] s.heap)) ∧ (∀s x t nondet. value_type t nondet @@ -396,22 +420,22 @@ End Inductive step_term: - (∀prog s e table default size fname bname bname'. - eval_exp s e (FlatV (SizeV size)) ∧ - Lab_name fname bname = (case alookup table size of Some lab => lab | None => default) ∧ + (∀prog s e table default idx fname bname bname' idx_size. + eval_exp s e (FlatV (IntV (&idx) idx_size)) ∧ + Lab_name fname bname = (case alookup table idx of Some lab => lab | None => default) ∧ s.bp = Lab_name fname bname' ⇒ step_term prog s (Switch e table default) (s with bp := Lab_name fname bname)) ∧ - (∀prog s e labs size. - eval_exp s e (FlatV (SizeV size)) ∧ - size < length labs + (∀prog s e labs i idx idx_size. + eval_exp s e (FlatV (IntV (&idx) idx_size)) ∧ + idx < length labs ⇒ step_term prog s (Iswitch e labs) - (s with bp := el size labs)) ∧ + (s with bp := el i labs)) ∧ (∀prog s v fname bname es t ret1 ret2 vals f. alookup prog.functions fname = Some f ∧ @@ -428,7 +452,6 @@ Inductive step_term: exn_ret := ret2; ret_var := v; saved_locals := s.locals |> :: s.stack; - pointer_size := s.pointer_size; heap := s.heap |>) ∧ (∀prog s e r top rest. @@ -441,7 +464,6 @@ Inductive step_term: globals := s.globals; locals := top.saved_locals |+ (top.ret_var, r); stack := rest; - pointer_size := s.pointer_size; heap := s.heap |>) (* TODO Throw *) @@ -452,12 +474,12 @@ End * instruction pointer and do a big-step evaluation for each block *) Inductive step_block: - (!prog s1 t s2. + (∀prog s1 t s2. step_term prog s1 t s2 ⇒ step_block prog s1 [] t s2) ∧ - (!prog s1 i is t s2 s3. + (∀prog s1 i is t s2 s3. step_inst s1 i s2 ∧ step_block prog s2 is t s3 ⇒ diff --git a/sledge/semantics/llair_propScript.sml b/sledge/semantics/llair_propScript.sml new file mode 100644 index 000000000..456d92251 --- /dev/null +++ b/sledge/semantics/llair_propScript.sml @@ -0,0 +1,141 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +(* Properties of the llair model *) + +open HolKernel boolLib bossLib Parse; +open arithmeticTheory integerTheory integer_wordTheory wordsTheory; +open settingsTheory miscTheory llairTheory; + +new_theory "llair_prop"; + +numLib.prefer_num (); + +Theorem ifits_w2i: + ∀(w : 'a word). ifits (w2i w) (dimindex (:'a)) +Proof + rw [ifits_def, GSYM INT_MIN_def] >> + metis_tac [INT_MIN, w2i_ge, integer_wordTheory.INT_MAX_def, w2i_le, + intLib.COOPER_PROVE ``!(x:int) y. x ≤ y - 1 ⇔ x < y``] +QED + +Theorem truncate_2comp_fits: + ∀i size. 0 < size ⇒ ifits (truncate_2comp i size) size +Proof + rw [truncate_2comp_def, ifits_def] >> + qmatch_goalsub_abbrev_tac `(i + s1) % s2` >> + `s2 ≠ 0 ∧ ¬(s2 < 0)` by rw [Abbr `s2`] + >- ( + `0 ≤ (i + s1) % s2` suffices_by intLib.COOPER_TAC >> + drule INT_MOD_BOUNDS >> + rw []) + >- ( + `(i + s1) % s2 < 2 * s1` suffices_by intLib.COOPER_TAC >> + `2 * s1 = s2` by rw [Abbr `s1`, Abbr `s2`, GSYM EXP] >> + drule INT_MOD_BOUNDS >> + rw [Abbr `s1`, Abbr `s2`]) +QED + +Theorem fits_ident: + ∀i size. 0 < size ⇒ (ifits i size ⇔ truncate_2comp i size = i) +Proof + rw [ifits_def, truncate_2comp_def] >> + rw [intLib.COOPER_PROVE ``!(x:int) y z. x - y = z <=> x = y + z``] >> + qmatch_goalsub_abbrev_tac `(_ + s1) % s2` >> + `s2 ≠ 0 ∧ ¬(s2 < 0)` by rw [Abbr `s2`] >> + `2 * s1 = s2` by rw [Abbr `s1`, Abbr `s2`, GSYM EXP] >> + eq_tac >> + rw [] + >- ( + simp [Once INT_ADD_COMM] >> + irule INT_LESS_MOD >> + rw [] >> + intLib.COOPER_TAC) + >- ( + `0 ≤ (i + s1) % (2 * s1)` suffices_by intLib.COOPER_TAC >> + drule INT_MOD_BOUNDS >> + simp []) + >- ( + `(i + s1) % (2 * s1) < 2 * s1` suffices_by intLib.COOPER_TAC >> + drule INT_MOD_BOUNDS >> + simp []) +QED + +Theorem i2n_n2i: + !n size. 0 < size ⇒ (nfits n size ⇔ (i2n (n2i n size) = n)) +Proof + rw [nfits_def, n2i_def, i2n_def] >> rw [] + >- intLib.COOPER_TAC + >- ( + `2 ** size ≤ n` by intLib.COOPER_TAC >> simp [INT_SUB] >> + Cases_on `n = 0` >> fs [] >> + `n - 2 ** size < n` suffices_by intLib.COOPER_TAC >> + irule SUB_LESS >> simp []) + >- ( + `2 ** (size - 1) < 2 ** size` suffices_by intLib.COOPER_TAC >> + fs []) +QED + +Theorem n2i_i2n: + !i size. 0 < size ⇒ (ifits i size ⇔ (n2i (i2n (IntV i size)) size) = IntV i size) +Proof + rw [ifits_def, n2i_def, i2n_def] >> rw [] >> fs [] + >- ( + eq_tac >> rw [] + >- ( + simp [intLib.COOPER_PROVE ``∀(x:int) y z. x - y = z ⇔ x = y + z``] >> + `2 ** (size - 1) < 2 ** size` suffices_by intLib.COOPER_TAC >> + fs [INT_OF_NUM]) + >- ( + fs [intLib.COOPER_PROVE ``∀(x:int) y z. x - y = z ⇔ x = y + z``] >> + fs [INT_OF_NUM] >> + `?j. i = -j` by intLib.COOPER_TAC >> rw [] >> fs [] >> + qpat_x_assum `_ ≤ Num _` mp_tac >> + fs [GSYM INT_OF_NUM] >> + ASM_REWRITE_TAC [GSYM INT_LE] >> rw [] >> + `2 ** size = 2 * 2 ** (size - 1)` by rw [GSYM EXP, ADD1] >> fs [] >> + intLib.COOPER_TAC) + >- intLib.COOPER_TAC) + >- ( + eq_tac >> rw [] + >- intLib.COOPER_TAC + >- intLib.COOPER_TAC >> + `0 ≤ i` by intLib.COOPER_TAC >> + fs [GSYM INT_OF_NUM] >> + `&(2 ** size) = 0` by intLib.COOPER_TAC >> + fs []) + >- ( + eq_tac >> rw [] + >- ( + `2 ** size = 2 * 2 ** (size - 1)` by rw [GSYM EXP, ADD1] >> fs [] >> + intLib.COOPER_TAC) + >- intLib.COOPER_TAC + >- intLib.COOPER_TAC) + >- intLib.COOPER_TAC +QED + +Theorem w2n_i2n: + ∀w. w2n (w : 'a word) = i2n (IntV (w2i w) (dimindex (:'a))) +Proof + rw [i2n_def] >> Cases_on `w` >> fs [] + >- ( + `INT_MIN (:α) ≤ n` + by ( + fs [w2i_def] >> rw [] >> + BasicProvers.EVERY_CASE_TAC >> fs [word_msb_n2w_numeric] >> + rfs []) >> + rw [w2i_n2w_neg, dimword_def, int_arithTheory.INT_NUM_SUB]) + >- ( + `n < INT_MIN (:'a)` + by ( + fs [w2i_def] >> rw [] >> + BasicProvers.EVERY_CASE_TAC >> fs [word_msb_n2w_numeric] >> + rfs []) >> + rw [w2i_n2w_pos]) +QED + +export_theory (); diff --git a/sledge/semantics/llvmScript.sml b/sledge/semantics/llvmScript.sml index cdd24bc8f..da4914ab5 100644 --- a/sledge/semantics/llvmScript.sml +++ b/sledge/semantics/llvmScript.sml @@ -136,12 +136,17 @@ End (* ----- Semantic states ----- *) +Definition pointer_size_def: + pointer_size = 8 +End + Datatype: flat_v = | W1V word1 | W8V word8 | W32V word32 | W64V word64 + (* LLVM guarantees that 64 bits is enough to hold a pointer *) | PtrV word64 | UndefV End @@ -172,13 +177,21 @@ End (* ----- Things about types ----- *) +(* Given a number n that fits into pointer_size number of bytes, create the + * pointer value. Since the pointer is represented as a 64-bit word, + * pointer_size must be 8 or less, which LLVM guarantees *) +Definition mk_ptr_def: + mk_ptr n = + if n < 256 ** pointer_size then Some (FlatV (PtrV (n2w n))) else None +End + (* How many bytes a value of the given type occupies *) Definition sizeof_def: (sizeof (IntT W1) = 1) ∧ (sizeof (IntT W8) = 1) ∧ (sizeof (IntT W32) = 4) ∧ (sizeof (IntT W64) = 8) ∧ - (sizeof (PtrT _) = 8) ∧ + (sizeof (PtrT _) = pointer_size) ∧ (sizeof (ArrT n t) = n * sizeof t) ∧ (sizeof (StrT ts) = sum (map sizeof ts)) Termination @@ -200,6 +213,7 @@ Termination res_tac >> decide_tac End +(* Are the indices all in bounds? *) Definition indices_ok_def: (indices_ok _ [] ⇔ T) ∧ (indices_ok (ArrT n t) (i::indices) ⇔ @@ -209,12 +223,13 @@ Definition indices_ok_def: (indices_ok _ _ ⇔ F) End +(* Which values have which types *) Inductive value_type: (value_type (IntT W1) (FlatV (W1V w1))) ∧ (value_type (IntT W8) (FlatV (W8V w8))) ∧ (value_type (IntT W32) (FlatV (W32V w32))) ∧ (value_type (IntT W64) (FlatV (W64V w64))) ∧ - (value_type (PtrT _) (FlatV (PtrV w64))) ∧ + (value_type (PtrT _) (FlatV (PtrV ptr))) ∧ (every (value_type t) vs ∧ length vs = n ∧ first_class_type t ⇒ value_type (ArrT n t) (AggV vs)) ∧ @@ -223,6 +238,7 @@ Inductive value_type: value_type (StrT ts) (AggV vs)) End +(* Get the component of a type referred by the indices *) Definition extract_type_def: (extract_type t [] = Some t) ∧ (extract_type (ArrT n t) (i::idx) = @@ -257,6 +273,7 @@ End (* ----- Semantic transitions ----- *) +(* Put a 64-bit word into a smaller value, truncating if necessary *) Definition w64_cast_def: (w64_cast w (IntT W1) = Some (FlatV (W1V (w2w w)))) ∧ (w64_cast w (IntT W8) = Some (FlatV (W8V (w2w w)))) ∧ @@ -265,22 +282,37 @@ Definition w64_cast_def: (w64_cast _ _ = None) End -Definition cast_w64_def: - (cast_w64 (FlatV (W1V w)) = Some (w2w w)) ∧ - (cast_w64 (FlatV (W8V w)) = Some (w2w w)) ∧ - (cast_w64 (FlatV (W32V w)) = Some (w2w w)) ∧ - (cast_w64 (FlatV (W64V w)) = Some w) ∧ - (cast_w64 _ = None) +Definition bool_to_v_def: + bool_to_v b = if b then FlatV (W1V 1w) else FlatV (W1V 0w) End -Definition cast_num_def: - cast_num v = option_map w2n (cast_w64 v) +(* Convert a word value into an integer, interpreting the word in 2's complement *) +Definition signed_v_to_int_def: + (signed_v_to_int (FlatV (W1V w)) = Some (w2i w)) ∧ + (signed_v_to_int (FlatV (W8V w)) = Some (w2i w)) ∧ + (signed_v_to_int (FlatV (W32V w)) = Some (w2i w)) ∧ + (signed_v_to_int (FlatV (W64V w)) = Some (w2i w)) ∧ + (signed_v_to_int _ = None) End -Definition bool_to_v_def: - bool_to_v b = if b then FlatV (W1V 1w) else FlatV (W1V 0w) +(* Convert a non-negative word value (interpreted as 2's complement) into a natural number *) +Definition signed_v_to_num_def: + signed_v_to_num v = + option_join + (option_map (\i. if i < 0 then None else Some (Num i)) (signed_v_to_int v)) +End + +(* Convert a word value into a natural number, interpreting the word as unsigned *) +Definition unsigned_v_to_num_def: + (unsigned_v_to_num (FlatV (W1V w)) = Some (w2n w)) ∧ + (unsigned_v_to_num (FlatV (W8V w)) = Some (w2n w)) ∧ + (unsigned_v_to_num (FlatV (W32V w)) = Some (w2n w)) ∧ + (unsigned_v_to_num (FlatV (W64V w)) = Some (w2n w)) ∧ + (unsigned_v_to_num _ = None) End +(* TODO: This is a bit of a mess. Consider changing to a relation to deal with + * partiality *) Definition eval_const_def: (eval_const g (IntC W1 i) = FlatV (W1V (i2w i))) ∧ (eval_const g (IntC W8 i) = FlatV (W8V (i2w i))) ∧ @@ -289,12 +321,12 @@ Definition eval_const_def: (eval_const g (StrC tconsts) = AggV (map (eval_const g) (map snd tconsts))) ∧ (eval_const g (ArrC tconsts) = AggV (map (eval_const g) (map snd tconsts))) ∧ (eval_const g (GepC ty ptr (t, idx) indices) = - case (eval_const g ptr, cast_num (eval_const g idx)) of - | (FlatV (PtrV w), Some n) => - let ns = map (λ(t,ci). case cast_num (eval_const g ci) of None => 0 | Some n => n) indices in + case (eval_const g ptr, signed_v_to_num (eval_const g idx)) of + | (FlatV (PtrV ptr), Some n) => + let ns = map (λ(t,ci). case signed_v_to_num (eval_const g ci) of None => 0 | Some n => n) indices in (case get_offset ty ns of | None => FlatV UndefV - | Some off => FlatV (PtrV (n2w (w2n w + sizeof ty * n + off)))) + | Some off => FlatV (PtrV (n2w ((w2n ptr) + (sizeof ty) * n + off)))) | _ => FlatV UndefV) ∧ (eval_const g (GlobalC var) = case flookup g var of @@ -312,21 +344,11 @@ Termination End Definition eval_def: - (eval s (Variable v) = - case flookup s.locals v of - | None => <| poison := F; value := FlatV (W1V 0w) |> - | Some v => v) ∧ - (eval s (Constant c) = <| poison := F; value := eval_const s.globals c |>) -End - -Definition v2n_def: - (v2n (FlatV (W1V b)) = Some (if T then 1 else 0)) ∧ - (v2n (FlatV (W8V w8)) = Some (w2n w8)) ∧ - (v2n (FlatV (W32V w32)) = Some (w2n w32)) ∧ - (v2n (FlatV (W64V w64)) = Some (w2n w64)) ∧ - (v2n _ = None) + (eval s (Variable v) = flookup s.locals v) ∧ + (eval s (Constant c) = Some <| poison := F; value := eval_const s.globals c |>) End +(* BEGIN Functions to interface to the generic memory model *) Definition type_to_shape_def: (type_to_shape (IntT s) = Flat (sizeof (IntT s)) (IntT s)) ∧ (type_to_shape (PtrT t) = Flat (sizeof (PtrT t)) (PtrT t)) ∧ @@ -339,11 +361,11 @@ Termination End Definition convert_value_def: - (convert_value (IntT W1) w = W1V (w2w w)) ∧ - (convert_value (IntT W8) w = W8V (w2w w)) ∧ - (convert_value (IntT W32) w = W32V (w2w w)) ∧ - (convert_value (IntT W64) w = W64V w) ∧ - (convert_value (PtrT _) w = PtrV w) + (convert_value (IntT W1) n = W1V (n2w n)) ∧ + (convert_value (IntT W8) n = W8V (n2w n)) ∧ + (convert_value (IntT W32) n = W32V (n2w n)) ∧ + (convert_value (IntT W64) n = W64V (n2w n)) ∧ + (convert_value (PtrT _) n = PtrV (n2w n)) End Definition bytes_to_llvm_value_def: @@ -352,11 +374,11 @@ Definition bytes_to_llvm_value_def: End Definition unconvert_value_def: - (unconvert_value (W1V w) = (1, w2w w)) ∧ - (unconvert_value (W8V w) = (1, w2w w)) ∧ - (unconvert_value (W32V w) = (4, w2w w)) ∧ - (unconvert_value (W64V w) = (8, w)) ∧ - (unconvert_value (PtrV w) = (8, w)) + (unconvert_value (W1V w) = (1, w2n w)) ∧ + (unconvert_value (W8V w) = (1, w2n w)) ∧ + (unconvert_value (W32V w) = (4, w2n w)) ∧ + (unconvert_value (W64V w) = (8, w2n w)) ∧ + (unconvert_value (PtrV w) = (pointer_size, w2n w)) End Definition llvm_value_to_bytes_def: @@ -364,17 +386,27 @@ Definition llvm_value_to_bytes_def: value_to_bytes unconvert_value v End +(* END Functions to interface to the generic memory model *) + Definition do_sub_def: - do_sub (nuw:bool) (nsw:bool) (v1:pv) (v2:pv) = - let (diff, u_overflow, s_overflow) = - case (v1.value, v2.value) of - | (FlatV (W1V w1), FlatV (W1V w2)) => (FlatV o W1V ## I) (add_with_carry (w1, ¬w2, T)) - | (FlatV (W8V w1), FlatV (W8V w2)) => (FlatV o W8V ## I) (add_with_carry (w1, ¬w2, T)) - | (FlatV (W32V w1), FlatV (W32V w2)) => (FlatV o W32V ## I) (add_with_carry (w1, ¬w2, T)) - | (FlatV (W64V w1), FlatV (W64V w2)) => (FlatV o W64V ## I) (add_with_carry (w1, ¬w2, T)) + do_sub (nuw:bool) (nsw:bool) (v1:pv) (v2:pv) t = + let diff = + case (v1.value, v2.value, t) of + | (FlatV (W1V w1), FlatV (W1V w2), IntT W1) => + Some ((FlatV o W1V ## I) (add_with_carry (w1, ¬w2, T))) + | (FlatV (W8V w1), FlatV (W8V w2), IntT W8) => + Some ((FlatV o W8V ## I) (add_with_carry (w1, ¬w2, T))) + | (FlatV (W32V w1), FlatV (W32V w2), IntT W32) => + Some ((FlatV o W32V ## I) (add_with_carry (w1, ¬w2, T))) + | (FlatV (W64V w1), FlatV (W64V w2), IntT W64) => + Some ((FlatV o W64V ## I) (add_with_carry (w1, ¬w2, T))) + | _ => None in - let p = ((nuw ∧ u_overflow) ∨ (nsw ∧ s_overflow) ∨ v1.poison ∨ v2.poison) in - <| poison := p; value := diff |> + option_map + (\(diff, u_overflow, s_overflow). + let p = ((nuw ∧ u_overflow) ∨ (nsw ∧ s_overflow) ∨ v1.poison ∨ v2.poison) in + <| poison := p; value := diff |>) + diff End Definition get_comp_def: @@ -385,19 +417,19 @@ End Definition do_icmp_def: do_icmp c v1 v2 = - <| poison := (v1.poison ∨ v2.poison); - value := bool_to_v ( - case (v1.value, v2.value) of - | (FlatV (W1V w1), FlatV (W1V w2)) => (get_comp c) w1 w2 - | (FlatV (W8V w1), FlatV (W8V w2)) => (get_comp c) w1 w2 - | (FlatV (W32V w1), FlatV (W32V w2)) => (get_comp c) w1 w2 - | (FlatV (W64V w1), FlatV (W64V w2)) => (get_comp c) w1 w2 - | (FlatV (PtrV w1), FlatV (PtrV w2)) => (get_comp c) w1 w2) |> + option_map (\b. <| poison := (v1.poison ∨ v2.poison); value := bool_to_v b |>) + (case (v1.value, v2.value) of + | (FlatV (W1V w1), FlatV (W1V w2)) => Some ((get_comp c) w1 w2) + | (FlatV (W8V w1), FlatV (W8V w2)) => Some ((get_comp c) w1 w2) + | (FlatV (W32V w1), FlatV (W32V w2)) => Some ((get_comp c) w1 w2) + | (FlatV (W64V w1), FlatV (W64V w2)) => Some ((get_comp c) w1 w2) + | (FlatV (PtrV w1), FlatV (PtrV w2)) => Some ((get_comp c) w1 w2) + | _ => None) End Definition do_phi_def: do_phi from_l s (Phi id _ entries) = - option_map (λarg. (id, eval s arg)) (alookup entries from_l) + option_join (option_map (λarg. option_map (\v. (id, v)) (eval s arg)) (alookup entries from_l)) End Definition extract_value_def: @@ -436,11 +468,12 @@ End Inductive step_instr: (s.stack = fr::st ∧ - deallocate fr.stack_allocs s.heap = new_h + deallocate fr.stack_allocs s.heap = new_h ∧ + eval s a = Some v ⇒ step_instr prog s (Ret (t, a)) - (update_result fr.result_var (eval s a) + (update_result fr.result_var v <| ip := fr.ret; globals := s.globals; locals := fr.saved_locals; @@ -458,7 +491,7 @@ Inductive step_instr: * %r2 = phi [%r1, %l] * %r1 = phi [0, %l] *) - (eval s a = <| poison := p; value := FlatV (W1V tf) |> ∧ + (eval s a = Some <| poison := p; value := FlatV (W1V tf) |> ∧ l = Some (if tf = 1w then l1 else l2) ∧ alookup prog s.ip.f = Some d ∧ alookup d.blocks l = Some <| h := Head phis None; body := b |> ∧ @@ -472,22 +505,29 @@ Inductive step_instr: (* TODO *) (step_instr prog s (Invoke r t a args l1 l2) s) ∧ - (step_instr prog s + (eval s a1 = Some v1 ∧ + eval s a2 = Some v2 ∧ + do_sub nuw nsw v1 v2 t = Some v3 + ⇒ + step_instr prog s (Sub r nuw nsw t a1 a2) - (inc_pc (update_result r (do_sub nuw nsw (eval s a1) (eval s a2)) s))) ∧ + (inc_pc (update_result r v3 s))) ∧ - (eval s a = v ∧ - map (λci. cast_num (eval_const s.globals ci)) const_indices = map Some ns ∧ + (eval s a = Some v ∧ + (* The manual implies (but does not explicitly state) that the indices are + * interpreted as signed numbers *) + map (λci. signed_v_to_num (eval_const s.globals ci)) const_indices = map Some ns ∧ extract_value v.value ns = Some result ⇒ step_instr prog s (Extractvalue r (t, a) const_indices) - (inc_pc (update_result r - <| poison := v.poison; value := result |> s))) ∧ + (inc_pc (update_result r <| poison := v.poison; value := result |> s))) ∧ - (eval s a1 = v1 ∧ - eval s a2 = v2 ∧ - map (λci. cast_num (eval_const s.globals ci)) const_indices = map Some ns ∧ + (eval s a1 = Some v1 ∧ + eval s a2 = Some v2 ∧ + (* The manual implies (but does not explicitly state) that the indices are + * interpreted as signed numbers *) + map (λci. signed_v_to_num (eval_const s.globals ci)) const_indices = map Some ns ∧ insert_value v1.value v2.value ns = Some result ⇒ step_instr prog s @@ -495,16 +535,19 @@ Inductive step_instr: (inc_pc (update_result r <| poison := (v1.poison ∨ v2.poison); value := result |> s))) ∧ - (eval s a1 = v ∧ - v2n v.value = Some n ∧ - allocate s.heap (n * sizeof t) v.poison (n2, new_h) + (eval s a1 = Some v ∧ + (* TODO Question is the number to allocate interpreted as a signed or + * unsigned quantity. E.g., if we allocate i8 0xFF does that do 255 or -1? *) + signed_v_to_num v.value = Some n ∧ + allocate s.heap (n * sizeof t) v.poison (n2, new_h) ∧ + mk_ptr n2 = Some ptr ⇒ step_instr prog s (Alloca r t (t1, a1)) - (inc_pc (update_result r <| poison := v.poison; value := FlatV (PtrV (n2w n2)) |> + (inc_pc (update_result r <| poison := v.poison; value := ptr |> (s with heap := new_h)))) ∧ - (eval s a1 = <| poison := p1; value := FlatV (PtrV w) |> ∧ + (eval s a1 = Some <| poison := p1; value := FlatV (PtrV w) |> ∧ interval = Interval freeable (w2n w) (w2n w + sizeof t) ∧ is_allocated interval s.heap ∧ pbytes = get_bytes s.heap interval @@ -515,30 +558,34 @@ Inductive step_instr: value := fst (bytes_to_llvm_value t (map snd pbytes)) |> s))) ∧ - (eval s a2 = <| poison := p2; value := FlatV (PtrV w) |> ∧ + (eval s a2 = Some <| poison := p2; value := FlatV (PtrV w) |> ∧ + eval s a1 = Some v1 ∧ interval = Interval freeable (w2n w) (w2n w + sizeof t) ∧ is_allocated interval s.heap ∧ - bytes = llvm_value_to_bytes (eval s a1).value ∧ + bytes = llvm_value_to_bytes v1.value ∧ length bytes = sizeof t ⇒ step_instr prog s (Store (t1, a1) (t2, a2)) (inc_pc (s with heap := set_bytes p2 bytes (w2n w) s.heap))) ∧ - (map (eval s o snd) tindices = i1::indices ∧ - (eval s a1).value = FlatV (PtrV w1) ∧ - cast_num i1.value = Some n ∧ - map (λx. cast_num x.value) indices = map Some ns ∧ - get_offset t1 ns = Some off + (map (eval s o snd) tindices = map Some (i1::indices) ∧ + eval s a1 = Some v ∧ + v.value = FlatV (PtrV w1) ∧ + (* The manual states that the indices are interpreted as signed numbers *) + signed_v_to_num i1.value = Some i ∧ + map (λx. signed_v_to_num x.value) indices = map Some is ∧ + get_offset t1 is = Some off ∧ + mk_ptr (w2n w1 + sizeof t1 * i + off) = Some ptr ⇒ step_instr prog s (Gep r t ((PtrT t1), a1) tindices) (inc_pc (update_result r <| poison := (v1.poison ∨ i1.poison ∨ exists (λv. v.poison) indices); - value := FlatV (PtrV (n2w (w2n w1 + sizeof t1 * n + off))) |> + value := ptr |> s))) ∧ - (eval s a1 = v1 ∧ + (eval s a1 = Some v1 ∧ v1.value = FlatV (PtrV w) ∧ w64_cast w t = Some int_v ⇒ @@ -546,23 +593,29 @@ Inductive step_instr: (Ptrtoint r (t1, a1) t) (inc_pc (update_result r <| poison := v1.poison; value := int_v |> s))) ∧ - (eval s a1 = v1 ∧ - cast_w64 v1.value = Some w + (eval s a1 = Some v1 ∧ + unsigned_v_to_num v1.value = Some n ∧ + mk_ptr n = Some ptr ⇒ step_instr prog s (Inttoptr r (t1, a1) t) - (inc_pc (update_result r <| poison := v1.poison; value := FlatV (PtrV w) |> s))) ∧ + (inc_pc (update_result r <| poison := v1.poison; value := ptr |> s))) ∧ - (step_instr prog s + (eval s a1 = Some v1 ∧ + eval s a2 = Some v2 ∧ + do_icmp c v1 v2 = Some v3 + ⇒ + step_instr prog s (Icmp r c t a1 a2) - (inc_pc (update_result r (do_icmp c (eval s a1) (eval s a2)) s))) ∧ + (inc_pc (update_result r v3 s))) ∧ - (alookup prog fname = Some d + (alookup prog fname = Some d ∧ + map (eval s o snd) targs = map Some vs ⇒ step_instr prog s (Call r t fname targs) <| ip := <| f := fname; b := None; i := 0 |>; - locals := alist_to_fmap (zip (map snd d.params, map (eval s o snd) targs)); + locals := alist_to_fmap (zip (map snd d.params, vs)); globals := s.globals; stack := <| ret := s.ip with i := s.ip.i + 1; @@ -663,7 +716,7 @@ Definition is_init_state_def: globals_ok s ∧ heap_ok s.heap ∧ fdom s.globals = fdom global_init ∧ - s.heap.valid_addresses = { A n | n < dimword (:64) } ∧ + s.heap.valid_addresses = { A n | n < 256 ** pointer_size } ∧ (* The initial allocations for globals are not freeable *) s.heap.allocations ⊆ { Interval F start stop | T } ∧ (* The heap starts with the initial values of the globals written to their diff --git a/sledge/semantics/llvm_propScript.sml b/sledge/semantics/llvm_propScript.sml index 8553be87f..6142b75fb 100644 --- a/sledge/semantics/llvm_propScript.sml +++ b/sledge/semantics/llvm_propScript.sml @@ -98,7 +98,7 @@ Proof irule b2v_v2b >> qexists_tac `f` >> rw [] >> unabbrev_all_tac >> fs [] >> - fs [unconvert_value_def, convert_value_def, value_type_cases] >> + fs [unconvert_value_def, convert_value_def, value_type_cases, pointer_size_def] >> wordsLib.WORD_DECIDE_TAC QED diff --git a/sledge/semantics/llvm_to_llairScript.sml b/sledge/semantics/llvm_to_llairScript.sml index 7ec5151d1..2d573bc55 100644 --- a/sledge/semantics/llvm_to_llairScript.sml +++ b/sledge/semantics/llvm_to_llairScript.sml @@ -61,9 +61,16 @@ Definition translate_size_def: (translate_size W64 = 64) End -(* TODO *) Definition translate_ty_def: - translate_ty = ARB : ty -> typ + (translate_ty (FunT t ts) = FunctionT (translate_ty t) (map translate_ty ts)) ∧ + (translate_ty (IntT s) = IntegerT (translate_size s)) ∧ + (translate_ty (PtrT t) = PointerT (translate_ty t)) ∧ + (translate_ty (ArrT n t) = ArrayT (translate_ty t) n) ∧ + (translate_ty (StrT ts) = TupleT (map translate_ty ts)) +Termination + WF_REL_TAC `measure ty_size` >> rw [] >> + Induct_on `ts` >> rw [ty_size_def] >> + res_tac >> decide_tac End Definition translate_glob_var_def: @@ -96,10 +103,13 @@ Termination End Definition translate_arg_def: - (translate_arg emap (Constant c) t = translate_const c) ∧ - (translate_arg emap (Variable r) t = + (translate_arg emap (Constant c) = translate_const c) ∧ + (translate_arg emap (Variable r) = case flookup emap r of - | None => Var (translate_reg r t) + (* With the current strategy of threading the emap through the whole + * function, we should never get a None here. + *) + | None => Var (translate_reg r (IntT W64)) | Some e => e) End @@ -113,11 +123,11 @@ End (* TODO *) Definition translate_instr_to_exp_def: (translate_instr_to_exp emap (llvm$Sub _ _ _ ty a1 a2) = - llair$Sub (translate_ty ty) (translate_arg emap a1 ty) (translate_arg emap a2 ty)) ∧ + llair$Sub (translate_ty ty) (translate_arg emap a1) (translate_arg emap a2)) ∧ (translate_instr_to_exp emap (Extractvalue _ (t, a) cs) = - foldl (λe c. Select e (translate_const c)) (translate_arg emap a t) cs) ∧ + foldl (λe c. Select e (translate_const c)) (translate_arg emap a) cs) ∧ (translate_instr_to_exp emap (Insertvalue _ (t1, a1) (t2, a2) cs) = - translate_updatevalue (translate_arg emap a1 t1) (translate_arg emap a2 t2) cs) + translate_updatevalue (translate_arg emap a1) (translate_arg emap a2) cs) End (* This translation of insertvalue to update and select is quadratic in the @@ -170,15 +180,15 @@ End (* TODO *) Definition translate_instr_to_inst_def: (translate_instr_to_inst emap (llvm$Store (t1, a1) (t2, a2)) = - llair$Store (translate_arg emap a1 t1) (translate_arg emap a2 t2) (sizeof t2)) ∧ + llair$Store (translate_arg emap a1) (translate_arg emap a2) (sizeof t2)) ∧ (translate_instr_to_inst emap (Load r t (t1, a1)) = - Load (translate_reg r t1) (translate_arg emap a1 t1) (sizeof t)) + Load (translate_reg r t) (translate_arg emap a1) (sizeof t)) End (* TODO *) Definition translate_instr_to_term_def: translate_instr_to_term f emap (Br a l1 l2) = - Iswitch (translate_arg emap a (IntT W1)) [translate_label f l2; translate_label f l1] + Iswitch (translate_arg emap a) [translate_label f l2; translate_label f l1] End Datatype: @@ -232,7 +242,9 @@ End (* Translate a list of instructions into an block. f is the name of the function * that the instructions are in, reg_to_keep is the set of variables that we * want to keep assignments to (e.g., because of sharing in the expression - * structure. * emap is a mapping of registers to expressions that compute the + * structure. + * + * emap is a mapping of registers to expressions that compute the * value that should have been in the expression. * * This tries to build large expressions, and omits intermediate assignments @@ -288,7 +300,8 @@ Definition translate_header_def: map (λ(r, t, largs). (translate_reg r t, - map (λ(l, arg). (translate_label_opt f entry l, translate_arg fempty arg t)) largs)) + (* TODO: shouldn't use fempty here *) + map (λ(l, arg). (translate_label_opt f entry l, translate_arg fempty arg)) largs)) (map dest_phi phis)) End @@ -357,7 +370,7 @@ Definition translate_param_def: End Definition translate_def_def: - translate_def (Lab f) d = + translate_def f d = let used_names = ARB in let entry_name = gen_name used_names "entry" in (* TODO *) @@ -383,4 +396,14 @@ Definition translate_def_def: fthrow := ARB |> End +Definition dest_fn_def: + dest_fn (Fn f) = f +End + +Definition translate_prog_def: + translate_prog p = + <| globals := ARB; + functions := map (\(fname, d). (dest_fn fname, translate_def (dest_fn fname) d)) p |> +End + export_theory (); diff --git a/sledge/semantics/llvm_to_llair_propScript.sml b/sledge/semantics/llvm_to_llair_propScript.sml new file mode 100644 index 000000000..9efdfb781 --- /dev/null +++ b/sledge/semantics/llvm_to_llair_propScript.sml @@ -0,0 +1,278 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +(* Proofs about llvm to llair translation *) + +open HolKernel boolLib bossLib Parse; +open listTheory arithmeticTheory pred_setTheory finite_mapTheory wordsTheory integer_wordTheory; +open settingsTheory miscTheory llvmTheory llairTheory llair_propTheory llvm_to_llairTheory; + +new_theory "llvm_to_llair_prop"; + +numLib.prefer_num (); + +Inductive v_rel: + (∀w. v_rel (FlatV (PtrV w)) (FlatV (IntV (w2i w) pointer_size))) ∧ + (∀w. v_rel (FlatV (W1V w)) (FlatV (IntV (w2i w) 1))) ∧ + (∀w. v_rel (FlatV (W8V w)) (FlatV (IntV (w2i w) 8))) ∧ + (∀w. v_rel (FlatV (W32V w)) (FlatV (IntV (w2i w) 32))) ∧ + (∀w. v_rel (FlatV (W64V w)) (FlatV (IntV (w2i w) 64))) ∧ + (∀vs1 vs2. + list_rel v_rel vs1 vs2 + ⇒ + v_rel (AggV vs1) (AggV vs2)) +End + +(* Define when an LLVM state is related to a llair one. Parameterised over a + * relation on program counters, which chould be generated by the + * transformation. It is not trivial because the translation cuts up blocks at + * function calls and for remiving phi nodes. + * + * Also parameterised on a map for locals relating LLVM registers to llair + * expressions that compute the value in that register. This corresponds to part + * of the translation's state. + *) +Definition state_rel_def: + state_rel pc_rel emap (s:llvm$state) (s':llair$state) ⇔ + pc_rel s.ip s'.bp ∧ + (* Unmapped registers in LLVM are unmapped in llair too *) + (∀r. flookup s.locals r = None ⇒ flookup emap r = None) ∧ + (* Mapped LLVM registers have a related value in the emap (after + * evaluating) *) + (∀r v. flookup s.locals r = Some v ⇒ + ∃v' e. + v_rel v.value v' ∧ + flookup emap r = Some e ∧ eval_exp s' e v') ∧ + erase_tags s.heap = s'.heap +End + +Theorem translate_arg_correct: + ∀s a v pc_rel emap s'. + state_rel pc_rel emap s s' ∧ + eval s a = Some v + ⇒ + ∃v'. eval_exp s' (translate_arg emap a) v' ∧ v_rel v.value v' +Proof + Cases_on `a` >> rw [eval_def, translate_arg_def] + >- cheat >> + CASE_TAC >> fs [PULL_EXISTS, state_rel_def] >> + res_tac >> rfs [] >> metis_tac [] +QED + +Theorem translate_constant_correct_lem: + (∀c s pc_rel emap s' (g : glob_var |-> β # word64). + state_rel pc_rel emap s s' + ⇒ + ∃v'. eval_exp s' (translate_const c) v' ∧ v_rel (eval_const g c) v') ∧ + (∀(cs : (ty # const) list) s pc_rel emap s' (g : glob_var |-> β # word64). + state_rel pc_rel emap s s' + ⇒ + ∃v'. list_rel (eval_exp s') (map (translate_const o snd) cs) v' ∧ list_rel v_rel (map (eval_const g o snd) cs) v') ∧ + (∀(tc : ty # const) s pc_rel emap s' (g : glob_var |-> β # word64). + state_rel pc_rel emap s s' + ⇒ + ∃v'. eval_exp s' (translate_const (snd tc)) v' ∧ v_rel (eval_const g (snd tc)) v') +Proof + ho_match_mp_tac const_induction >> rw [translate_const_def] >> + simp [Once eval_exp_cases, eval_const_def] + >- ( + Cases_on `s` >> simp [eval_const_def, translate_size_def, v_rel_cases] >> + metis_tac [truncate_2comp_i2w_w2i, dimindex_1, dimindex_8, dimindex_32, dimindex_64]) + >- ( + simp [v_rel_cases, PULL_EXISTS, MAP_MAP_o] >> + fs [combinTheory.o_DEF, pairTheory.LAMBDA_PROD] >> + metis_tac []) + >- ( + simp [v_rel_cases, PULL_EXISTS, MAP_MAP_o] >> + fs [combinTheory.o_DEF, pairTheory.LAMBDA_PROD] >> + metis_tac []) + >- cheat + >- cheat + >- cheat + >- cheat +QED + +Theorem translate_constant_correct: + ∀c s pc_rel emap s' g. + state_rel pc_rel emap s s' + ⇒ + ∃v'. eval_exp s' (translate_const c) v' ∧ v_rel (eval_const g c) v' +Proof + metis_tac [translate_constant_correct_lem] +QED + +Theorem restricted_i2w_11: + ∀i (w:'a word). INT_MIN (:'a) ≤ i ∧ i ≤ INT_MAX (:'a) ⇒ (i2w i : 'a word) = i2w (w2i w) ⇒ i = w2i w +Proof + rw [i2w_def] + >- ( + Cases_on `n2w (Num (-i)) = INT_MINw` >> + rw [w2i_neg, w2i_INT_MINw] >> + fs [word_L_def] >> + `?j. 0 ≤ j ∧ i = -j` by intLib.COOPER_TAC >> + rw [] >> + fs [] >> + `INT_MIN (:'a) < dimword (:'a)` by metis_tac [INT_MIN_LT_DIMWORD] >> + `Num j MOD dimword (:'a) = Num j` + by (irule LESS_MOD >> intLib.COOPER_TAC) >> + fs [] + >- intLib.COOPER_TAC + >- ( + `Num j < INT_MIN (:'a)` by intLib.COOPER_TAC >> + fs [w2i_n2w_pos, integerTheory.INT_OF_NUM])) + >- ( + fs [GSYM INT_MAX, INT_MAX_def] >> + `Num i < INT_MIN (:'a)` by intLib.COOPER_TAC >> + rw [w2i_n2w_pos, integerTheory.INT_OF_NUM] >> + intLib.COOPER_TAC) +QED + +Theorem translate_extract_correct: + ∀pc_rel emap s1 s1' a v v1' e1' cs ns result. + state_rel pc_rel emap s1 s1' ∧ + map (λci. signed_v_to_num (eval_const s1.globals ci)) cs = map Some ns ∧ + extract_value v ns = Some result ∧ + eval_exp s1' e1' v1' ∧ + v_rel v v1' + ⇒ + ∃v2'. + eval_exp s1' (foldl (λe c. Select e (translate_const c)) e1' cs) v2' ∧ + v_rel result v2' +Proof + Induct_on `cs` >> rw [] >> fs [extract_value_def] + >- metis_tac [] >> + first_x_assum irule >> + Cases_on `ns` >> fs [] >> + qmatch_goalsub_rename_tac `translate_const c` >> + `?v2'. eval_exp s1' (translate_const c) v2' ∧ v_rel (eval_const s1.globals c) v2'` + by metis_tac [translate_constant_correct] >> + Cases_on `v` >> fs [extract_value_def] >> + qpat_x_assum `v_rel (AggV _) _` mp_tac >> + simp [Once v_rel_cases] >> rw [] >> + simp [Once eval_exp_cases, PULL_EXISTS] >> + fs [LIST_REL_EL_EQN] >> + qmatch_assum_rename_tac `_ = map Some is` >> + Cases_on `eval_const s1.globals c` >> fs [signed_v_to_num_def, signed_v_to_int_def] >> rw [] >> + `?i. v2' = FlatV i` by fs [v_rel_cases] >> fs [] >> + qmatch_assum_rename_tac `option_join _ = Some x` >> + `?size. i = IntV (&x) size` suffices_by metis_tac [] >> rw [] >> + qpat_x_assum `v_rel _ _` mp_tac >> + simp [v_rel_cases] >> rw [] >> fs [signed_v_to_int_def] >> rw [] >> + intLib.COOPER_TAC +QED + +Theorem translate_update_correct: + ∀pc_rel emap s1 s1' a v1 v1' v2 v2' e2 e2' e1' cs ns result. + state_rel pc_rel emap s1 s1' ∧ + map (λci. signed_v_to_num (eval_const s1.globals ci)) cs = map Some ns ∧ + insert_value v1 v2 ns = Some result ∧ + eval_exp s1' e1' v1' ∧ + v_rel v1 v1' ∧ + eval_exp s1' e2' v2' ∧ + v_rel v2 v2' + ⇒ + ∃v3'. + eval_exp s1' (translate_updatevalue e1' e2' cs) v3' ∧ + v_rel result v3' +Proof + Induct_on `cs` >> rw [] >> fs [insert_value_def, translate_updatevalue_def] + >- metis_tac [] >> + simp [Once eval_exp_cases, PULL_EXISTS] >> + Cases_on `ns` >> fs [] >> + Cases_on `v1` >> fs [insert_value_def] >> + rename [`insert_value (el x _) _ ns`] >> + Cases_on `insert_value (el x l) v2 ns` >> fs [] >> rw [] >> + qpat_x_assum `v_rel (AggV _) _` mp_tac >> simp [Once v_rel_cases] >> rw [] >> + simp [v_rel_cases] >> + qmatch_goalsub_rename_tac `translate_const c` >> + qexists_tac `vs2` >> simp [] >> + `?v4'. eval_exp s1' (translate_const c) v4' ∧ v_rel (eval_const s1.globals c) v4'` + by metis_tac [translate_constant_correct] >> + `?idx_size. v4' = FlatV (IntV (&x) idx_size)` + by ( + pop_assum mp_tac >> simp [Once v_rel_cases] >> + rw [] >> fs [signed_v_to_num_def, signed_v_to_int_def] >> + intLib.COOPER_TAC) >> + first_x_assum drule >> + disch_then drule >> + disch_then drule >> + disch_then (qspecl_then [`el x vs2`, `v2'`, `e2'`, `Select e1' (translate_const c)`] mp_tac) >> + simp [Once eval_exp_cases] >> + metis_tac [EVERY2_LUPDATE_same, LIST_REL_LENGTH, LIST_REL_EL_EQN] +QED + +Theorem translate_instr_to_exp_correct: + ∀emap instr r t s1 s1'. + classify_instr instr = Exp r t ∧ + state_rel pc_rel emap s1 s1' + ⇒ + (∀s2. step_instr prog s1 instr s2 ⇒ + (∃v pv. eval_exp s1' (translate_instr_to_exp emap instr) v ∧ + flookup s2.locals r = Some pv ∧ v_rel pv.value v)) +Proof + recInduct translate_instr_to_exp_ind >> + simp [translate_instr_to_exp_def, classify_instr_def] >> + conj_tac + >- ( (* Sub *) + rw [step_instr_cases, Once eval_exp_cases, do_sub_def, PULL_EXISTS] >> + simp [inc_pc_def, update_result_def, FLOOKUP_UPDATE] >> + simp [v_rel_cases, PULL_EXISTS] >> + first_x_assum (mp_then.mp_then mp_then.Any mp_tac translate_arg_correct) >> + disch_then drule >> + first_x_assum (mp_then.mp_then mp_then.Any mp_tac translate_arg_correct) >> + disch_then drule >> + BasicProvers.EVERY_CASE_TAC >> fs [translate_ty_def, translate_size_def] >> + rfs [v_rel_cases] >> + pairarg_tac >> fs [] >> + fs [pairTheory.PAIR_MAP, wordsTheory.FST_ADD_WITH_CARRY] >> + qmatch_goalsub_abbrev_tac `eval_exp _ _ (FlatV (IntV i1 _))` >> strip_tac >> + qmatch_goalsub_abbrev_tac `eval_exp _ _ (FlatV (IntV i2 _))` >> strip_tac >> + qexists_tac `i1` >> qexists_tac `i2` >> simp [] >> + unabbrev_all_tac >> + rw [] + >- ( + irule restricted_i2w_11 >> simp [word_sub_i2w] >> + `dimindex (:1) = 1` by rw [] >> + drule truncate_2comp_i2w_w2i >> + rw [word_sub_i2w] >> + metis_tac [w2i_ge, w2i_le, SIMP_CONV (srw_ss()) [] ``INT_MIN (:1)``, + SIMP_CONV (srw_ss()) [] ``INT_MAX (:1)``]) + >- ( + irule restricted_i2w_11 >> simp [word_sub_i2w] >> + `dimindex (:8) = 8` by rw [] >> + drule truncate_2comp_i2w_w2i >> + rw [word_sub_i2w] >> + metis_tac [w2i_ge, w2i_le, SIMP_CONV (srw_ss()) [] ``INT_MIN (:8)``, + SIMP_CONV (srw_ss()) [] ``INT_MAX (:8)``]) + >- ( + irule restricted_i2w_11 >> simp [word_sub_i2w] >> + `dimindex (:32) = 32` by rw [] >> + drule truncate_2comp_i2w_w2i >> + rw [word_sub_i2w] >> + metis_tac [w2i_ge, w2i_le, SIMP_CONV (srw_ss()) [] ``INT_MIN (:32)``, + SIMP_CONV (srw_ss()) [] ``INT_MAX (:32)``]) + >- ( + irule restricted_i2w_11 >> simp [word_sub_i2w] >> + `dimindex (:64) = 64` by rw [] >> + drule truncate_2comp_i2w_w2i >> + rw [word_sub_i2w] >> + metis_tac [w2i_ge, w2i_le, SIMP_CONV (srw_ss()) [] ``INT_MIN (:64)``, + SIMP_CONV (srw_ss()) [] ``INT_MAX (:64)``])) >> + conj_tac + >- ( + rw [step_instr_cases] >> + simp [inc_pc_def, update_result_def, FLOOKUP_UPDATE] >> + metis_tac [translate_arg_correct, translate_extract_correct]) >> + conj_tac + >- ( + rw [step_instr_cases] >> + simp [inc_pc_def, update_result_def, FLOOKUP_UPDATE] >> + metis_tac [translate_arg_correct, translate_update_correct]) >> + cheat +QED + +export_theory (); diff --git a/sledge/semantics/memory_modelScript.sml b/sledge/semantics/memory_modelScript.sml index 549be0955..2712063c4 100644 --- a/sledge/semantics/memory_modelScript.sml +++ b/sledge/semantics/memory_modelScript.sml @@ -47,6 +47,13 @@ Datatype: valid_addresses : addr set|> End +Definition erase_tags_def: + erase_tags h = + <| memory := (λ(t,b). ((), b)) o_f h.memory; + allocations := h.allocations; + valid_addresses := h.valid_addresses |> +End + (* shapes statically describe the shape of a value in a register. Flat shapes * take a given number of bytes, Array shapes are replicated a certain number of * times @@ -184,20 +191,21 @@ Definition deallocate_def: End -(* Read len bytes from the list of bytes, and convert it into a word value, +(* Read len bytes from the list of bytes, and convert it into a number, * little-endian encoding *) -Definition le_read_w_def: - le_read_w len (bs : word8 list) = +Definition le_read_num_def: + le_read_num len (bs : word8 list) = if length bs < len then - (l2w 256 (map w2n bs), []) + (l2n 256 (map w2n bs), []) else - (l2w 256 (map w2n (take len bs)), drop len bs) + (l2n 256 (map w2n (take len bs)), drop len bs) End -(* Return len bytes that are the little-endian encoding of the argument word *) -Definition le_write_w_def: - le_write_w len w = - let (l : word8 list) = map n2w (w2l 256 w) in +(* Return len bytes that are the little-endian encoding of the argument number, + * assuming that it fits*) +Definition le_write_num_def: + le_write_num len n = + let (l : word8 list) = map n2w (n2l 256 n) in take len (l ++ replicate (len - length l) 0w) End @@ -205,7 +213,7 @@ End * shape. The function f is applied to the word read from a flat list of bytes * in little-endian order. *) Definition bytes_to_value_def: - (bytes_to_value f (Flat n t) bs = (FlatV o f n t ## I) (le_read_w n bs)) ∧ + (bytes_to_value f (Flat n t) bs = (FlatV o f n t ## I) (le_read_num n bs)) ∧ (bytes_to_value f (Array s n) bs = (AggV ## I) (read_array f n s bs)) ∧ (bytes_to_value f (Tuple ts) bs = (AggV ## I) (read_str f ts bs)) ∧ (read_array f 0 s bs = ([], bs)) ∧ @@ -230,7 +238,7 @@ End Definition value_to_bytes_def: (value_to_bytes f (FlatV x) = let (size, word) = f x in - le_write_w size word) ∧ + le_write_num size word) ∧ (value_to_bytes f (AggV vs) = flat (map (value_to_bytes f) vs)) Termination WF_REL_TAC `measure (reg_v_size (\x. 0) o snd)` >> @@ -241,10 +249,10 @@ End (* ----- Theorems about converting between values and byte lists ----- *) -Theorem le_write_w_length: - ∀l x. length (le_write_w l w) = l +Theorem le_write_num_length: + ∀l x. length (le_write_num l w) = l Proof - rw [le_write_w_def] + rw [le_write_num_def] QED Theorem v2b_size_lem: @@ -255,7 +263,7 @@ Proof rw [value_to_bytes_def, sizeof_def, value_shape_def] >- ( pairarg_tac >> simp [] >> - metis_tac [FST, le_write_w_length]) + metis_tac [FST, le_write_num_length]) >- ( simp [LENGTH_FLAT, MAP_MAP_o, combinTheory.o_DEF] >> Induct_on `vs` >> rw [ADD1] >> fs [LEFT_ADD_DISTRIB] >> @@ -271,15 +279,15 @@ Proof QED Theorem b2v_size_lem: - (∀(f:num->'c ->'a word -> 'b) s bs. sizeof s ≤ length bs ⇒ + (∀(f:num->'c ->num -> 'b) s bs. sizeof s ≤ length bs ⇒ ∃v. bytes_to_value f s bs = (v, drop (sizeof s) bs)) ∧ - (∀(f:num->'c ->'a word -> 'b) n s bs. n * sizeof s ≤ length bs ⇒ + (∀(f:num->'c ->num -> 'b) n s bs. n * sizeof s ≤ length bs ⇒ ∃vs. read_array f n s bs = (vs, drop (n * sizeof s) bs)) ∧ - (∀(f:num->'c ->'a word -> 'b) ss bs. sum (map sizeof ss) ≤ length bs ⇒ + (∀(f:num->'c ->num -> 'b) ss bs. sum (map sizeof ss) ≤ length bs ⇒ ∃vs. read_str f ss bs = (vs, drop (sum (map sizeof ss)) bs)) Proof ho_match_mp_tac bytes_to_value_ind >> - rw [sizeof_def, bytes_to_value_def, le_read_w_def] >> + rw [sizeof_def, bytes_to_value_def, le_read_num_def] >> fs [] >- (simp [PAIR_MAP] >> metis_tac [SND]) >- ( @@ -305,15 +313,15 @@ Proof QED Theorem b2v_append_lem: - (∀(f:num->'c->'a word -> 'b) s bs. sizeof s ≤ length bs ⇒ + (∀(f:num->'c->num -> 'b) s bs. sizeof s ≤ length bs ⇒ bytes_to_value f s (bs ++ bs') = (I ## (λx. x ++ bs')) (bytes_to_value f s bs)) ∧ - (∀(f:num->'c->'a word -> 'b) n s bs. n * sizeof s ≤ length bs ⇒ + (∀(f:num->'c->num -> 'b) n s bs. n * sizeof s ≤ length bs ⇒ ∃vs. read_array f n s (bs ++ bs') = (I ## (λx. x ++ bs')) (read_array f n s bs)) ∧ - (∀(f:num->'c->'a word -> 'b) ss bs. sum (map sizeof ss) ≤ length bs ⇒ + (∀(f:num->'c-> num -> 'b) ss bs. sum (map sizeof ss) ≤ length bs ⇒ ∃vs. read_str f ss (bs ++ bs') = (I ## (λx. x ++ bs')) (read_str f ss bs)) Proof ho_match_mp_tac bytes_to_value_ind >> - rw [sizeof_def, bytes_to_value_def, le_read_w_def] >> + rw [sizeof_def, bytes_to_value_def, le_read_num_def] >> fs [TAKE_APPEND, DROP_APPEND, DECIDE ``!x y. x ≤ y ⇒ x - y = 0n``, ETA_THM] >- (simp [PAIR_MAP] >> metis_tac [SND]) @@ -344,31 +352,30 @@ Proof QED Theorem le_read_write: - ∀n w bs. - w2n w < 256 ** n ⇒ le_read_w n (le_write_w n w ⧺ bs) = (w, bs) + ∀size n bs. + n < 256 ** size ⇒ le_read_num size (le_write_num size n ⧺ bs) = (n, bs) Proof - rw [le_read_w_def, le_write_w_length] + rw [le_read_num_def, le_write_num_length] >- ( - `take n (le_write_w n w ⧺ bs) = le_write_w n w` - by metis_tac [le_write_w_length, TAKE_LENGTH_APPEND] >> - simp [le_write_w_def, w2l_def, l2w_def] >> - Cases_on `w` >> simp [] >> fs [l2n_padding, TAKE_APPEND, take_replicate] >> + `take size (le_write_num size n ⧺ bs) = le_write_num size n` + by metis_tac [le_write_num_length, TAKE_LENGTH_APPEND] >> + simp [le_write_num_def, w2l_def, l2w_def] >> + fs [l2n_padding, TAKE_APPEND, take_replicate] >> simp [MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF, mod_n2l] >> rename1 `n2l 256 m` >> - Cases_on `n = 0` >> fs [] >> - `length (n2l 256 m) ≤ n` + Cases_on `size = 0` >> fs [] >> + `length (n2l 256 m) ≤ size` by ( rw [LENGTH_n2l] >> `256 = 2 ** 8` by EVAL_TAC >> ASM_REWRITE_TAC [] >> simp [log_change_base_power, GSYM LESS_EQ] >> - `n2w m ≠ 0w` by simp [] >> rw [] >> fs [bitTheory.LOG2_def, dimword_def] >> `8 * (log 2 m DIV 8) ≤ log 2 m` by metis_tac [mul_div_bound, EVAL ``8 ≠ 0n``] >> - `LOG 2 m ≤ LOG 2 (256 ** n)` by simp [LOG_LE_MONO] >> + `LOG 2 m ≤ LOG 2 (256 ** size)` by simp [LOG_LE_MONO, LESS_IMP_LESS_OR_EQ] >> pop_assum mp_tac >> `256 = 2 ** 8` by EVAL_TAC >> ASM_REWRITE_TAC [EXP_MUL] >> simp [log_base_power] >> - Cases_on `log 2 m DIV 8 = n` >> rw [] >> + Cases_on `log 2 m DIV 8 = size` >> rw [] >> CCONTR_TAC >> fs [] >> `log 2 m = (8 * (log 2 m DIV 8))` by intLib.COOPER_TAC >> fs [] >> `2 ** log 2 m = 2 ** (8 * (log 2 m DIV 8))` by rw [] >> @@ -376,26 +383,24 @@ Proof `2 ** log 2 m ≤ m` by rw [exp_log_bound] >> decide_tac) >> simp [mod_n2l, l2n_n2l, TAKE_LENGTH_TOO_LONG]) - >- metis_tac [le_write_w_length, DROP_LENGTH_APPEND] + >- metis_tac [le_write_num_length, DROP_LENGTH_APPEND] QED Theorem le_write_read: - ∀n w bs bs'. - 256 ** n ≤ dimword (:'a) ∧ - n ≤ length bs ∧ - le_read_w n bs = (w:'a word, bs') + ∀size n bs bs'. + size ≤ length bs ∧ + le_read_num size bs = (n, bs') ⇒ - le_write_w n w ++ bs' = bs + le_write_num size n ++ bs' = bs Proof - rw [le_read_w_def] >> - qmatch_goalsub_abbrev_tac `l2w _ l` >> - `le_write_w n (l2w 256 l) = take n bs` suffices_by metis_tac [TAKE_DROP] >> - simp [le_write_w_def, w2l_l2w] >> - `l2n 256 l < 256 ** n` + rw [le_read_num_def] >> + qmatch_goalsub_abbrev_tac `l2n _ l` >> + `le_write_num size (l2n 256 l) = take size bs` suffices_by metis_tac [TAKE_DROP] >> + simp [le_write_num_def, w2l_l2w] >> + `l2n 256 l < 256 ** size` by ( - `n ≤ length bs` by decide_tac >> + `size ≤ length bs` by decide_tac >> metis_tac [l2n_lt, LENGTH_TAKE, LENGTH_MAP, EVAL ``0n < 256``]) >> - fs [] >> `every ($> 256) l` by ( simp [EVERY_MAP, Abbr `l`] >> irule EVERY_TAKE >> simp [] >> @@ -405,15 +410,15 @@ Proof rw [n2l_l2n] >- ( rw [TAKE_def, take_replicate] >> - Cases_on `n` >> fs [] >> + Cases_on `size` >> fs [] >> rfs [l2n_0] >> unabbrev_all_tac >> fs [EVERY_MAP] >> ONCE_REWRITE_TAC [GSYM REPLICATE] >> - qmatch_goalsub_abbrev_tac `take n _` >> + qmatch_goalsub_abbrev_tac `take size _` >> qpat_assum `¬(_ < _)` mp_tac >> qpat_assum `every (\x. 0 = w2n x) _` mp_tac >> rpt (pop_assum kall_tac) >> qid_spec_tac `bs` >> - Induct_on `n` >> rw [] >> + Induct_on `size` >> rw [] >> Cases_on `bs` >> rw [] >> fs [] >> Cases_on `h` >> fs [] >> first_x_assum irule >> rw [] >> @@ -430,11 +435,11 @@ Proof Cases_on `l` >> fs [dropWhile_eq_nil, combinTheory.o_DEF, EXISTS_REVERSE]) >> `0 < length (dropWhile ($= 0) (reverse l))` by decide_tac >> fs [SUC_PRE] >> - `map n2w l = take n bs` + `map n2w l = take size bs` by (simp [Abbr `l`, MAP_TAKE, MAP_MAP_o, combinTheory.o_DEF, n2w_w2n]) >> simp [TAKE_TAKE_MIN] >> - `length l = n` by simp [Abbr `l`] >> - `length (dropWhile ($= 0) (reverse l)) ≤ n` + `length l = size` by simp [Abbr `l`] >> + `length (dropWhile ($= 0) (reverse l)) ≤ size` by metis_tac [LESS_EQ_TRANS, LENGTH_dropWhile_LESS_EQ, LENGTH_REVERSE] >> rw [MIN_DEF] >> fs [] >- ( @@ -462,14 +467,14 @@ QED Theorem b2v_v2b_lem: (∀f s v. value_shape f s v ⇒ - (∀x t n. f n t x ⇒ fst (h x) = n ∧ g n t (snd (h x)) = x ∧ w2n (snd (h x)) < 256 ** n) ⇒ + (∀x t n. f n t x ⇒ fst (h x) = n ∧ g n t (snd (h x)) = x ∧ snd (h x) < 256 ** n) ⇒ ∀bs. - bytes_to_value (g:num -> 'c -> 'a word -> 'b) s (value_to_bytes h v ++ bs) = (v, bs)) ∧ + bytes_to_value g s (value_to_bytes h v ++ bs) = (v, bs)) ∧ (∀f ss vs. value_shapes f ss vs ⇒ - (∀x t n. f n t x ⇒ fst (h x) = n ∧ g n t (snd (h x)) = x ∧ w2n (snd (h x)) < 256 ** n) ⇒ + (∀x t n. f n t x ⇒ fst (h x) = n ∧ g n t (snd (h x)) = x ∧ snd (h x) < 256 ** n) ⇒ ∀bs. - read_str (g:num -> 'c -> 'a word -> 'b) ss (flat (map (value_to_bytes h) vs) ++ bs) = (vs, bs)) + read_str g ss (flat (map (value_to_bytes h) vs) ++ bs) = (vs, bs)) Proof ho_match_mp_tac value_shape_ind >> rw [value_shape_def] >> fs [bytes_to_value_def, value_to_bytes_def] >> rw [] @@ -501,7 +506,7 @@ QED Theorem b2v_v2b: ∀f g h s v bs. value_shape f s v ⇒ - (∀x n t. f n t x ⇒ fst (h x) = n ∧ g n t (snd (h x)) = x ∧ w2n (snd (h x)) < 256 ** n) ⇒ + (∀x n t. f n t x ⇒ fst (h x) = n ∧ g n t (snd (h x)) = x ∧ snd (h x) < 256 ** n) ⇒ bytes_to_value g s (value_to_bytes h v ++ bs) = (v, bs) Proof metis_tac [b2v_v2b_lem] diff --git a/sledge/semantics/miscScript.sml b/sledge/semantics/miscScript.sml index 953161431..58eac9995 100644 --- a/sledge/semantics/miscScript.sml +++ b/sledge/semantics/miscScript.sml @@ -9,7 +9,8 @@ * could be upstreamed to HOL, and should eventually. *) open HolKernel boolLib bossLib Parse; -open listTheory rich_listTheory arithmeticTheory wordsTheory; +open listTheory rich_listTheory arithmeticTheory integerTheory; +open integer_wordTheory wordsTheory; open finite_mapTheory open logrootTheory numposrepTheory; open settingsTheory; @@ -169,4 +170,75 @@ Proof Induct >> rw [] QED +Definition truncate_2comp_def: + truncate_2comp (i:int) size = + (i + 2 ** (size - 1)) % 2 ** size - 2 ** (size - 1) +End + +Theorem truncate_2comp_i2w_w2i: + ∀i size. dimindex (:'a) = size ⇒ truncate_2comp i size = w2i (i2w i : 'a word) +Proof + rw [truncate_2comp_def, w2i_def, word_msb_i2w, w2n_i2w] >> + qmatch_goalsub_abbrev_tac `(_ + s1) % s2` >> + `2 * s1 = s2` by rw [Abbr `s1`, Abbr `s2`, GSYM EXP, DIMINDEX_GT_0] >> + `0 ≠ s2 ∧ ¬(s2 < 0)` by rw [Abbr `s2`] >> + fs [MULT_MINUS_ONE, w2n_i2w] >> + fs [GSYM dimword_def, dimword_IS_TWICE_INT_MIN] + >- ( + `-i % s2 = -((i + s1) % s2 - s1)` suffices_by intLib.COOPER_TAC >> + simp [] >> + irule INT_MOD_UNIQUE >> + simp [GSYM PULL_EXISTS] >> + conj_tac + >- ( + simp [int_mod, INT_ADD_ASSOC, + intLib.COOPER_PROVE ``!x y (z:int). x - (y + z - a) = x - y - z + a``] >> + qexists_tac `-((i + s1) / s2)` >> + intLib.COOPER_TAC) >> + `&INT_MIN (:α) = s1` by (unabbrev_all_tac >> rw [INT_MIN_def]) >> + fs [INT_SUB_LE] >> + `0 ≤ (i + s1) % s2` by metis_tac [INT_MOD_BOUNDS] >> + strip_tac + >- ( + `(i + s1) % s2 = (i % s2 + s1 % s2) % s2` + by (irule (GSYM INT_MOD_PLUS) >> rw []) >> + simp [] >> + `(i % s2 + s1 % s2) % s2 = (-1 * s2 + (i % s2 + s1 % s2)) % s2` + by (metis_tac [INT_MOD_ADD_MULTIPLES]) >> + simp [GSYM INT_NEG_MINUS1, INT_ADD_ASSOC] >> + `i % s2 < s2 ∧ s1 % s2 < s2 ∧ i % s2 ≤ s2` by metis_tac [INT_MOD_BOUNDS, INT_LT_IMP_LE] >> + `0 ≤ s1 ∧ s1 < s2 ∧ -s2 + i % s2 + s1 % s2 < s2` by intLib.COOPER_TAC >> + `0 ≤ -s2 + i % s2 + s1 % s2` + by ( + `s2 = s1 + s1` by intLib.COOPER_TAC >> + fs [INT_LESS_MOD] >> + intLib.COOPER_TAC) >> + simp [INT_LESS_MOD] >> + intLib.COOPER_TAC) + >- intLib.COOPER_TAC) + >- ( + `(i + s1) % s2 = i % s2 + s1` suffices_by intLib.COOPER_TAC >> + `(i + s1) % s2 = i % s2 + s1 % s2` + suffices_by ( + rw [] >> + irule INT_LESS_MOD >> rw [] >> + intLib.COOPER_TAC) >> + `(i + s1) % s2 = (i % s2 + s1 % s2) % s2` + suffices_by ( + fs [Abbr `s2`] >> + `s1 = &INT_MIN (:'a)` by intLib.COOPER_TAC >> rw [] >> + irule INT_LESS_MOD >> rw [] >> + fs [intLib.COOPER_PROVE ``!(x:int) y. ¬(x ≤ y) ⇔ y < x``] >> rw [] >> + full_simp_tac std_ss [GSYM INT_MUL] >> + qpat_abbrev_tac `s = &INT_MIN (:α)` + >- ( + `2*s ≠ 0 ∧ ¬(2*s < 0) ∧ ¬(s < 0)` + by (unabbrev_all_tac >> rw []) >> + drule INT_MOD_BOUNDS >> simp [] >> + disch_then (qspec_then `i` mp_tac) >> simp [] >> + intLib.COOPER_TAC) + >- intLib.COOPER_TAC) >> + simp [INT_MOD_PLUS]) +QED + export_theory (); diff --git a/sledge/semantics/settingsScript.sml b/sledge/semantics/settingsScript.sml index f5cb4095f..2213357c7 100644 --- a/sledge/semantics/settingsScript.sml +++ b/sledge/semantics/settingsScript.sml @@ -53,5 +53,7 @@ overload_on ("alookup", ``ALOOKUP``); overload_on ("filter", ``FILTER``); overload_on ("map2", ``list$MAP2``); overload_on ("foldl", ``FOLDL``); +overload_on ("foldr", ``FOLDR``); +overload_on ("option_rel", ``OPTREL``); export_theory ();