You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

528 lines
17 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
(* Define SSA form and the concept of variable liveness, and then show how SSA
* simplifies it *)
open HolKernel boolLib bossLib Parse;
open pred_setTheory listTheory rich_listTheory;
open settingsTheory miscTheory llvmTheory llvm_propTheory;
new_theory "llvm_ssa";
numLib.prefer_num ();
(* ----- Static paths through a program *)
Definition inc_pc_def:
inc_pc ip = ip with i := inc_bip ip.i
End
(* The set of program counters the given instruction and starting point can
* immediately reach, within a function *)
Definition instr_next_ips_def:
(instr_next_ips (Ret _) ip = {})
(instr_next_ips (Br _ l1 l2) ip =
{ <| f := ip.f; b := Some l; i := Phi_ip ip.b |> | l | l {l1; l2} })
(instr_next_ips (Invoke _ _ _ _ l1 l2) ip =
{ <| f := ip.f; b := Some l; i := Phi_ip ip.b |> | l | l {l1; l2} })
(instr_next_ips Unreachable ip = {})
(instr_next_ips (Exit _) ip = {})
(instr_next_ips (Sub _ _ _ _ _ _) ip = { inc_pc ip })
(instr_next_ips (Extractvalue _ _ _) ip = { inc_pc ip })
(instr_next_ips (Insertvalue _ _ _ _) ip = { inc_pc ip })
(instr_next_ips (Alloca _ _ _) ip = { inc_pc ip })
(instr_next_ips (Load _ _ _) ip = { inc_pc ip })
(instr_next_ips (Store _ _) ip = { inc_pc ip })
(instr_next_ips (Gep _ _ _ _) ip = { inc_pc ip })
(instr_next_ips (Ptrtoint _ _ _) ip = { inc_pc ip })
(instr_next_ips (Inttoptr _ _ _) ip = { inc_pc ip })
(instr_next_ips (Icmp _ _ _ _ _) ip = { inc_pc ip })
(instr_next_ips (Call _ _ _ _) ip = { inc_pc ip })
(instr_next_ips (Cxa_allocate_exn _ _) ip = { inc_pc ip })
(* TODO: revisit throw when dealing with exceptions *)
(instr_next_ips (Cxa_throw _ _ _) ip = { })
(instr_next_ips (Cxa_begin_catch _ _) ip = { inc_pc ip })
(instr_next_ips (Cxa_end_catch) ip = { inc_pc ip })
(instr_next_ips (Cxa_get_exception_ptr _ _) ip = { inc_pc ip })
End
Inductive next_ips:
(∀prog ip i l i2.
get_instr prog ip (Inl i)
l instr_next_ips i ip
get_instr prog l i2
next_ips prog ip l)
(∀prog ip from_l phis i2.
get_instr prog ip (Inr (from_l, phis))
get_instr prog (inc_pc ip) i2
next_ips prog ip (inc_pc ip))
End
(* The path is a list of program counters that represent a statically feasible
* path through a function *)
Inductive good_path:
(∀prog. good_path prog [])
(∀prog ip i.
get_instr prog ip i
good_path prog [ip])
(∀prog path ip1 ip2.
ip2 next_ips prog ip1
good_path prog (ip2::path)
good_path prog (ip1::ip2::path))
End
Theorem next_ips_same_func:
∀prog ip1 ip2. ip2 next_ips prog ip1 ip1.f = ip2.f
Proof
rw [next_ips_cases, IN_DEF, get_instr_cases, inc_pc_def, inc_bip_def] >> rw [] >>
Cases_on `el idx b.body` >> fs [instr_next_ips_def, inc_pc_def, inc_bip_def]
QED
Theorem good_path_same_func:
∀prog path. good_path prog path ∀ip1 ip2. mem ip1 path mem ip2 path ip1.f = ip2.f
Proof
ho_match_mp_tac good_path_ind >> rw [] >>
metis_tac [next_ips_same_func]
QED
Theorem good_path_prefix:
∀prog path path'. good_path prog path path' path good_path prog path'
Proof
Induct_on `path'` >> rw []
>- simp [Once good_path_cases] >>
pop_assum mp_tac >> CASE_TAC >> rw [] >>
qpat_x_assum `good_path _ _` mp_tac >>
simp [Once good_path_cases] >> rw [] >> fs []
>- (simp [Once good_path_cases] >> metis_tac []) >>
first_x_assum drule >> rw [] >>
simp [Once good_path_cases] >>
Cases_on `path'` >> fs [next_ips_cases, IN_DEF] >>
metis_tac []
QED
Theorem good_path_append:
!prog p1 p2.
good_path prog (p1++p2)
good_path prog p1 good_path prog p2
(p1 [] p2 [] HD p2 next_ips prog (last p1))
Proof
Induct_on `p1` >> rw []
>- metis_tac [good_path_rules] >>
Cases_on `p1` >> Cases_on `p2` >> rw []
>- metis_tac [good_path_rules]
>- (
simp [Once good_path_cases] >>
metis_tac [good_path_rules, next_ips_cases, IN_DEF])
>- metis_tac [good_path_rules] >>
rename1 `ip1::ip2::(ips1++ip3::ips2)` >>
first_x_assum (qspecl_then [`prog`, `[ip3]++ips2`] mp_tac) >>
rw [] >> simp [Once good_path_cases, LAST_DEF] >> rw [] >>
eq_tac >> rw []
>- metis_tac [good_path_rules]
>- (qpat_x_assum `good_path _ [_;_]` mp_tac >> simp [Once good_path_cases])
>- metis_tac [good_path_rules, next_ips_cases, IN_DEF]
>- metis_tac [good_path_rules]
>- (qpat_x_assum `good_path _ (ip1::ip2::ips1)` mp_tac >> simp [Once good_path_cases])
>- (qpat_x_assum `good_path _ (ip1::ip2::ips1)` mp_tac >> simp [Once good_path_cases])
QED
(* ----- Helper functions to get registers out of instructions ----- *)
Definition arg_to_regs_def:
(arg_to_regs (Constant _) = {})
(arg_to_regs (Variable r) = {r})
End
(* The registers that an instruction uses *)
Definition instr_uses_def:
(instr_uses (Ret (_, a)) = arg_to_regs a)
(instr_uses (Br a _ _) = arg_to_regs a)
(instr_uses (Invoke _ _ a targs _ _) =
arg_to_regs a BIGUNION (set (map (arg_to_regs o snd) targs)))
(instr_uses Unreachable = {})
(instr_uses (Sub _ _ _ _ a1 a2) =
arg_to_regs a1 arg_to_regs a2)
(instr_uses (Extractvalue _ (_, a) _) = arg_to_regs a)
(instr_uses (Insertvalue _ (_, a1) (_, a2) _) =
arg_to_regs a1 arg_to_regs a2)
(instr_uses (Alloca _ _ (_, a)) = arg_to_regs a)
(instr_uses (Load _ _ (_, a)) = arg_to_regs a)
(instr_uses (Store (_, a1) (_, a2)) =
arg_to_regs a1 arg_to_regs a2)
(instr_uses (Gep _ _ (_, a) targs) =
arg_to_regs a BIGUNION (set (map (arg_to_regs o snd) targs)))
(instr_uses (Ptrtoint _ (_, a) _) = arg_to_regs a)
(instr_uses (Inttoptr _ (_, a) _) = arg_to_regs a)
(instr_uses (Icmp _ _ _ a1 a2) =
arg_to_regs a1 arg_to_regs a2)
(instr_uses (Call _ _ _ targs) =
BIGUNION (set (map (arg_to_regs o snd) targs)))
(instr_uses (Cxa_allocate_exn _ a) = arg_to_regs a)
(instr_uses (Cxa_throw a1 a2 a3) =
arg_to_regs a1 arg_to_regs a2 arg_to_regs a3)
(instr_uses (Cxa_begin_catch _ a) = arg_to_regs a)
(instr_uses (Cxa_end_catch) = { })
(instr_uses (Cxa_get_exception_ptr _ a) = arg_to_regs a)
End
Definition phi_uses_def:
phi_uses from_l (Phi _ _ entries) =
case alookup entries from_l of
| None => {}
| Some a => arg_to_regs a
End
Inductive uses:
(∀prog ip i r.
get_instr prog ip (Inl i)
r instr_uses i
uses prog ip r)
(∀prog ip from_l phis r.
get_instr prog ip (Inr (from_l, phis))
r BIGUNION (set (map (phi_uses from_l) phis))
uses prog ip r)
End
(* The registers that an instruction assigns *)
Definition instr_assigns_def:
(instr_assigns (Invoke r _ _ _ _ _) = {r})
(instr_assigns (Sub r _ _ _ _ _) = {r})
(instr_assigns (Extractvalue r _ _) = {r})
(instr_assigns (Insertvalue r _ _ _) = {r})
(instr_assigns (Alloca r _ _) = {r})
(instr_assigns (Load r _ _) = {r})
(instr_assigns (Gep r _ _ _) = {r})
(instr_assigns (Ptrtoint r _ _) = {r})
(instr_assigns (Inttoptr r _ _) = {r})
(instr_assigns (Icmp r _ _ _ _) = {r})
(instr_assigns (Call r _ _ _) = {r})
(instr_assigns (Cxa_allocate_exn r _) = {r})
(instr_assigns (Cxa_begin_catch r _) = {r})
(instr_assigns (Cxa_get_exception_ptr r _) = {r})
(instr_assigns _ = {})
End
Definition phi_assigns_def:
phi_assigns (Phi r _ _) = {r}
End
Inductive assigns:
(∀prog ip i r.
get_instr prog ip (Inl i)
r instr_assigns i
assigns prog ip r)
(∀prog ip from_l phis r.
get_instr prog ip (Inr (from_l, phis))
r BIGUNION (set (map phi_assigns phis))
assigns prog ip r)
End
(* ----- SSA form ----- *)
Definition entry_ip_def:
entry_ip fname = <| f := fname; b := None; i := Offset 0 |>
End
Definition reachable_def:
reachable prog ip
∃path. good_path prog (entry_ip ip.f :: path) last (entry_ip ip.f :: path) = ip
End
(* To get to ip2 from the entry, you must go through ip1 *)
Definition dominates_def:
dominates prog ip1 ip2
∀path.
good_path prog (entry_ip ip2.f :: path)
last (entry_ip ip2.f :: path) = ip2
mem ip1 (front (entry_ip ip2.f :: path))
End
Definition is_ssa_def:
is_ssa prog
(* Operate function by function *)
(∀fname.
(* No register is assigned in two different instructions *)
(∀r ip1 ip2.
r assigns prog ip1 r assigns prog ip2 ip1.f = fname ip2.f = fname
ip1 = ip2))
(* Each use is dominated by its assignment *)
(∀ip1 r. r uses prog ip1 ∃ip2. ip2.f = ip1.f r assigns prog ip2 dominates prog ip2 ip1)
End
Theorem dominates_trans:
∀prog ip1 ip2 ip3.
dominates prog ip1 ip2 dominates prog ip2 ip3 dominates prog ip1 ip3
Proof
rw [dominates_def] >> simp [FRONT_DEF] >> rw []
>- (first_x_assum (qspec_then `[]` mp_tac) >> rw []) >>
first_x_assum drule >> rw [] >>
qpat_x_assum `mem _ (front _)` mp_tac >>
simp [Once MEM_EL] >> rw [] >> fs [EL_FRONT] >>
first_x_assum (qspec_then `take n path` mp_tac) >> simp [LAST_DEF] >>
rw [] >> fs [entry_ip_def]
>- (fs [Once good_path_cases] >> rw [] >> fs [next_ips_cases, IN_DEF]) >>
rfs [EL_CONS] >>
`?m. n = Suc m` by (Cases_on `n` >> rw []) >>
rw [] >> rfs [] >>
`(el m path).f = ip3.f`
by (
irule good_path_same_func >>
qexists_tac `<| f:= ip3.f; b := NONE; i := Offset 0|> :: path` >>
qexists_tac `prog` >>
conj_tac >- rw [EL_MEM] >>
metis_tac [MEM_LAST]) >>
fs [] >> qpat_x_assum `_ _` mp_tac >> impl_tac
>- (
irule good_path_prefix >> HINT_EXISTS_TAC >> rw [] >>
metis_tac [take_is_prefix]) >>
rw [] >> drule MEM_FRONT >> rw [] >>
fs [MEM_EL, LENGTH_FRONT] >> rfs [EL_TAKE] >> rw [] >>
disj2_tac >> qexists_tac `n'` >> rw [] >>
irule (GSYM EL_FRONT) >>
rw [NULL_EQ, LENGTH_FRONT]
QED
Theorem dominates_unreachable:
∀prog ip1 ip2. ¬reachable prog ip2 dominates prog ip1 ip2
Proof
rw [dominates_def, reachable_def] >>
metis_tac []
QED
Theorem dominates_antisym_lem:
∀prog ip1 ip2. dominates prog ip1 ip2 dominates prog ip2 ip1 ¬reachable prog ip1
Proof
rw [dominates_def, reachable_def] >> CCONTR_TAC >> fs [] >>
Cases_on `ip1 = entry_ip ip1.f` >> fs []
>- (
first_x_assum (qspec_then `[]` mp_tac) >> rw [] >>
fs [Once good_path_cases, IN_DEF, next_ips_cases] >>
metis_tac []) >>
`path []` by (Cases_on `path` >> fs []) >>
`(OLEAST n. n < length path el n path = ip1) None`
by (
rw [whileTheory.OLEAST_EQ_NONE] >>
qexists_tac `PRE (length path)` >> rw [] >>
fs [LAST_DEF, LAST_EL] >>
Cases_on `path` >> fs []) >>
qabbrev_tac `path1 = splitAtPki (\n ip. ip = ip1) (\x y. x) path` >>
first_x_assum (qspec_then `path1 ++ [ip1]` mp_tac) >>
simp [] >>
conj_asm1_tac >> rw []
>- (
irule good_path_prefix >>
HINT_EXISTS_TAC >> rw [] >>
unabbrev_all_tac >> rw [splitAtPki_EQN] >>
CASE_TAC >> rw [] >>
fs [whileTheory.OLEAST_EQ_SOME] >>
rw [GSYM SNOC_APPEND, SNOC_EL_TAKE] >>
metis_tac [take_is_prefix])
>- rw [LAST_DEF] >>
simp [GSYM SNOC_APPEND, FRONT_SNOC, FRONT_DEF] >>
CCONTR_TAC >> fs [MEM_EL]
>- (
first_x_assum (qspec_then `[]` mp_tac) >>
fs [entry_ip_def, Once good_path_cases, IN_DEF, next_ips_cases] >>
metis_tac []) >>
rw [] >>
rename [`n1 < length _`, `last _ = el n path`] >>
first_x_assum (qspec_then `take (Suc n1) path1` mp_tac) >> rw []
>- (
irule good_path_prefix >> HINT_EXISTS_TAC >> rw [entry_ip_def]
>- (
irule good_path_same_func >>
qexists_tac `entry_ip (el n path).f::(path1 ++ [el n path])` >>
qexists_tac `prog` >> rw [EL_MEM]) >>
metis_tac [IS_PREFIX_APPEND3, take_is_prefix, IS_PREFIX_TRANS])
>- (rw [LAST_DEF] >> fs []) >>
rw [METIS_PROVE [] ``~x y (x y)``] >>
simp [EL_FRONT] >>
rename [`n2 < Suc _`] >>
Cases_on `¬(0 < n2)` >> rw [EL_CONS]
>- (
fs [entry_ip_def] >>
`(el n path).f = (el n1 path1).f` suffices_by metis_tac [] >>
irule good_path_same_func >>
qexists_tac `<|f := (el n path).f; b := None; i := Offset 0|> ::(path1 ++ [el n path])` >>
qexists_tac `prog` >>
rw [EL_MEM]) >>
fs [EL_TAKE, Abbr `path1`, splitAtPki_EQN] >>
CASE_TAC >> rw [] >> fs []
>- metis_tac [] >>
fs [whileTheory.OLEAST_EQ_SOME] >>
rfs [LENGTH_TAKE] >>
`PRE n2 < x` by decide_tac >>
first_x_assum drule >>
rw [EL_TAKE]
QED
Theorem dominates_antisym:
∀prog ip1 ip2. reachable prog ip1 dominates prog ip1 ip2 ¬dominates prog ip2 ip1
Proof
metis_tac [dominates_antisym_lem]
QED
Theorem dominates_irrefl:
∀prog ip. reachable prog ip ¬dominates prog ip ip
Proof
metis_tac [dominates_antisym]
QED
(* ----- Liveness ----- *)
Definition live_def:
live prog ip =
{ r | ∃path.
good_path prog (ip::path)
r uses prog (last (ip::path))
∀ip2. ip2 set (front (ip::path)) r assigns prog ip2 }
End
Theorem get_instr_live:
∀prog ip instr.
get_instr prog ip instr
uses prog ip live prog ip
Proof
rw [live_def, SUBSET_DEF] >>
qexists_tac `[]` >> rw [Once good_path_cases] >>
qexists_tac `instr` >> simp [] >> metis_tac [IN_DEF]
QED
Triviality set_rw:
∀s P. (∀x. x s P x) s = P
Proof
rw [] >> eq_tac >> rw [IN_DEF] >> metis_tac []
QED
Theorem live_gen_kill:
∀prog ip ip'.
live prog ip =
BIGUNION {live prog ip' | ip' | ip' next_ips prog ip} DIFF assigns prog ip uses prog ip
Proof
rw [live_def, EXTENSION] >> eq_tac >> rw []
>- (
Cases_on `path` >> fs [] >>
rename1 `ip::ip2::path` >>
qpat_x_assum `good_path _ _` mp_tac >> simp [Once good_path_cases] >> rw [] >>
Cases_on `x uses prog ip` >> fs [] >> simp [set_rw, PULL_EXISTS] >>
qexists_tac `ip2` >> qexists_tac `path` >> rw [])
>- (
fs [] >>
qexists_tac `ip'::path` >> rw [] >>
simp [Once good_path_cases])
>- (
qexists_tac `[]` >> rw [] >>
fs [Once good_path_cases, uses_cases, IN_DEF] >>
metis_tac [])
QED
Theorem ssa_dominates_live_range_lem:
∀prog r ip1 ip2.
is_ssa prog ip1.f = ip2.f r assigns prog ip1 r live prog ip2
dominates prog ip1 ip2
Proof
rw [dominates_def, is_ssa_def, live_def] >>
`path [] (last path).f = ip2.f`
by (
rw [] >>
irule good_path_same_func >>
qexists_tac `ip2::path` >> rw [] >>
Cases_on `path` >> fs [MEM_LAST] >> metis_tac []) >>
first_x_assum drule >> rw [] >>
first_x_assum (qspec_then `path'++path` mp_tac) >>
impl_tac
>- (
fs [LAST_DEF] >> rw [] >> fs []
>- (
simp_tac std_ss [GSYM APPEND, good_path_append] >> rw []
>- (
qpat_x_assum `good_path _ (_::_)` mp_tac >>
qpat_x_assum `good_path _ (_::_)` mp_tac >>
simp [Once good_path_cases] >>
metis_tac [])
>- (
simp [LAST_DEF] >>
qpat_x_assum `good_path _ (_::_)` mp_tac >>
qpat_x_assum `good_path _ (_::_)` mp_tac >>
simp [Once good_path_cases] >>
rw [] >> rw []))
>- (Cases_on `path` >> fs [])) >>
rw [] >>
`ip2'.f = ip2.f`
by (
irule good_path_same_func >>
qexists_tac `entry_ip ip2.f::path'` >>
qexists_tac `prog` >>
conj_tac
>- (
Cases_on `path` >>
full_simp_tac std_ss [GSYM APPEND, FRONT_APPEND, APPEND_NIL, LAST_CONS]
>- metis_tac [MEM_FRONT] >>
fs [] >> metis_tac [])
>- metis_tac [MEM_LAST]) >>
`ip2' = ip1` by metis_tac [] >>
rw [] >>
Cases_on `path` >> fs [] >>
full_simp_tac std_ss [GSYM APPEND, FRONT_APPEND] >> fs [] >> rw [FRONT_DEF] >> fs []
>- metis_tac []
>- (
`mem ip1 path' = mem ip1 (front path' ++ [last path'])` by metis_tac [APPEND_FRONT_LAST] >>
fs [LAST_DEF] >>
metis_tac [])
>- metis_tac []
>- metis_tac []
QED
Theorem ssa_dominates_live_range:
∀prog r ip.
is_ssa prog r uses prog ip
∃ip1. ip1.f = ip.f r assigns prog ip1
∀ip2. ip2.f = ip.f r live prog ip2
dominates prog ip1 ip2
Proof
rw [] >> drule ssa_dominates_live_range_lem >> rw [] >>
fs [is_ssa_def] >>
first_assum drule >> rw [] >> metis_tac []
QED
Theorem reachable_dominates_same_func:
∀prog ip1 ip2. reachable prog ip2 dominates prog ip1 ip2 ip1.f = ip2.f
Proof
rw [reachable_def, dominates_def] >> res_tac >>
irule good_path_same_func >>
metis_tac [MEM_LAST, MEM_FRONT]
QED
Theorem next_ips_reachable:
∀prog ip1 ip2. reachable prog ip1 ip2 next_ips prog ip1 reachable prog ip2
Proof
rw [] >> imp_res_tac next_ips_same_func >>
fs [reachable_def] >>
qexists_tac `path ++ [ip2]` >>
simp_tac std_ss [GSYM APPEND, LAST_APPEND_CONS, LAST_CONS] >>
simp [good_path_append] >>
simp [Once good_path_cases] >>
fs [next_ips_cases, IN_DEF] >>
metis_tac []
QED
export_theory ();