diff --git a/sledge/TODO.org b/sledge/TODO.org index e49905e83..01315678e 100644 --- a/sledge/TODO.org +++ b/sledge/TODO.org @@ -25,6 +25,36 @@ rather than nearest enclosing ** check/ensure that generated names do not clash - name ^ ".ti" xlate_instr LandingPad ** ? expose the roots computed by Llair.mk +** ? types +- could add types to Exp constructors, indicating the types at which the operation interprets its arguments + + pros + * could enforce well-typedness modulo castability + - quite weak constraint, but might catch some bugs + - not castable: + + Bytes <-> (Function | Opaque | Memory) + + between (Int | Float | Array) when prim_bit_size different + + Pointer <-> (Function | Tuple | Struct | Opaque | Memory) + + between (Function | Tuple | Struct | Opaque | Memory) + * perhaps helpful when debugging + * needed for correct semantics + - where size of integer and floating point numbers matters (overflow behavior and interpretation of conversions) + + cons + - perf: increases size of representation of Exp, perhaps a lot + - code complexity: need to plumb through target-specific data in order to e.g. be able to create equalities at intptr type +- instructions and globals could use accurate types to replace len fields with static sizeof type +- load instructions would need accurate types on reg to create equalities between it and its value in Exec +- memcpy and memmov would need types to create equality between src and dst in Exec +- formals would need types, to create equalities between formals and actuals in Domain +- types could be useful for approximate human-readable printing for general expressions + + to print p+o as p.f, will likely need to consult what p is equal to, to find some meaningful type, and it could easily take much more work than this to produce reliably readable results +- target-specific types and layout + + change Typ.target into a separate module + + construct an instance in frontend as first step + + use it during translation + + return it as part of program + + pass it from Control to Domain, etc. +- function types could include the types of throw continuation args +but they are currently the same for all functions: i8* ** ? change blocks to take all free variables as args + currently the scope of an identifier bound by e.g. Load is the continuation of the inst as well as all the conts that it dominates, this is somewhat messy + build a table from blocks to conts @@ -50,8 +80,6 @@ rather than nearest enclosing * else recurse over the destination blocks except the current block + after entry block (and recursively everything reachable from it) is xlated, map over the function block list looking up from the table to get order of conts to match order of blocks ** ? format #line directives in programs -** ? function types could include the types of throw continuation args -but they are currently the same for all functions: i8* * frontend ** kill the frontend memo tables when translate returns ** translate %malloc to alloc @@ -136,7 +164,6 @@ so that xlate_type can preserve sizedness, e.g. add a post-condition that Typ.is ** scalarizer does not work on functions with [optnone] attribute - repro: llvm/Transforms/FunctionAttrs/optnone-simple.ll - one solution: pre-process llvm to remove [optnone] attributes before running scalarizer pass -** ? move is_zero to Exp ** ? remove Exp.Nondet, replace with free variables it is not obvious whether it will be simpler to use free variables instead of Nondet in the frontend, or to treat Nondet as a single-occurrence existential variable in the analyzer ** llvm bugs? diff --git a/sledge/src/llair/llair.ml b/sledge/src/llair/llair.ml index 20f0927e8..1ed209fed 100644 --- a/sledge/src/llair/llair.ml +++ b/sledge/src/llair/llair.ml @@ -5,34 +5,35 @@ * LICENSE file in the root directory of this source tree. *) -(** Programs / translation units *) +(** Translation units *) type inst = - | Load of {reg: Var.t; ptr: Exp.t; loc: Loc.t} - | Store of {ptr: Exp.t; exp: Exp.t; loc: Loc.t} + | Load of {reg: Var.t; ptr: Exp.t; len: Exp.t; loc: Loc.t} + | Store of {ptr: Exp.t; exp: Exp.t; len: Exp.t; loc: Loc.t} | Memcpy of {dst: Exp.t; src: Exp.t; len: Exp.t; loc: Loc.t} | Memmov of {dst: Exp.t; src: Exp.t; len: Exp.t; loc: Loc.t} | Memset of {dst: Exp.t; byt: Exp.t; len: Exp.t; loc: Loc.t} - | Alloc of {reg: Var.t; num: Exp.t; loc: Loc.t} + | Alloc of {reg: Var.t; num: Exp.t; len: Exp.t; loc: Loc.t} | Free of {ptr: Exp.t; loc: Loc.t} | Nondet of {reg: Var.t option; msg: string; loc: Loc.t} + | Strlen of {reg: Var.t; ptr: Exp.t; loc: Loc.t} [@@deriving sexp] type cmnd = inst vector [@@deriving sexp] - type label = string [@@deriving sexp] type 'a control_transfer = - {mutable dst: 'a; args: Exp.t vector; mutable retreating: bool} -[@@deriving sexp] + {mutable dst: 'a; args: Exp.t list; mutable retreating: bool} +[@@deriving compare, sexp_of] type jump = block control_transfer and term = | Switch of {key: Exp.t; tbl: (Z.t * jump) vector; els: jump; loc: Loc.t} - | ISwitch of {ptr: Exp.t; tbl: jump vector; loc: Loc.t} + | Iswitch of {ptr: Exp.t; tbl: jump vector; loc: Loc.t} | Call of { call: Exp.t control_transfer + ; typ: Typ.t ; return: jump ; throw: jump option ; ignore_result: bool @@ -43,7 +44,8 @@ and term = and block = { lbl: label - ; params: Var.t vector + ; params: Var.t list + ; locals: Var.Set.t ; cmnd: cmnd ; term: term ; mutable parent: func @@ -51,239 +53,322 @@ and block = and cfg = block vector -and func = {name: Global.t; entry: block; cfg: cfg} [@@deriving sexp] - -type t = - {typ_defns: Typ.t list; globals: Global.t vector; functions: func vector} +(* [entry] is not part of [cfg] since it is special in two ways: its params + are the func params, and it cannot be jumped to, only called. *) +and func = {name: Global.t; entry: block; cfg: cfg} + +let rec sexp_of_jump ({dst; args; retreating} as jmp) = + if retreating then + [%sexp {dst= (dst.lbl : label); args : Exp.t list; retreating : bool}] + else [%sexp_of: jump] jmp + +and sexp_of_term t = [%sexp_of: term] t + +and sexp_of_block {lbl; params; locals; cmnd; term; parent; sort_index} = + [%sexp + { lbl : label + ; params : Var.t list + ; locals : Var.Set.t + ; cmnd : cmnd + ; term : term + ; parent= (parent.name.var : Var.t) + ; sort_index : int }] + +and sexp_of_func f = [%sexp_of: func] f + +(* blocks in a [t] are uniquely identified by [sort_index] *) +let compare_block x y = Int.compare x.sort_index y.sort_index + +type t = {globals: Global.t vector; functions: func vector} +[@@deriving sexp_of] + +let pp_inst fs inst = + let pf pp = Format.fprintf fs pp in + match inst with + | Load {reg; ptr; len; loc} -> + pf "@[<2>load %a@ %a@ %a;@]\t%a" Exp.pp len Var.pp reg Exp.pp ptr + Loc.pp loc + | Store {ptr; exp; len; loc} -> + pf "@[<2>store %a@ %a@ %a;@]\t%a" Exp.pp len Exp.pp ptr Exp.pp exp + Loc.pp loc + | Memcpy {dst; src; len; loc} -> + pf "@[<2>memcpy %a %a %a;@]\t%a" Exp.pp len Exp.pp dst Exp.pp src + Loc.pp loc + | Memmov {dst; src; len; loc} -> + pf "@[<2>memmov %a %a %a;@]\t%a" Exp.pp len Exp.pp dst Exp.pp src + Loc.pp loc + | Memset {dst; byt; len; loc} -> + pf "@[<2>memset %a %a %a;@]\t%a" Exp.pp len Exp.pp dst Exp.pp byt + Loc.pp loc + | Alloc {reg; num; len; loc} -> + pf "@[<2>alloc %a@ [%a x %a];@]\t%a" Var.pp reg Exp.pp num Exp.pp len + Loc.pp loc + | Free {ptr; loc} -> pf "@[<2>free %a;@]\t%a" Exp.pp ptr Loc.pp loc + | Nondet {reg; msg; loc} -> + pf "@[<2>nondet %a\"%s\";@]\t%a" (Option.pp "%a " Var.pp) reg msg + Loc.pp loc + | Strlen {reg; ptr; loc} -> + pf "@[<2>strlen %a@ %a;@]\t%a" Var.pp reg Exp.pp ptr Loc.pp loc + +let pp_args pp_arg fs args = + Format.fprintf fs "@ (@[%a@])" (List.pp ",@ " pp_arg) (List.rev args) + +let pp_param fs var = Var.pp fs var + +let pp_jump fs {dst= {lbl}; args; retreating} = + Format.fprintf fs "@[<2>%s%%%s%a@]" + (if retreating then "↑" else "") + lbl (pp_args Exp.pp) args + +let pp_term fs term = + let pf pp = Format.fprintf fs pp in + let pp_goto fs jmp = Format.fprintf fs "goto %a;" pp_jump jmp in + match term with + | Switch {key; tbl; els; loc} -> ( + match Vector.to_array tbl with + | [||] -> pf "@[%a@]\t%a" pp_goto els Loc.pp loc + | [|(z, jmp)|] when Z.equal Z.one z -> + pf "@[if %a@;<1 2>%a@ @[else@;<1 2>%a@]@]\t%a" Exp.pp key pp_goto + jmp pp_goto els Loc.pp loc + | _ -> + pf "@[<2>switch %a@ @[%a@ else: %a@]@]\t%a" Exp.pp key + (Vector.pp "@ " (fun fs (z, jmp) -> + Format.fprintf fs "%a: %a" Z.pp_print z pp_goto jmp )) + tbl pp_goto els Loc.pp loc ) + | Iswitch {ptr; tbl; loc} -> + pf "@[<2>iswitch %a@ @[%a@]@]\t%a" Exp.pp ptr + (Vector.pp "@ " (fun fs ({dst= {lbl}; _} as jmp) -> + Format.fprintf fs "%s: %a" lbl pp_goto jmp )) + tbl Loc.pp loc + | Call {call= {dst; args; retreating}; return; throw; loc} -> + pf "@[<2>@[<7>call @[<2>%s%a%a@]@]@ @[returnto %a%a;@]@]\t%a" + (if retreating then "↑" else "") + Exp.pp dst (pp_args Exp.pp) args pp_jump return + (Option.pp "@ throwto %a" pp_jump) + throw Loc.pp loc + | Return {exp; loc} -> + pf "@[<2>return%a@]\t%a" (Option.pp " %a" Exp.pp) exp Loc.pp loc + | Throw {exc; loc} -> pf "@[<2>throw %a@]\t%a" Exp.pp exc Loc.pp loc + | Unreachable -> pf "unreachable" + +let pp_cmnd = Vector.pp "@ " pp_inst + +let pp_block fs {lbl; params; cmnd; term; sort_index} = + Format.fprintf fs "@[@[<4>%s%a@]: #%i@ @[%a%t%a@]@]" lbl + (pp_args pp_param) params sort_index pp_cmnd cmnd + (fun fs -> if Vector.is_empty cmnd then () else Format.fprintf fs "@ ") + pp_term term (** Initial cyclic values *) let rec dummy_block = { lbl= "dummy" - ; params= Vector.empty + ; params= [] + ; locals= Var.Set.empty ; cmnd= Vector.empty ; term= Unreachable ; parent= dummy_func ; sort_index= 0 } - and dummy_func = - {name= Global.mk "dummy" Typ.i8p; entry= dummy_block; cfg= Vector.empty} + let dummy_ptr_typ = Typ.pointer ~elt:(Typ.opaque ~name:"dummy") in + { name= Global.mk (Var.program "dummy") 0 dummy_ptr_typ Loc.none + ; entry= dummy_block + ; cfg= Vector.empty } +(** Instructions *) module Inst = struct - type t = inst - - let mkLoad ~reg ~ptr ~loc = - assert ( - match Exp.typ ptr with - | Pointer {elt} -> Typ.equal elt (Var.typ reg) && Typ.is_sized elt - | _ -> false ) ; - Load {reg; ptr; loc} - - - let mkStore ~ptr ~exp ~loc = - assert ( - match Exp.typ ptr with - | Pointer {elt} -> Typ.equal elt (Exp.typ exp) && Typ.is_sized elt - | _ -> false ) ; - Store {ptr; exp; loc} - - - let mkMemcpy ~dst ~src ~len ~loc = - assert ( - match (Exp.typ dst, Exp.typ src, Exp.typ len) with - | (Pointer {elt} as ptr1), ptr2, Integer _ -> - Typ.equal ptr1 ptr2 && Typ.is_sized elt - | _ -> false ) ; - Memcpy {dst; src; len; loc} - - - let mkMemmov ~dst ~src ~len ~loc = - assert ( - match (Exp.typ dst, Exp.typ src, Exp.typ len) with - | (Pointer {elt} as ptr1), ptr2, Integer _ -> - Typ.equal ptr1 ptr2 && Typ.is_sized elt - | _ -> false ) ; - Memmov {dst; src; len; loc} - - - let mkMemset ~dst ~byt ~len ~loc = - assert ( - match (Exp.typ dst, Exp.typ byt, Exp.typ len) with - | Pointer {elt}, Integer {bits= 8}, Integer _ -> Typ.is_sized elt - | _ -> false ) ; - Memset {dst; byt; len; loc} - - - let mkAlloc ~reg ~num ~loc = - assert ( - match (Var.typ reg, Exp.typ num) with - | Pointer {elt}, Integer _ -> Typ.is_sized elt - | _ -> false ) ; - Alloc {reg; num; loc} - - - let mkFree ~ptr ~loc = - assert ( - match Exp.typ ptr with - | Pointer {elt} -> Typ.is_sized elt - | _ -> false ) ; - Free {ptr; loc} - - - let mkNondet ~reg ~msg ~loc = - assert (Option.for_all ~f:(Var.typ >> Typ.is_sized) reg) ; - Nondet {reg; msg; loc} - - - let fmt ff inst = - let pf fmt = Format.fprintf ff fmt in + type t = inst [@@deriving sexp] + + let pp = pp_inst + let load ~reg ~ptr ~len ~loc = Load {reg; ptr; len; loc} + let store ~ptr ~exp ~len ~loc = Store {ptr; exp; len; loc} + let memcpy ~dst ~src ~len ~loc = Memcpy {dst; src; len; loc} + let memmov ~dst ~src ~len ~loc = Memmov {dst; src; len; loc} + let memset ~dst ~byt ~len ~loc = Memset {dst; byt; len; loc} + let alloc ~reg ~num ~len ~loc = Alloc {reg; num; len; loc} + let free ~ptr ~loc = Free {ptr; loc} + let nondet ~reg ~msg ~loc = Nondet {reg; msg; loc} + let strlen ~reg ~ptr ~loc = Strlen {reg; ptr; loc} + + let loc = function + | Load {loc} + |Store {loc} + |Memcpy {loc} + |Memmov {loc} + |Memset {loc} + |Alloc {loc} + |Free {loc} + |Nondet {loc} + |Strlen {loc} -> + loc + + let union_locals inst vs = match inst with - | Load {reg; ptr} -> pf "load %a %a;" Var.fmt reg Exp.fmt ptr - | Store {ptr; exp} -> pf "store %a %a;" Exp.fmt ptr Exp.fmt exp - | Memcpy {dst; src; len} -> - pf "memcpy %a %a %a;" Exp.fmt dst Exp.fmt src Exp.fmt len - | Memmov {dst; src; len} -> - pf "memmov %a %a %a;" Exp.fmt dst Exp.fmt src Exp.fmt len - | Memset {dst; byt; len} -> - pf "memset %a %a %a;" Exp.fmt dst Exp.fmt byt Exp.fmt len - | Alloc {reg; num} -> - let[@warning "p"] (Typ.Pointer {elt}) = Var.typ reg in - pf "alloc %a [%a x %a];" Var.fmt reg Exp.fmt num Typ.fmt elt - | Free {ptr} -> pf "free %a;" Exp.fmt ptr - | Nondet {reg; msg} -> - pf "nondet %a\"%s\";" (option_fmt "%a " Var.fmt) reg msg -end + | Load {reg} | Alloc {reg} | Nondet {reg= Some reg} | Strlen {reg} -> + Set.add vs reg + | Store _ | Memcpy _ | Memmov _ | Memset _ | Free _ | Nondet {reg= None} + -> + vs -let fmt_cmnd = vector_fmt "@ " Inst.fmt - -let fmt_args fmt_arg ff args = - Format.fprintf ff "@ (@[%a@])" (vector_fmt ",@ " fmt_arg) args - - -let fmt_param ff var = - Format.fprintf ff "%a %a" Typ.fmt (Var.typ var) Var.fmt var + let locals inst = union_locals inst Var.Set.empty +end +(** Jumps *) module Jump = struct - type t = jump - - let mk lbl args = {dst= {dummy_block with lbl}; args; retreating= false} - - let fmt ff {dst= {lbl}; args; retreating} = - Format.fprintf ff "@[<2>%s%%%s%a@]" - (if retreating then "↑" else "") - lbl (fmt_args Exp.fmt) args + type t = jump [@@deriving sexp_of] + + let compare = compare_control_transfer compare_block + let pp = pp_jump + + let invariant ?(accept_return = false) jmp = + Invariant.invariant [%here] jmp [%sexp_of: t] + @@ fun () -> + let {dst= {params; parent}; args} = jmp in + if parent == dummy_func then + (* jmp not yet backpatched by Func.mk *) + assert true + else + assert ( + List.length params = List.length args + Bool.to_int accept_return ) + + let mk lbl args = + {dst= {dummy_block with lbl}; args; retreating= false} + |> check invariant end -module Term = struct - type t = term - - let mkSwitch ~key ~tbl ~els ~loc = - assert (match Exp.typ key with Integer _ -> true | _ -> false) ; - Switch {key; tbl; els; loc} +(** Basic-Block Terminators *) +module Term = struct + type t = term [@@deriving sexp_of] - let mkISwitch ~ptr ~tbl ~loc = - assert ( - match Exp.typ ptr with - | Pointer {elt= Integer {bits= 8}} -> true - | _ -> false ) ; - ISwitch {ptr; tbl; loc} - + let pp = pp_term - let mkCall ~func ~args ~return ~throw ~ignore_result ~loc = - assert ( - match Exp.typ func with - | Pointer {elt= Function {args= typs}} -> - Vector.for_all2_exn typs args ~f:(fun typ arg -> - Typ.equal typ (Exp.typ arg) ) - | _ -> false ) ; + let invariant term = + Invariant.invariant [%here] term [%sexp_of: t] + @@ fun () -> + match term with + | Switch {tbl; els} -> + Vector.iter tbl ~f:(fun (_, jmp) -> Jump.invariant jmp) ; + Jump.invariant els + | Iswitch {tbl} -> Vector.iter tbl ~f:Jump.invariant + | Call {call= {args= actls}; typ; return; throw; ignore_result} -> ( + match typ with + | Pointer {elt= Function {args= frmls; return= retn_typ}} -> + assert (Vector.length frmls = List.length actls) ; + Jump.invariant return + ~accept_return:(Option.is_some retn_typ && not ignore_result) ; + Option.iter throw ~f:(Jump.invariant ~accept_return:true) + | _ -> assert false ) + | Return _ | Throw _ | Unreachable -> assert true + + let switch ~key ~tbl ~els ~loc = + Switch {key; tbl; els; loc} |> check invariant + + let iswitch ~ptr ~tbl ~loc = Iswitch {ptr; tbl; loc} |> check invariant + + let call ~func ~typ ~args ~return ~throw ~ignore_result ~loc = Call { call= {dst= func; args; retreating= false} + ; typ ; return ; throw ; ignore_result ; loc } + |> check invariant - - let mkReturn ~exp ~loc = Return {exp; loc} - - let mkThrow ~exc ~loc = Throw {exc; loc} - - let mkUnreachable = Unreachable - - let fmt ff term = - let pf fmt = Format.fprintf ff fmt in - let fmt_goto ff jmp = Format.fprintf ff "goto %a;" Jump.fmt jmp in - match term with - | Switch {key; tbl; els} -> ( - match Vector.to_array tbl with - | [||] -> pf "%a" fmt_goto els - | [|(z, jmp)|] when Z.equal Z.one z -> - pf "@[if (%a)@;<1 2>%a@ @[else@;<1 2>%a@]@]" Exp.fmt key fmt_goto - jmp fmt_goto els - | _ -> - pf "@[<2>switch %a@ @[%a@ else: %a@]@]" Exp.fmt key - (vector_fmt "@ " (fun ff (z, jmp) -> - Format.fprintf ff "%a: %a" Z.pp_print z fmt_goto jmp )) - tbl fmt_goto els ) - | ISwitch {ptr; tbl} -> - pf "@[<2>iswitch %a@ @[%a@]@]" Exp.fmt ptr - (vector_fmt "@ " (fun ff ({dst= {lbl}; _} as jmp) -> - Format.fprintf ff "%s: %a" lbl fmt_goto jmp )) - tbl - | Call {call= {dst; args; retreating}; return; throw} -> - pf "@[<2>@[<7>call @[<2>%s%a%a@]@]@ @[returnto %a%a;@]@]" - (if retreating then "↑" else "") - Exp.fmt dst (fmt_args Exp.fmt) args Jump.fmt return - (option_fmt "@ throwto %a" Jump.fmt) - throw - | Return {exp} -> pf "return%a" (option_fmt " %a" Exp.fmt) exp - | Throw {exc} -> pf "throw %a" Exp.fmt exc - | Unreachable -> pf "unreachable" + let return ~exp ~loc = Return {exp; loc} |> check invariant + let throw ~exc ~loc = Throw {exc; loc} |> check invariant + let unreachable = Unreachable |> check invariant end -module Block = struct - type t = block [@@deriving sexp] - - let mk ~lbl ~params ~cmnd ~term = - assert ( - not (List.contains_dup (Vector.to_list params) ~compare:Var.compare) - ) ; - {dummy_block with lbl; params; cmnd; term} +(** Basic-Blocks *) +module Block = struct + module T = struct type t = block [@@deriving compare, sexp_of] end + include T + include Comparator.Make (T) - (* blocks in a [t] are uniquely identified by [sort_index] *) - let compare x y = Int.compare x.sort_index y.sort_index + let pp = pp_block - let hash {sort_index} = Int.hash sort_index + let invariant blk = + Invariant.invariant [%here] blk [%sexp_of: t] + @@ fun () -> + assert (not (List.contains_dup blk.params ~compare:Var.compare)) - let fmt ff {lbl; params; cmnd; term; sort_index} = - Format.fprintf ff "@[@[<4>%s%a@]: #%i@ @[%a%t%a@]@]" lbl - (fmt_args fmt_param) params sort_index fmt_cmnd cmnd - (fun ff -> if Vector.is_empty cmnd then () else Format.fprintf ff "@ ") - Term.fmt term + let mk ~lbl ~params ~cmnd ~term = + let locals = + let locals_cmnd cmnd vs = + Vector.fold_right cmnd ~init:vs ~f:Inst.union_locals + in + let locals_params params vs = List.fold params ~init:vs ~f:Set.add in + locals_params params (locals_cmnd cmnd Var.Set.empty) + in + { lbl + ; params + ; locals + ; cmnd + ; term + ; parent= dummy_block.parent + ; sort_index= dummy_block.sort_index } + |> check invariant end -module Func = struct - type t = func - - let find functions func = - Vector.find functions ~f:(fun {name} -> Global.equal func name) +(** Functions *) +module Func = struct + type t = func [@@deriving sexp_of] let is_undefined = function | {entry= {cmnd; term= Unreachable}} -> Vector.is_empty cmnd | _ -> false + let pp fs ({name; entry= {params; cmnd; term; sort_index}; cfg} as func) = + let pp_if cnd str fs = if cnd then Format.fprintf fs str in + Format.fprintf fs "@[@[%a@[<2>%a%a@]%t@]" (Option.pp "%a " Typ.pp) + ( match name.typ with + | Pointer {elt= Function {return}} -> return + | _ -> None ) + Global.pp name (pp_args pp_param) params + (fun fs -> + if is_undefined func then Format.fprintf fs " #%i@]" sort_index + else + Format.fprintf fs " { #%i %a@;<1 4>@[%a@ %a@]%t%a@]@ }" + sort_index Loc.pp name.loc pp_cmnd cmnd Term.pp term + (pp_if (not (Vector.is_empty cfg)) "@ @ ") + (Vector.pp "@\n@\n " Block.pp) + cfg ) - let fold_term {entry; cfg} ~init ~f = - let fold_block {term} ~init ~f = f init term in - Vector.fold cfg ~init:(fold_block entry ~init ~f) ~f:(fun z k -> - fold_block k ~init:z ~f ) - + let fold_term {entry= {term}; cfg} ~init ~f = + Vector.fold cfg ~init:(f init term) ~f:(fun a {term} -> f a term) + + let iter_term {entry= {term}; cfg} ~f = + f term ; + Vector.iter cfg ~f:(fun {term} -> f term) + + let invariant func = + Invariant.invariant [%here] func [%sexp_of: t] + @@ fun () -> + let {name= {typ}; cfg} = func in + match typ with + | Pointer _ -> + assert ( + not + (Vector.contains_dup cfg ~compare:(fun b1 b2 -> + String.compare b1.lbl b2.lbl )) ) ; + assert ( + not + (List.contains_dup + (List.concat_map (Vector.to_list cfg) ~f:(fun {params} -> + params )) + ~compare:Var.compare) ) ; + iter_term func ~f:(fun term -> Term.invariant term) + | _ -> assert false + + let find functions name = + Vector.find functions ~f:(fun {name= {var}} -> Var.equal name var) let mk ~name ~entry ~cfg = let func = {name; entry; cfg} in @@ -297,7 +382,7 @@ module Func = struct | Switch {tbl; els} -> Vector.iter tbl ~f:(fun (_, jmp) -> set_dst jmp) ; set_dst els - | ISwitch {tbl} -> Vector.iter tbl ~f:set_dst + | Iswitch {tbl} -> Vector.iter tbl ~f:set_dst | Call {return; throw} -> set_dst return ; Option.iter throw ~f:set_dst @@ -305,155 +390,107 @@ module Func = struct in resolve_parent_and_jumps entry ; Vector.iter cfg ~f:resolve_parent_and_jumps ; - assert ( - not - (List.contains_dup (Vector.to_list cfg) ~compare:(fun b1 b2 -> - String.compare b1.lbl b2.lbl )) ) ; - assert ( - let check_jump ?retn_typ {dst= {params}; args} = - let rev_frml_typs = - Vector.fold params ~init:[] ~f:(fun rev_frml_typs frml -> - Var.typ frml :: rev_frml_typs ) - in - let rev_actl_typs = - Option.to_list retn_typ - @ Vector.fold args ~init:[] ~f:(fun rev_actl_typs actl -> - Exp.typ actl :: rev_actl_typs ) - in - List.iter2_exn rev_frml_typs rev_actl_typs ~f:(fun frml actl -> - assert (Typ.equal frml actl && Typ.is_sized frml) ) - in - fold_term func ~init:() ~f:(fun () -> function - | Switch {tbl; els} -> - Vector.iter tbl ~f:(fun (_, jmp) -> check_jump jmp) ; - check_jump els - | ISwitch {tbl} -> Vector.iter tbl ~f:check_jump - | Call {call= {dst; args= actls}; return; throw; ignore_result} -> ( - match Exp.typ dst with - | Pointer {elt= Function {args= frmls; return= retn_typ}} -> - Vector.iter2_exn frmls actls ~f:(fun frml actl -> - assert (Typ.equal frml (Exp.typ actl)) ) ; - check_jump return - ?retn_typ:(if ignore_result then None else retn_typ) ; - Option.iter throw ~f:(fun throw -> - check_jump throw ~retn_typ:Typ.i8p ) - | _ -> assert false ) - | Return {exp} -> ( - let typ = Option.map exp ~f:Exp.typ in - match Global.typ name with - | Pointer {elt= Function {return}} -> - assert (Option.equal Typ.equal typ return) - | _ -> assert false ) - | Throw {exc} -> assert (Typ.equal Typ.i8p (Exp.typ exc)) - | Unreachable -> () ) ; - true ) ; - func - + func |> check invariant let mk_undefined ~name ~params = let entry = - Block.mk ~lbl:"" ~params ~cmnd:Vector.empty ~term:Term.mkUnreachable + Block.mk ~lbl:"" ~params ~cmnd:Vector.empty ~term:Term.unreachable in let cfg = Vector.empty in mk ~name ~entry ~cfg +end +(** Derived meta-data *) - let fmt ff ({name; entry= {params; cmnd; term; sort_index}; cfg} as func) - = - let fmt_if cnd str ff = if cnd then Format.fprintf ff str in - let[@warning "p"] (Typ.Pointer {elt= Function {return}}) = - Global.typ name - in - Format.fprintf ff "@[@[%a@[<2>%a%a@]%t@]" - (option_fmt "%a " Typ.fmt) - return Global.fmt name (fmt_args fmt_param) params (fun ff -> - if is_undefined func then Format.fprintf ff " #%i@]" sort_index - else - Format.fprintf ff " { #%i @;<1 4>@[%a@ %a@]%t%a@]@ }" - sort_index fmt_cmnd cmnd Term.fmt term - (fmt_if (not (Vector.is_empty cfg)) "@ @ ") - (vector_fmt "@\n@\n " Block.fmt) - cfg ) -end +(* Blocks compared by label, which are unique within a function, used to + compute unique sort_index ids *) +module Block_label = struct + module T = struct + module T0 = struct + type t = block [@@deriving sexp_of] -module Block_id = struct - type t = block [@@deriving sexp] + let compare x y = + [%compare: string * Global.t] (x.lbl, x.parent.name) + (y.lbl, y.parent.name) - (* block labels within a function are unique *) - let compare x y = - [%compare: string * Global.t] (x.lbl, x.parent.name) - (y.lbl, y.parent.name) + let hash b = [%hash: string * Global.t] (b.lbl, b.parent.name) + end + include T0 + include Comparator.Make (T0) + end - let hash b = Hashtbl.hash (b.lbl, b.parent.name) + include T + + let empty_set = Set.empty (module T) end -module BS = Set.Make (Block_id) -module BQ = Hash_queue.Make (Block_id) -module FQ = Hash_queue.Make (Global) +module BlockQ = Hash_queue.Make (Block_label) +module FuncQ = Hash_queue.Make (Var) let set_derived_metadata functions = let compute_roots functions = - let roots = FQ.create () in - Array.iter functions ~f:(fun func -> FQ.enqueue_exn roots func.name func) ; + let roots = FuncQ.create () in + Array.iter functions ~f:(fun func -> + FuncQ.enqueue_exn roots func.name.var func ) ; Array.iter functions ~f:(fun func -> Func.fold_term func ~init:() ~f:(fun () -> function | Call {call= {dst}} -> ( - match Global.of_exp dst with - | Some callee -> FQ.remove roots callee |> ignore + match Var.of_exp dst with + | Some callee -> + FuncQ.remove roots callee |> (ignore : [> ] -> unit) | None -> () ) | _ -> () ) ) ; roots in let topsort functions roots = - let tips_to_roots = BQ.create () in + let tips_to_roots = BlockQ.create () in let rec visit ancestors func src = - if not (BQ.mem tips_to_roots src) then ( - let ancestors = BS.add ancestors src in + if BlockQ.mem tips_to_roots src then () + else + let ancestors = Set.add ancestors src in let jump jmp = - if BS.mem ancestors jmp.dst then jmp.retreating <- true + if Set.mem ancestors jmp.dst then jmp.retreating <- true else visit ancestors func jmp.dst in ( match src.term with | Switch {tbl; els} -> Vector.iter tbl ~f:(fun (_, jmp) -> jump jmp) ; jump els - | ISwitch {tbl} -> Vector.iter tbl ~f:jump + | Iswitch {tbl} -> Vector.iter tbl ~f:jump | Call {call= {dst} as call; return; throw} -> - ( match Global.of_exp dst with - | Some name -> ( - match Func.find (Vector.of_array functions) name with - | Some func -> - if BS.mem ancestors func.entry then - call.retreating <- true - else visit ancestors func func.entry - | None -> - fail "Call to unknown function: %a" Global.fmt name () ) + ( match + Var.of_exp dst >>= Func.find (Vector.of_array functions) + with + | Some func -> + if Set.mem ancestors func.entry then call.retreating <- true + else visit ancestors func func.entry | None -> (* conservatively assume all virtual calls are recursive *) call.retreating <- true ) ; jump return ; Option.iter ~f:jump throw | Return _ | Throw _ | Unreachable -> () ) ; - BQ.enqueue_exn tips_to_roots src () ) + BlockQ.enqueue_exn tips_to_roots src () in - FQ.iter roots ~f:(fun root -> visit BS.empty root root.entry) ; + FuncQ.iter roots ~f:(fun root -> + visit Block_label.empty_set root root.entry ) ; tips_to_roots in let set_sort_indices tips_to_roots = - let index = ref (BQ.length tips_to_roots) in - BQ.iteri tips_to_roots ~f:(fun ~key:block ~data:_ -> + let index = ref (BlockQ.length tips_to_roots) in + BlockQ.iteri tips_to_roots ~f:(fun ~key:block ~data:_ -> block.sort_index <- !index ; - decr index ) + index := !index - 1 ) in let sort_cfgs functions = Array.iter functions ~f:(fun {cfg} -> Array.sort - ~cmp:(fun x y -> Int.compare x.sort_index y.sort_index) + ~compare:(fun x y -> Int.compare x.sort_index y.sort_index) (Vector.to_array cfg) ) in let sort_functions functions = Array.sort - ~cmp:(fun x y -> Int.compare x.entry.sort_index y.entry.sort_index) + ~compare:(fun x y -> Int.compare x.entry.sort_index y.entry.sort_index) functions in let functions = Array.of_list functions in @@ -464,33 +501,27 @@ let set_derived_metadata functions = sort_functions functions ; Vector.of_array functions - -let mk ~typ_defns ~globals ~functions = +let invariant pgm = + Invariant.invariant [%here] pgm [%sexp_of: t] + @@ fun () -> + let {globals; functions} = pgm in assert ( - (not - (List.contains_dup typ_defns ~compare:(fun (s : Typ.t) (t : Typ.t) -> - match (s, t) with - | ( (Struct {name= n1} | Opaque {name= n1}) - , (Struct {name= n2} | Opaque {name= n2}) ) -> - String.compare n1 n2 - | _ -> Typ.compare s t ))) - && (not - (List.contains_dup globals ~compare:(fun g1 g2 -> - String.compare (Global.name g1) (Global.name g2) ))) - && not - (List.contains_dup functions ~compare:(fun f1 f2 -> - String.compare (Global.name f1.name) (Global.name f2.name) )) - ) ; - { typ_defns - ; globals= Vector.of_list_rev globals - ; functions= set_derived_metadata functions } + not + (Vector.contains_dup globals ~compare:(fun g1 g2 -> + Var.compare g1.Global.var g2.Global.var )) ) ; + assert ( + not + (Vector.contains_dup functions ~compare:(fun f1 f2 -> + Var.compare f1.name.var f2.name.var )) ) +let mk ~globals ~functions = + { globals= Vector.of_list_rev globals + ; functions= set_derived_metadata functions } + |> check invariant -let fmt ff {typ_defns; globals; functions} = - Format.fprintf ff "@[@[%a@]@ @ @ @[%a@]@ @ @ @[%a@]@]" - (list_fmt "@\n@\n" Typ.fmt_defn) - typ_defns - (vector_fmt "@\n@\n" Global.fmt_defn) +let pp fs {globals; functions} = + Format.fprintf fs "@[@[%a@]@ @ @ @[%a@]@]" + (Vector.pp "@\n@\n" Global.pp_defn) globals - (vector_fmt "@\n@\n" Func.fmt) + (Vector.pp "@\n@\n" Func.pp) functions diff --git a/sledge/src/llair/llair.mli b/sledge/src/llair/llair.mli index 2d72ae432..0eead2955 100644 --- a/sledge/src/llair/llair.mli +++ b/sledge/src/llair/llair.mli @@ -5,10 +5,10 @@ * LICENSE file in the root directory of this source tree. *) -(** Programs / translation units +(** Translation units - LLAIR (Low-Level Analysis Internal Representation) is an IR optimized - for static analysis using a low-level model of memory. Compared to a + LLAIR (Low-Level Analysis Internal Representation) is an IR tailored for + static analysis using a low-level model of memory. Compared to a compiler IR such as LLVM, an analyzer does not need to perform register allocation, instruction selection, code generation, etc. or even much code transformation, so the constraints on the IR are very different. @@ -17,14 +17,12 @@ instead of using ϕ-nodes. An analyzer will need good support for parameter passing anyhow, and ϕ-nodes make it hard to express program properties as predicates on states, since some execution history is - needed to evaluate ϕ instructions. SSA form is beneficial for analysis - as it means that all "modified variables" side-conditions on program - logic rules trivially hold. An alternative view is that the scope of - variables [reg] assigned in instructions such as [Load] is the successor - block as well as all blocks the instruction dominates in the + needed to evaluate ϕ instructions. An alternative view is that the + scope of variables [reg] assigned in instructions such as [Load] is the + successor block as well as all blocks the instruction dominates in the control-flow graph. This language is first-order, and a term structure for the code constituting the scope of variables is not needed, so SSA - and not CPS suffices. + rather than full CPS suffices. Additionally, the focus on memory analysis leads to a design where the arithmetic and logic operations are not "instructions" but instead are @@ -32,10 +30,11 @@ (** Instructions for memory manipulation or other non-control effects. *) type inst = private - | Load of {reg: Var.t; ptr: Exp.t; loc: Loc.t} - (** Read the contents of memory at address [ptr] into [reg]. *) - | Store of {ptr: Exp.t; exp: Exp.t; loc: Loc.t} - (** Write [exp] into memory at address [ptr]. *) + | Load of {reg: Var.t; ptr: Exp.t; len: Exp.t; loc: Loc.t} + (** Read a [len]-byte value from the contents of memory at address + [ptr] into [reg]. *) + | Store of {ptr: Exp.t; exp: Exp.t; len: Exp.t; loc: Loc.t} + (** Write [len]-byte value [exp] into memory at address [ptr]. *) | Memcpy of {dst: Exp.t; src: Exp.t; len: Exp.t; loc: Loc.t} (** Copy [len] bytes starting from address [src] to [dst], undefined if ranges overlap. *) @@ -43,14 +42,17 @@ type inst = private (** Copy [len] bytes starting from address [src] to [dst]. *) | Memset of {dst: Exp.t; byt: Exp.t; len: Exp.t; loc: Loc.t} (** Store byte [byt] into [len] memory addresses starting from [dst]. *) - | Alloc of {reg: Var.t; num: Exp.t; loc: Loc.t} + | Alloc of {reg: Var.t; num: Exp.t; len: Exp.t; loc: Loc.t} (** Allocate a block of memory large enough to store [num] elements of - type [t] where [reg : t*] and bind [reg] to the first address. *) + [len] bytes each and bind [reg] to the first address. *) | Free of {ptr: Exp.t; loc: Loc.t} (** Deallocate the previously allocated block at address [ptr]. *) | Nondet of {reg: Var.t option; msg: string; loc: Loc.t} - (** Bind [reg] to an arbitrary value of its type, representing - non-deterministic approximation of behavior described by [msg]. *) + (** Bind [reg] to an arbitrary value, representing non-deterministic + approximation of behavior described by [msg]. *) + | Strlen of {reg: Var.t; ptr: Exp.t; loc: Loc.t} + (** Bind [reg] to the length of the null-terminated string in memory + starting from [ptr]. *) (** A (straight-line) command is a sequence of instructions. *) type cmnd = inst vector @@ -61,7 +63,7 @@ type label = string (** A jump with arguments. *) type 'a control_transfer = { mutable dst: 'a - ; args: Exp.t vector + ; args: Exp.t list (** Stack of arguments, first-arg-last *) ; mutable retreating: bool (** Holds if [dst] is an ancestor in a depth-first traversal. *) } @@ -73,10 +75,11 @@ and term = private | Switch of {key: Exp.t; tbl: (Z.t * jump) vector; els: jump; loc: Loc.t} (** Invoke the [jump] in [tbl] associated with the integer [z] which is equal to [key], if any, otherwise invoke [els]. *) - | ISwitch of {ptr: Exp.t; tbl: jump vector; loc: Loc.t} + | Iswitch of {ptr: Exp.t; tbl: jump vector; loc: Loc.t} (** Invoke the [jump] in [tbl] whose [dst] is equal to [ptr]. *) | Call of - { call: Exp.t control_transfer (** [Global] for non-virtual call. *) + { call: Exp.t control_transfer (** A [global] for non-virtual call. *) + ; typ: Typ.t (** Type of the callee. *) ; return: jump (** Return destination or trampoline. *) ; throw: jump option (** Handler destination or trampoline. *) ; ignore_result: bool (** Drop return value when invoking return. *) @@ -93,7 +96,8 @@ and term = private (** A block is a destination of a jump with arguments, contains code. *) and block = private { lbl: label - ; params: Var.t vector + ; params: Var.t list (** Formal parameters, first-param-last stack *) + ; locals: Var.Set.t (** Local variables, including [params]. *) ; cmnd: cmnd ; term: term ; mutable parent: func @@ -104,104 +108,93 @@ and block = private and cfg (** A function is a control-flow graph with distinguished entry block, whose - arguments are the function arguments. *) + parameters are the function parameters. *) and func = private {name: Global.t; entry: block; cfg: cfg} type t = private - { typ_defns: Typ.t list (** Type definitions. *) - ; globals: Global.t vector (** Global variable definitions. *) + { globals: Global.t vector (** Global variable definitions. *) ; functions: func vector (** (Global) function definitions. *) } -val mk : - typ_defns:Typ.t list -> globals:Global.t list -> functions:func list -> t +val pp : t pp -val fmt : t fmt +include Invariant.S with type t := t + +val mk : globals:Global.t list -> functions:func list -> t module Inst : sig type t = inst - val mkLoad : reg:Var.t -> ptr:Exp.t -> loc:Loc.t -> inst - - val mkStore : ptr:Exp.t -> exp:Exp.t -> loc:Loc.t -> inst - - val mkMemcpy : dst:Exp.t -> src:Exp.t -> len:Exp.t -> loc:Loc.t -> inst - - val mkMemmov : dst:Exp.t -> src:Exp.t -> len:Exp.t -> loc:Loc.t -> inst - - val mkMemset : dst:Exp.t -> byt:Exp.t -> len:Exp.t -> loc:Loc.t -> inst - - val mkAlloc : reg:Var.t -> num:Exp.t -> loc:Loc.t -> inst - - val mkFree : ptr:Exp.t -> loc:Loc.t -> inst - - val mkNondet : reg:Var.t option -> msg:string -> loc:Loc.t -> inst - - val fmt : t fmt + val pp : t pp + val load : reg:Var.t -> ptr:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + val store : ptr:Exp.t -> exp:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + val memcpy : dst:Exp.t -> src:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + val memmov : dst:Exp.t -> src:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + val memset : dst:Exp.t -> byt:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + val alloc : reg:Var.t -> num:Exp.t -> len:Exp.t -> loc:Loc.t -> inst + val free : ptr:Exp.t -> loc:Loc.t -> inst + val nondet : reg:Var.t option -> msg:string -> loc:Loc.t -> inst + val strlen : reg:Var.t -> ptr:Exp.t -> loc:Loc.t -> inst + val loc : inst -> Loc.t + val locals : inst -> Var.Set.t end module Jump : sig - type t = jump - - val mk : string -> Exp.t vector -> jump + type t = jump [@@deriving compare, sexp_of] - val fmt : jump fmt + val pp : jump pp + val mk : string -> Exp.t list -> jump end module Term : sig type t = term - val mkSwitch : + val pp : t pp + + val switch : key:Exp.t -> tbl:(Z.t * jump) vector -> els:jump -> loc:Loc.t -> term - val mkISwitch : ptr:Exp.t -> tbl:jump vector -> loc:Loc.t -> term + val iswitch : ptr:Exp.t -> tbl:jump vector -> loc:Loc.t -> term - val mkCall : + val call : func:Exp.t - -> args:Exp.t vector + -> typ:Typ.t + -> args:Exp.t list -> return:jump -> throw:jump option -> ignore_result:bool -> loc:Loc.t -> term - val mkReturn : exp:Exp.t option -> loc:Loc.t -> term - - val mkThrow : exc:Exp.t -> loc:Loc.t -> term - - val mkUnreachable : term - - val fmt : t fmt + val return : exp:Exp.t option -> loc:Loc.t -> term + val throw : exc:Exp.t -> loc:Loc.t -> term + val unreachable : term end module Block : sig - type t = block - - val mk : - lbl:label -> params:Var.t vector -> cmnd:cmnd -> term:term -> block + type t = block [@@deriving compare, sexp_of] - val fmt : t fmt + include Comparator.S with type t := t - val compare : t -> t -> int + val pp : t pp - val hash : t -> int + include Invariant.S with type t := t - val sexp_of_t : t -> Sexp.t - - val t_of_sexp : Sexp.t -> t + val mk : lbl:label -> params:Var.t list -> cmnd:cmnd -> term:term -> block end module Func : sig type t = func - val mk : name:Global.t -> entry:block -> cfg:block vector -> func + val pp : t pp - val mk_undefined : name:Global.t -> params:Var.t vector -> t + include Invariant.S with type t := t - val find : func vector -> Global.t -> func option + val mk : name:Global.t -> entry:block -> cfg:block vector -> func + val mk_undefined : name:Global.t -> params:Var.t list -> t + + val find : func vector -> Var.t -> func option (** Look up a function of the given name in the given functions. *) val is_undefined : func -> bool (** Holds of functions that are declared but not defined. *) - - val fmt : t fmt end