@ -9,6 +9,8 @@ module F = Format
module L = Logging
open Result . Monad_infix
(* {2 Abstract domain description } *)
(* * An abstract address in memory. *)
module AbstractAddress : sig
type t = private int [ @@ deriving compare ]
@ -33,44 +35,63 @@ end = struct
let pp = F . pp_print_int
end
module A bstra ctAddressDomain : Abstra ctDomain. S with type astate = AbstractAddr ess. t = struct
type astate = AbstractAddress . t
module A ccess = struct
type t = AccessPath . access [ @@ deriving compare ]
let ( < = ) ~ lhs ~ rhs = AbstractAddress . equal lhs rhs
let pp = AccessPath . pp_access
end
let join l1 l2 =
if AbstractAddress . equal l1 l2 then l1 else (* TODO: scary *) AbstractAddress . mk_fresh ()
module Memory = struct
module Edges = PrettyPrintable . MakePPMap ( Access )
module Graph = PrettyPrintable . MakePPMap ( AbstractAddress )
(* {3 Monomorphic {!PPMap} interface as needed } *)
let widen ~ prev ~ next ~ num_iters : _ = join prev nex t
type t = AbstractAddress . t Edges . t Graph . t
let pp = AbstractAddress . pp
end
let empty = Graph . empty
module Access = struct
type t = AccessPath . access [ @@ deriving compare ]
let find_opt = Graph . find_opt
let pp = AccessPath . pp_access
end
let for_all = Graph . for_all
let fold = Graph . fold
module MemoryEdges = AbstractDomain . InvertedMap ( Access ) ( AbstractAddressDomain )
let pp = Graph . pp ~ pp_value : ( Edges . pp ~ pp_value : AbstractAddress . pp )
module MemoryDomain = struct
include AbstractDomain . InvertedMap ( AbstractAddress ) ( MemoryEdges )
(* {3 Helper functions to traverse the two maps at once } *)
let add_edge addr_src access addr_end memory =
let edges =
match find_opt addr_src memory with Some edges -> edges | None -> Memory Edges. empty
match Graph . find_opt addr_src memory with Some edges -> edges | None -> Edges. empty
in
add addr_src ( Memory Edges. add access addr_end edges ) memory
Graph . add addr_src ( Edges. add access addr_end edges ) memory
let find_edge_opt addr access memory =
let open Option . Monad_infix in
find_opt addr memory > > = Memory Edges. find_opt access
Graph . find_opt addr memory > > = Edges. find_opt access
end
module AliasingDomain = AbstractDomain . InvertedMap ( Var ) ( AbstractAddressDomain )
(* * to be used as maps values *)
module AbstractAddressDomain_JoinIsMin : AbstractDomain . S with type astate = AbstractAddress . t =
struct
type astate = AbstractAddress . t
let ( < = ) ~ lhs ~ rhs = AbstractAddress . equal lhs rhs
let join l1 l2 = min l1 l2
let widen ~ prev ~ next ~ num_iters : _ = join prev next
let pp = AbstractAddress . pp
end
(* It so happens that the join we want on stacks is this followed by normalization wrt the
unification found between abstract locations , so it's convenient to define stacks as elements of
this domain . Do not use the domain operations outside of { ! Domain } though as they are mostly
meaningless on their own . * )
module AliasingDomain = AbstractDomain . Map ( Var ) ( AbstractAddressDomain_JoinIsMin )
type actor = { access_expr : AccessExpression . t ; location : Location . t }
@ -78,6 +99,7 @@ let pp_actor f {access_expr; location} =
F . fprintf f " %a@%a " AccessExpression . pp access_expr Location . pp location
(* * Locations known to be invalid for a reason described by an {!actor}. *)
module type InvalidAddressesDomain = sig
include AbstractDomain . S
@ -86,6 +108,10 @@ module type InvalidAddressesDomain = sig
val add : AbstractAddress . t -> actor -> astate -> astate
val get_invalidation : AbstractAddress . t -> astate -> actor option
(* * return [Some actor] if the location was invalid by [actor], [None] if it is valid *)
val map : ( AbstractAddress . t -> AbstractAddress . t ) -> astate -> astate
(* * translate invalid addresses according to the mapping *)
end
module InvalidAddressesDomain : InvalidAddressesDomain = struct
@ -94,6 +120,7 @@ module InvalidAddressesDomain : InvalidAddressesDomain = struct
let join actor _ = actor
(* actors do not participate in the comparison of sets of invalid locations *)
let ( < = ) ~ lhs : _ ~ rhs : _ = true
let widen ~ prev ~ next : _ ~ num_iters : _ = prev
@ -104,60 +131,221 @@ module InvalidAddressesDomain : InvalidAddressesDomain = struct
include AbstractDomain . Map ( AbstractAddress ) ( InvalidationReason )
let get_invalidation address invalids = find_opt address invalids
let map f invalids = fold ( fun key actor invalids -> add ( f key ) actor invalids ) invalids empty
end
type t =
{ heap : MemoryDomain . astate ; stack : AliasingDomain . astate ; invalids : InvalidAddressesDomain . astate }
type t = { heap : Memory . t ; stack : AliasingDomain . astate ; invalids : InvalidAddressesDomain . astate }
let initial =
{ heap = Memory Domain . empty ; stack = AliasingDomain . empty ; invalids = InvalidAddressesDomain . empty }
{ heap = Memory . empty ; stack = AliasingDomain . empty ; invalids = InvalidAddressesDomain . empty }
module Domain : AbstractDomain . S with type astate = t = struct
type astate = t
(* This is very naive and should be improved. We can compare two memory graphs by trying to
establish that the graphs reachable from each root are the same up to some additional
unfolding , i . e . are not incompatible when we prune everything that is not reachable from the
roots . Here the roots are the known aliases in the stack since that's the only way to address
into the heap . * )
let ( < = ) ~ lhs ~ rhs =
phys_equal lhs rhs
| | InvalidAddressesDomain . ( < = ) ~ lhs : lhs . invalids ~ rhs : rhs . invalids
let piecewise_lessthan lhs rhs =
InvalidAddressesDomain . ( < = ) ~ lhs : lhs . invalids ~ rhs : rhs . invalids
&& AliasingDomain . ( < = ) ~ lhs : lhs . stack ~ rhs : rhs . stack
&& MemoryDomain . ( < = ) ~ lhs : lhs . heap ~ rhs : rhs . heap
&& Memory . for_all
( fun addr_src edges ->
Memory . Edges . for_all
( fun edge addr_dst ->
Memory . find_edge_opt addr_src edge rhs . heap
| > Option . exists ~ f : ( fun addr -> AbstractAddress . equal addr addr_dst ) )
edges )
lhs . heap
module JoinState = struct
module AddressUnionSet = struct
module Set = PrettyPrintable . MakePPSet ( AbstractAddress )
type elt = AbstractAddress . t [ @@ deriving compare ]
type t = Set . t ref
let create x = ref ( Set . singleton x )
let compare_size _ _ = 0
let merge ~ from ~ to_ = to_ := Set . union ! from ! to_
(* Like ( <= ) this is probably too naive *)
let pp f x = Set . pp f ! x
end
module AddressUF = ImperativeUnionFind . Make ( AddressUnionSet )
(* * just to get the correct type coercion *)
let to_canonical_address subst addr = ( AddressUF . find subst addr :> AbstractAddress . t )
type nonrec t = { subst : AddressUF . t ; astate : t }
(* * adds [ ( src_addr, access, dst_addr ) ] to [union_heap] and record potential new equality that
results from it in [ subst ] * )
let union_one_edge subst src_addr access dst_addr union_heap =
let src_addr = to_canonical_address subst src_addr in
let dst_addr = to_canonical_address subst dst_addr in
match Memory . find_edge_opt src_addr access union_heap with
| None ->
( Memory . add_edge src_addr access dst_addr union_heap , ` No_new_equality )
| Some dst_addr' ->
(* new equality [dst_addr = dst_addr'] found *)
ignore ( AddressUF . union subst dst_addr dst_addr' ) ;
( union_heap , ` New_equality )
module Addresses = Caml . Set . Make ( AbstractAddress )
let rec visit_address subst visited heap addr union_heap =
if Addresses . mem addr visited then ( visited , union_heap )
else
let visited = Addresses . add addr visited in
let visit_edge access addr_dst ( visited , union_heap ) =
union_one_edge subst addr access addr_dst union_heap
| > fst
| > visit_address subst visited heap addr_dst
in
Memory . find_opt addr heap
| > Option . fold ~ init : ( visited , union_heap ) ~ f : ( fun visited_union_heap edges ->
Memory . Edges . fold visit_edge edges visited_union_heap )
let visit_stack subst heap stack union_heap =
(* start graph exploration *)
let visited = Addresses . empty in
let _ , union_heap =
AliasingDomain . fold
( fun _ var addr ( visited , union_heap ) -> visit_address subst visited heap addr union_heap )
stack ( visited , union_heap )
in
union_heap
let populate_subst_from_stacks subst stack1 stack2 =
ignore
( (* Use [Caml.Map.merge] to detect the variables present in both stacks. Build an empty
result map since we don't use the result . * )
AliasingDomain . merge
( fun _ var addr1_opt addr2_opt ->
Option . both addr1_opt addr2_opt
| > Option . iter ~ f : ( fun ( addr1 , addr2 ) ->
(* stack1 says [_var = addr1] and stack2 says [_var = addr2]: unify the
addresses since they are equal to the same variable * )
ignore ( AddressUF . union subst addr1 addr2 ) ) ;
(* empty result map *)
None )
stack1 stack2 )
let from_astate_union { heap = heap1 ; stack = stack1 ; invalids = invalids1 }
{ heap = heap2 ; stack = stack2 ; invalids = invalids2 } =
let subst = AddressUF . create () in
(* gather equalities from the stacks *)
populate_subst_from_stacks subst stack1 stack2 ;
(* union the heaps, take this opportunity to do garbage collection of unreachable values by
only copying the addresses reachable from the variables in the stacks * )
let heap = visit_stack subst heap1 stack1 Memory . empty | > visit_stack subst heap2 stack2 in
(* This keeps all the variables and picks one representative address for each variable in
common thanks to [ AbstractAddressDomain_JoinIsMin ] * )
let stack = AliasingDomain . join stack1 stack2 in
(* basically union *)
let invalids = InvalidAddressesDomain . join invalids1 invalids2 in
{ subst ; astate = { heap ; stack ; invalids } }
let rec normalize state =
let one_addr subst addr edges heap_has_converged =
Memory . Edges . fold
( fun access addr_dest ( heap , has_converged ) ->
match union_one_edge subst addr access addr_dest heap with
| heap , ` No_new_equality ->
( heap , has_converged )
| heap , ` New_equality ->
( heap , false ) )
edges heap_has_converged
in
let heap , has_converged =
Memory . fold ( one_addr state . subst ) state . astate . heap ( Memory . empty , true )
in
if has_converged then (
L . d_strln " Join unified addresses: " ;
L . d_increase_indent 1 ;
Container . iter state . subst ~ fold : AddressUF . fold_sets
~ f : ( fun ( ( repr : AddressUF . Repr . t ) , set ) ->
L . d_strln
( F . asprintf " %a=%a " AbstractAddress . pp
( repr :> AbstractAddress . t )
AddressUnionSet . pp set ) ) ;
L . d_decrease_indent 1 ;
let stack = AliasingDomain . map ( to_canonical_address state . subst ) state . astate . stack in
let invalids =
InvalidAddressesDomain . map ( to_canonical_address state . subst ) state . astate . invalids
in
{ heap ; stack ; invalids } )
else normalize { state with astate = { state . astate with heap } }
end
(* * Given
- stacks S1 , S2 : Var -> Address ,
- graphs G1 , G2 : Address -> Access -> Address ,
- and invalid sets I1 , I2 : 2 ^ Address
( all finite ) , the join of 2 abstract states ( S1 , G1 , I1 ) and ( S2 , G2 , I2 ) is ( S , G , A ) where
there exists a substitution σ from addresses to addresses such that the following holds . Given
addresses l , l' , access path a , and graph G , we write l – a – > l' ∈ G if there is a path labelled
by a from l to l' in G ( in particular , if a is empty then l – a – > l' ∈ G for all l , l' ) .
∀ i ∈ { 1 , 2 } , ∀ l , x , a , ∀ l' ∈ Ii , ( ( x , l ) ∈ Si ∧ l – a – > l' ∈ Gi )
= > ( x , σ ( l ) ) ∈ S ∧ σ ( l ) – a – > σ ( l' ) ∈ G ∧ σ ( l' ) ∈ I
For now the implementation gives back a larger heap than necessary , where all the previously
reachable location are still reachable ( up to the substitution ) instead of only the locations
leading to invalid ones .
* )
let join astate1 astate2 =
if phys_equal astate1 astate2 then astate1
else
{ heap = MemoryDomain . join astate1 . heap astate2 . heap
; stack = AliasingDomain . join astate1 . stack astate2 . stack
; invalids = InvalidAddressesDomain . join astate1 . invalids astate2 . invalids }
(* high-level idea: maintain some union-find data structure to identify locations in one heap
with locations in the other heap . Build the initial join state as follows :
- equate all locations that correspond to identical variables in both stacks , eg joining
stacks { x = 1 } and { x = 2 } adds " 1=2 " to the unification .
- add all addresses reachable from stack variables to the join state heap
This gives us an abstract state that is the union of both abstract states , but more states
can still be made equal . For instance , if 1 points to 3 in the first heap and 2 points to 4
in the second heap and we deduced " 1 = 2 " from the stacks already ( as in the example just
above ) then we can deduce " 3 = 4 " . Proceed in this fashion until no more equalities are
discovered , and return the abstract state where a canonical representative has been chosen
consistently for each equivalence class ( this is what the union - find data structure gives
us ) . * )
JoinState . from_astate_union astate1 astate2 | > JoinState . normalize
(* TODO: this could be [piecewise_lessthan lhs' ( join lhs rhs ) ] where [lhs'] is [lhs] renamed
according to the unification discovered while joining [ lhs ] and [ rhs ] . * )
let ( < = ) ~ lhs ~ rhs = phys_equal lhs rhs | | piecewise_lessthan lhs rhs
let max_widening = 5
let widen ~ prev ~ next ~ num_iters =
(* probably pretty obvious but that widening is just bad... We need to add a wildcard [ * ] to
access path elements in our graph representing repeated paths if we hope to converge ( like
{ ! AccessPath . Abs . Abstracted } , we actually need something very similar ) . * )
(* widening is underapproximation for now... TODO *)
if num_iters > max_widening then prev
else if phys_equal prev next then prev
else
{ heap = MemoryDomain . widen ~ num_iters ~ prev : prev . heap ~ next : next . heap
; stack = AliasingDomain . widen ~ num_iters ~ prev : prev . stack ~ next : next . stack
; invalids = InvalidAddressesDomain . widen ~ num_iters ~ prev : prev . invalids ~ next : next . invalids
}
else join prev next
let pp fmt { heap ; stack ; invalids } =
F . fprintf fmt " {@[<v1> heap=@[<hv>%a@];@;stack=@[<hv>%a@];@;invalids=@[<hv>%a@];@]} "
MemoryDomain . pp heap AliasingDomain . pp stack InvalidAddressesDomain . pp invalids
F . fprintf fmt " {@[<v1> heap=@[<hv>%a@];@;stack=@[<hv>%a@];@;invalids=@[<hv>%a@];@]} " Memory . pp
heap AliasingDomain . pp stack InvalidAddressesDomain . pp invalids
end
include Domain
(* {2 Access operations on the domain} *)
module Diagnostic = struct
type t =
@ -212,15 +400,15 @@ let rec walk actor ~overwrite_last addr path astate =
| [ a ] , Some new_addr ->
check_addr_access actor addr astate
> > | fun astate ->
let heap = Memory Domain . add_edge addr a new_addr astate . heap in
let heap = Memory . add_edge addr a new_addr astate . heap in
( { astate with heap } , new_addr )
| a :: path , _ -> (
check_addr_access actor addr astate
> > = fun astate ->
match Memory Domain . find_edge_opt addr a astate . heap with
match Memory . find_edge_opt addr a astate . heap with
| None ->
let addr' = AbstractAddress . mk_fresh () in
let heap = Memory Domain . add_edge addr a addr' astate . heap in
let heap = Memory . add_edge addr a addr' astate . heap in
let astate = { astate with heap } in
walk actor ~ overwrite_last addr' path astate
| Some addr' ->
@ -292,3 +480,6 @@ let invalidate location access_expr astate =
> > = fun ( astate , addr ) ->
let actor = { access_expr ; location } in
check_addr_access actor addr astate > > | mark_invalid actor addr
include Domain