diff --git a/infer/src/IR/Exp.ml b/infer/src/IR/Exp.ml index 30e819266..53290a116 100644 --- a/infer/src/IR/Exp.ml +++ b/infer/src/IR/Exp.ml @@ -158,6 +158,8 @@ let minus_one = int IntLit.minus_one (** Create integer constant corresponding to the boolean value *) let bool b = if b then one else zero +let and_2ary e1 e2 = BinOp (LAnd, e1, e2) + (** Create expression [e1 == e2] *) let eq e1 e2 = BinOp (Eq, e1, e2) @@ -170,6 +172,12 @@ let le e1 e2 = BinOp (Le, e1, e2) (** Create expression [e1 < e2] *) let lt e1 e2 = BinOp (Lt, e1, e2) +let nary_of_2ary op_2ary zero es = + match es with [] -> zero | e :: es -> List.fold ~init:e ~f:op_2ary es + + +let and_nary = nary_of_2ary and_2ary one + let fold_captured ~f exp acc = let rec fold_captured_ exp captured_acc = match exp with diff --git a/infer/src/IR/Exp.mli b/infer/src/IR/Exp.mli index 5e5992345..6ae087d07 100644 --- a/infer/src/IR/Exp.mli +++ b/infer/src/IR/Exp.mli @@ -6,7 +6,10 @@ * LICENSE file in the root directory of this source tree. *) -(** The Smallfoot Intermediate Language: Expressions *) +(** The Smallfoot Intermediate Language: Expressions + +NOTE: For doing substitutionson expressions, there are some functions in [Sil]. +*) open! IStd module F = Format @@ -113,6 +116,9 @@ val le : t -> t -> t val lt : t -> t -> t (** Create expression [e1 < e2] *) +val and_nary : t list -> t +(** Create expression [e1 && e2 && e3 && ...] *) + val free_vars : t -> Ident.t Sequence.t (** all the idents appearing in the expression *) diff --git a/infer/src/IR/Instrs.ml b/infer/src/IR/Instrs.ml index 7c7002389..5b37fa4e5 100644 --- a/infer/src/IR/Instrs.ml +++ b/infer/src/IR/Instrs.ml @@ -99,6 +99,7 @@ let concat_map_changed ~equal (NotReversed instrs as t) ~f = then t else NotReversed instrs' + let reverse_order (NotReversed instrs) = Reversed (RevArray.of_rev_array instrs) (* Functions on both reversed and non-reversed arrays *) diff --git a/infer/src/IR/Sil.mli b/infer/src/IR/Sil.mli index d0ca41eea..7df2c15c1 100644 --- a/infer/src/IR/Sil.mli +++ b/infer/src/IR/Sil.mli @@ -480,6 +480,8 @@ val hpred_sub : subst -> hpred -> hpred (** {2 Functions for replacing occurrences of expressions.} *) +val exp_replace_exp : (Exp.t * Exp.t) list -> Exp.t -> Exp.t + val atom_replace_exp : (Exp.t * Exp.t) list -> atom -> atom val hpred_replace_exp : (Exp.t * Exp.t) list -> hpred -> hpred diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml index 6b9e42cbb..ee62f3b38 100644 --- a/infer/src/backend/InferAnalyze.ml +++ b/infer/src/backend/InferAnalyze.ml @@ -19,9 +19,12 @@ let clear_caches () = let analyze_target : TaskScheduler.target Tasks.doer = let analyze_source_file exe_env source_file = + DB.Results_dir.init Topl.sourcefile ; DB.Results_dir.init source_file ; L.task_progress SourceFile.pp source_file ~f:(fun () -> Callbacks.analyze_file exe_env source_file ; + if Topl.is_active () && Config.debug_mode then + Dotty.print_icfg_dotty Topl.sourcefile Topl.cfg ; if Config.write_html then Printer.write_all_html_files source_file ) in (* In call-graph scheduling, log progress every [per_procedure_logging_granularity] procedures. diff --git a/infer/src/backend/dotty.ml b/infer/src/backend/dotty.ml index a63ca0b10..158b4129b 100644 --- a/infer/src/backend/dotty.ml +++ b/infer/src/backend/dotty.ml @@ -1156,15 +1156,8 @@ let pp_cfgnode pdesc fmt (n : Procdesc.Node.t) = List.iter ~f:(fun n' -> print_edge n n' true) (Procdesc.Node.get_exn n) -(* * print control flow graph (in dot form) for fundec to channel let *) -(* print_cfg_channel (chan : out_channel) (fd : fundec) = let pnode (s: *) -(* stmt) = fprintf chan "%a@\n" d_cfgnode s in forallStmts pnode fd * *) -(* Print control flow graph (in dot form) for fundec to file let *) -(* print_cfg_filename (filename : string) (fd : fundec) = let chan = *) -(* open_out filename in begin print_cfg_channel chan fd; close_out chan; *) -(* end *) -(* Print the extra information related to the inteprocedural aspect, ie., *) -(* special node, and call / return edges *) +(** Print the flowgraphs in [cfg]. Nodes are printed only if (a) their location is [source] or +(b) [Config.dotty_cfg_libs] is set. This triggers preanalysis. *) let print_icfg source fmt cfg = let print_node pdesc node = let loc = Procdesc.Node.get_loc node in @@ -1185,9 +1178,9 @@ let write_icfg_dotty_to_file source cfg fname = let chan = Out_channel.create fname in let fmt = Format.formatter_of_out_channel chan in (* avoid phabricator thinking this file was generated by substituting substring with %s *) - F.fprintf fmt "/* %@%s */@\ndigraph cfg {@\n" "generated" ; + F.fprintf fmt "@[/* %@%s */@\ndigraph cfg {@\n" "generated" ; print_icfg source fmt cfg ; - F.fprintf fmt "}@\n" ; + F.fprintf fmt "}@]@." ; Out_channel.close chan diff --git a/infer/src/topl/Topl.ml b/infer/src/topl/Topl.ml index fcd06064d..9d432c009 100644 --- a/infer/src/topl/Topl.ml +++ b/infer/src/topl/Topl.ml @@ -8,7 +8,7 @@ open! IStd module L = Logging -let tt fmt = L.d_printf "ToplTrace: " ; L.d_printfln fmt +let tt = ToplUtils.debug let parse topl_file = let f ch = @@ -17,10 +17,10 @@ let parse topl_file = with ToplParser.Error -> let Lexing.{pos_lnum; pos_bol; pos_cnum; _} = Lexing.lexeme_start_p lexbuf in let col = pos_cnum - pos_bol + 1 in - L.(die UserError) "@[%s:%d:%d: topl parse error@]@\n@?" topl_file pos_lnum col + L.die UserError "@[%s:%d:%d: topl parse error@]@\n@?" topl_file pos_lnum col in try In_channel.with_file topl_file ~f - with Sys_error msg -> L.(die UserError) "@[topl:%s: %s@]@\n@?" topl_file msg + with Sys_error msg -> L.die UserError "@[topl:%s: %s@]@\n@?" topl_file msg let properties = lazy (List.concat_map ~f:parse Config.topl_properties) @@ -29,68 +29,21 @@ let automaton = lazy (ToplAutomaton.make (Lazy.force properties)) let is_active () = not (List.is_empty (Lazy.force properties)) -let cfg = Cfg.create () - -let sourcefile = SourceFile.create "SynthesizedToplProperty.java" - -let sourcefile_location = Location.none sourcefile - -(* NOTE: For now, Topl is mostly untyped; that is, everything has type Object. *) -let type_of_paramtyp (_t : Typ.Procname.Parameter.t) : Typ.t = - let classname = Typ.mk (Tstruct Typ.Name.Java.java_lang_object) in - Typ.mk (Tptr (classname, Pk_pointer)) - - -(** NOTE: Similar to [JTrans.formals_from_signature]. *) -let formals_of_procname proc_name = - let params = Typ.Procname.get_parameters proc_name in - let new_arg_name = - let n = ref (-1) in - fun () -> incr n ; Printf.sprintf "arg%d" !n - in - let f t = - let name = Mangled.from_string (new_arg_name ()) in - let typ = type_of_paramtyp t in - (name, typ) - in - List.map ~f params - - -(** The procedure description and its nodes will have location [sourcefile_location]. *) -let empty_proc_desc proc_name = - let attr = - let formals = formals_of_procname proc_name in - let is_defined = true in - let loc = sourcefile_location in - {(ProcAttributes.default sourcefile proc_name) with formals; is_defined; loc} - in - let proc_desc = Cfg.create_proc_desc cfg attr in - let start_node = - Procdesc.create_node proc_desc sourcefile_location Procdesc.Node.Start_node [] - in - let exit_node = Procdesc.create_node proc_desc sourcefile_location Procdesc.Node.Exit_node [] in - Procdesc.node_set_succs_exn proc_desc start_node [exit_node] [exit_node] ; - Procdesc.set_start_node proc_desc start_node ; - Procdesc.set_exit_node proc_desc exit_node ; - proc_desc - - -let is_synthesized = function - | Typ.Procname.Java j -> - String.equal "topl.Property" (Typ.Procname.Java.get_class_name j) - | _ -> - false - - -let get_proc_desc proc_name = - if is_synthesized proc_name then Some ((* TODO *) empty_proc_desc proc_name) else None - +let get_proc_desc proc_name = ToplMonitor.generate (Lazy.force automaton) proc_name let get_proc_attr proc_name = (* TODO: optimize -- don't generate body just to get attributes *) Option.map ~f:Procdesc.get_attributes (get_proc_desc proc_name) +let max_args = ref 0 + +let get_max_args () = + let instrument_max_args = !max_args in + let automaton_max_args = ToplAutomaton.max_args (Lazy.force automaton) in + Int.max instrument_max_args automaton_max_args + + let get_transitions_count () = ToplAutomaton.tcount (Lazy.force automaton) (** Checks whether the method name and the number of arguments matches the conditions in a @@ -104,7 +57,7 @@ let evaluate_static_guard label (e_fun, arg_ts) = let re = Str.regexp label.ToplAst.procedure_name in let result = Str.string_match re name 0 in if Config.trace_topl then - tt "match name='%s' re='%s' result=%b" name label.ToplAst.procedure_name result ; + tt "match name='%s' re='%s' result=%b\n" name label.ToplAst.procedure_name result ; result | _ -> false @@ -123,78 +76,87 @@ let get_active_transitions e_fun arg_ts = Array.init (ToplAutomaton.tcount a) ~f -let topl_class_exp = - let class_name = Mangled.from_string "topl.Property" in - let var_name = Pvar.mk_global class_name in - Exp.Lvar var_name - - -let topl_class_name : Typ.Name.t = Typ.Name.Java.from_string "topl.Property" - -let topl_class_typ = Typ.mk (Tstruct topl_class_name) - -let transition_field_name i = Typ.Fieldname.Java.from_string (Printf.sprintf "transition%d" i) - -let transition_var i = Exp.Lfield (topl_class_exp, transition_field_name i, topl_class_typ) - let set_transitions loc active_transitions = let store i b = let value = if b then Exp.one else Exp.zero in - Sil.Store (transition_var i, topl_class_typ, value, loc) + Sil.Store (ToplUtils.static_var (ToplName.transition i), ToplUtils.topl_class_typ, value, loc) in Array.mapi ~f:store active_transitions -let topl_fun name n = - let ret_typ = Some Typ.Name.Java.Split.void in - let args_typ = List.init n ~f:(fun _ -> Typ.Name.Java.Split.java_lang_object) in - Exp.Const - (Const.Cfun - (Typ.Procname.Java - (Typ.Procname.Java.make topl_class_name ret_typ name args_typ Typ.Procname.Java.Static))) - - let call_save_args loc ret_id ret_typ arg_ts = - let arg_ts = arg_ts @ [(Exp.Var ret_id, ret_typ)] in - let e_fun = topl_fun "saveArgs" (List.length arg_ts) in - let ret_id = Ident.create_fresh Ident.knormal in - let ret_typ = Typ.mk Tvoid in - [|Sil.Call ((ret_id, ret_typ), e_fun, arg_ts, loc, CallFlags.default)|] + let dummy_ret_id = Ident.create_fresh Ident.knormal in + [| ToplUtils.topl_call dummy_ret_id Tvoid loc ToplName.save_args + ((Exp.Var ret_id, ret_typ) :: arg_ts) |] let call_execute loc = - let e_fun = topl_fun "execute" 0 in - let ret_id = Ident.create_fresh Ident.knormal in - let ret_typ = Typ.mk Tvoid in - [|Sil.Call ((ret_id, ret_typ), e_fun, [], loc, CallFlags.default)|] + let dummy_ret_id = Ident.create_fresh Ident.knormal in + [|ToplUtils.topl_call dummy_ret_id Tvoid loc ToplName.execute []|] +(* Side-effect: increases [!max_args] when it sees a call with more arguments. *) let instrument_instruction = function | Sil.Call ((ret_id, ret_typ), e_fun, arg_ts, loc, _call_flags) as i -> let active_transitions = get_active_transitions e_fun arg_ts in if not (Array.exists ~f:(fun x -> x) active_transitions) then [|i|] - else + else ( + max_args := Int.max !max_args (List.length arg_ts) ; + tt "found %d arguments\n" (List.length arg_ts) ; let i1s = set_transitions loc active_transitions in let i2s = call_save_args loc ret_id ret_typ arg_ts in let i3s = call_execute loc in - Array.concat [[|i|]; i1s; i2s; i3s] + Array.concat [[|i|]; i1s; i2s; i3s] ) | i -> [|i|] (* PRE: Calling [Tenv.mk_struct tenv ] twice is a no-op. *) let add_types tenv = - let transition_field i = (transition_field_name i, Typ.mk (Tint IBool), []) in - let rec transitions n = - if Int.equal n 0 then [] else transition_field (n - 1) :: transitions (n - 1) + let get_registers () = + let a = Lazy.force automaton in + let h = String.Table.create () in + let record = + let open ToplAst in + function + | SaveInRegister reg | EqualToRegister reg -> Hashtbl.set h ~key:reg ~data:() | _ -> () + in + for i = 0 to ToplAutomaton.tcount a - 1 do + let label = (ToplAutomaton.transition a i).label in + record label.return ; + Option.iter ~f:(List.iter ~f:record) label.arguments + done ; + Hashtbl.keys h in - let statics = transitions (get_transitions_count ()) in - let _topl_class = Tenv.mk_struct tenv topl_class_name ~statics in + let transition_field i = + (Typ.Fieldname.Java.from_string (ToplName.transition i), Typ.mk (Tint IBool), []) + in + let saved_arg_field i = + (Typ.Fieldname.Java.from_string (ToplName.saved_arg i), ToplUtils.any_type, []) + in + let register_field name = + (Typ.Fieldname.Java.from_string (ToplName.reg name), ToplUtils.any_type, []) + in + let statics = + let state = (Typ.Fieldname.Java.from_string ToplName.state, Typ.mk (Tint IInt), []) in + let retval = (Typ.Fieldname.Java.from_string ToplName.retval, ToplUtils.any_type, []) in + let transitions = List.init (get_transitions_count ()) ~f:transition_field in + let saved_args = List.init (get_max_args ()) ~f:saved_arg_field in + let registers = List.map ~f:register_field (get_registers ()) in + List.concat [[retval; state]; transitions; saved_args; registers] + in + let _topl_class = Tenv.mk_struct tenv ToplUtils.topl_class_name ~statics in () let instrument tenv procdesc = - add_types tenv ; - let f _node = instrument_instruction in - let _updated = Procdesc.replace_instrs_by procdesc ~f in - () + if not (ToplUtils.is_synthesized (Procdesc.get_proc_name procdesc)) then ( + let f _node = instrument_instruction in + tt "instrument\n" ; + let _updated = Procdesc.replace_instrs_by procdesc ~f in + tt "add types %d\n" !max_args ; add_types tenv ; tt "done\n" ) + + +let sourcefile = ToplMonitor.sourcefile + +let cfg = ToplMonitor.cfg diff --git a/infer/src/topl/Topl.mli b/infer/src/topl/Topl.mli index 550f9a075..59e0e1ee5 100644 --- a/infer/src/topl/Topl.mli +++ b/infer/src/topl/Topl.mli @@ -19,3 +19,9 @@ val get_proc_desc : Typ.Procname.t -> Procdesc.t option val instrument : Tenv.t -> Procdesc.t -> unit (** Inserts calls to the TOPL automaton. Mutates the arguments: it is the caller's responsibility to instrument procedures at most once. *) + +val sourcefile : SourceFile.t +(** The (fake) sourcefile in which synthesized code resides. *) + +val cfg : Cfg.t +(** The CFG of the synthesized code. *) diff --git a/infer/src/topl/ToplAst.ml b/infer/src/topl/ToplAst.ml index 0d236e8ec..142b7bb9b 100644 --- a/infer/src/topl/ToplAst.ml +++ b/infer/src/topl/ToplAst.ml @@ -11,8 +11,7 @@ type property_name = string [@@deriving compare, hash, sexp] type register_name = string -(** TODO: use Const.t *) -type constant = string +type constant = Exp.t type value_pattern = | Ignore diff --git a/infer/src/topl/ToplAutomaton.ml b/infer/src/topl/ToplAutomaton.ml index dd883a3cc..748d1cc2d 100644 --- a/infer/src/topl/ToplAutomaton.ml +++ b/infer/src/topl/ToplAutomaton.ml @@ -7,6 +7,8 @@ open! IStd +let tt = ToplUtils.debug + module Vname = struct module T = struct type t = ToplAst.property_name * ToplAst.vertex [@@deriving compare, hash, sexp] @@ -25,20 +27,20 @@ type tindex = int type transition = {source: vindex; target: vindex; label: ToplAst.label} (** INV1: Array.length states = Array.length outgoing - INV2: each index of [transitions] occurs exactly once in one of [outgoing]'s lists *) + INV2: each index of [transitions] occurs exactly once in one of [outgoing]'s lists + INV3: max_args is the maximum length of the arguments list in a label on a transition *) type t = { states: vname array ; transitions: transition array ; outgoing: tindex list array - ; vindex: vname -> vindex } + ; vindex: vname -> vindex + ; max_args: int (* redundant; cached for speed *) } (** [index_in H a x] is the (last) index of [x] in array [a]. *) let index_in (type k) (module H : Hashtbl_intf.S with type key = k) (a : k array) : k -> int = let h = H.create ~size:(2 * Array.length a) () in let f i x = H.set h ~key:x ~data:i in - Array.iteri ~f a ; - let find x = H.find_exn h x in - find + Array.iteri ~f a ; H.find_exn h let make properties = @@ -48,8 +50,9 @@ let make properties = let f {source; target; _} = [(p.name, source); (p.name, target)] in List.concat_map ~f p.transitions in - Array.of_list (List.concat_map ~f properties) + Array.of_list (List.dedup_and_sort ~compare:Vname.compare (List.concat_map ~f properties)) in + if Config.trace_topl then Array.iteri ~f:(fun i (p, v) -> tt "state[%d]=(%s,%s)\n" i p v) states ; let vindex = index_in (module Vname.Table) states in let transitions : transition array = let f p = @@ -73,9 +76,22 @@ let make properties = let f i t = a.(t.source) <- i :: a.(t.source) in Array.iteri ~f transitions ; a in - {states; transitions; outgoing; vindex} + let max_args = + let f x t = + let y = Option.value_map ~default:0 ~f:List.length t.label.ToplAst.arguments in + Int.max x y + in + Array.fold ~init:0 ~f transitions + in + {states; transitions; outgoing; vindex; max_args} +let outgoing a i = a.outgoing.(i) + +let vcount a = Array.length a.states + let transition a i = a.transitions.(i) let tcount a = Array.length a.transitions + +let max_args a = a.max_args diff --git a/infer/src/topl/ToplAutomaton.mli b/infer/src/topl/ToplAutomaton.mli index b73d22043..f772212fe 100644 --- a/infer/src/topl/ToplAutomaton.mli +++ b/infer/src/topl/ToplAutomaton.mli @@ -19,14 +19,11 @@ open! IStd boolean variable transitionN to tell if the static part of a transition guard is satisfied. The N is just some identifier for the transition, and integers are convenient identifiers. - NOTE: Now, the signature is the minimal needed for code instrumentation, but the implementation - does some extra work (such as grouping transitions by their source) that will be useful for - code generation. (TODO: remove note once code generation is implemented.) + Transitions are grouped by their source to ease generation of the monitor code. *) - type t -type vindex = int +type vindex = int (* from 0 to vcount()-1, inclusive *) type tindex = int (* from 0 to tcount()-1, inclusive *) @@ -34,6 +31,12 @@ type transition = {source: vindex; target: vindex; label: ToplAst.label} val make : ToplAst.t list -> t +val outgoing : t -> vindex -> tindex list + +val vcount : t -> int + val transition : t -> tindex -> transition val tcount : t -> int + +val max_args : t -> int diff --git a/infer/src/topl/ToplLexer.mll b/infer/src/topl/ToplLexer.mll index 75cc3283d..43850fb7d 100644 --- a/infer/src/topl/ToplLexer.mll +++ b/infer/src/topl/ToplLexer.mll @@ -29,11 +29,12 @@ let id_head = ['a'-'z' 'A'-'Z'] let id_tail = ['a'-'z' 'A'-'Z' '0'-'9']* +let integer = ['0' - '9']+ -rule raw_token = parse +rule topl_token lexing_sil = parse | '\t' { raise Error } | ((' '* ("//" [^ '\n']*)? '\n')+ as x) (' '* as y) { new_line x y lexbuf } - | ' '+ { raw_token lexbuf } + | ' '+ { topl_token lexing_sil lexbuf } | "->" { ARROW } | '=' { ASGN } | ':' { COLON } @@ -41,8 +42,8 @@ rule raw_token = parse | '(' { LP } | ')' { RP } | '*' { STAR } - | '<' (([^ '<' '>' '\n' '\\'] | ('\\' _))* as x) '>' { CONSTANT (unquote x) } - | '"' ([^ '"' '\n']* as x) '"' { STRING x } + | '"' (([^ '"' '\n' '\\'] | ('\\' _))* as x) '"' { STRING (unquote x) } + | '<' { lexing_sil := true ; LT } | "prefix" { PREFIX } | "property" { PROPERTY } | "message" { MESSAGE } @@ -50,8 +51,17 @@ rule raw_token = parse | eof { EOF } | _ { raise Error } +and sil_token lexing_sil = parse + | '>' { lexing_sil := false ; GT } + | "true" { TRUE } + | "false" { FALSE } + | integer as x { INTEGER (int_of_string x) } + + { let token () = + let lexing_sil = ref false in + let raw_token lexbuf = (if !lexing_sil then sil_token else topl_token) lexbuf in let indents = ref [0] in let scheduled_rc = ref 0 in let last_indent () = match !indents with @@ -66,7 +76,7 @@ rule raw_token = parse in let rec step lexbuf = if !scheduled_rc > 0 then (decr scheduled_rc; RC) - else match raw_token lexbuf with + else match raw_token lexing_sil lexbuf with | INDENT n when n > last_indent () -> (add_indent n; LC) | INDENT n when n < last_indent () -> (drop_to_indent n; step lexbuf) | INDENT _ -> step lexbuf diff --git a/infer/src/topl/ToplMonitor.ml b/infer/src/topl/ToplMonitor.ml new file mode 100644 index 000000000..5d2e039ae --- /dev/null +++ b/infer/src/topl/ToplMonitor.ml @@ -0,0 +1,303 @@ +(* + * Copyright (c) 2019-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd +module L = Logging + +let sourcefile = + let pid = Pid.to_int (Unix.getpid ()) in + SourceFile.create (Printf.sprintf "SynthesizedToplProperty%d.java" pid) + + +let cfg = Cfg.create () + +let sourcefile_location = Location.none sourcefile + +let type_of_paramtyp (_t : Typ.Procname.Parameter.t) : Typ.t = ToplUtils.any_type + +(** NOTE: Similar to [JTrans.formals_from_signature]. *) +let formals_of_procname proc_name = + let params = Typ.Procname.get_parameters proc_name in + let new_arg_name = + let n = ref (-1) in + fun () -> incr n ; ToplName.arg !n + in + let f t = + let name = Mangled.from_string (new_arg_name ()) in + let typ = type_of_paramtyp t in + (name, typ) + in + List.map ~f params + + +type node_creator = Procdesc.Node.nodekind -> Sil.instr list -> Procdesc.Node.t + +type succ_setter = Procdesc.Node.t -> Procdesc.Node.t list -> unit + +type block = {start_node: Procdesc.Node.t; exit_node: Procdesc.Node.t} + +(** [node_generator]s are the main concept used for organizing the code below. The main property +of node generators is that they compose, because of their return type. The two arguments +([node_creator] and [succ_setter]) are there mainly to ensure that there is a thin interface +with the underlying (heavily imperative) cfg data-structure from [Procdesc]. *) +type node_generator = node_creator -> succ_setter -> block + +let procedure proc_name (make_body : node_generator) : Procdesc.t = + let attr = + let formals = formals_of_procname proc_name in + let is_defined = true in + let loc = sourcefile_location in + {(ProcAttributes.default sourcefile proc_name) with formals; is_defined; loc} + in + let proc_desc = Cfg.create_proc_desc cfg attr in + let create_node kind instrs = Procdesc.create_node proc_desc sourcefile_location kind instrs in + let exit_node = create_node Procdesc.Node.Exit_node [] in + let set_succs node succs = Procdesc.node_set_succs_exn proc_desc node succs [exit_node] in + let {start_node= body_start; exit_node= body_exit} = make_body create_node set_succs in + let start_node = create_node Procdesc.Node.Start_node [] in + set_succs start_node [body_start] ; + set_succs body_exit [exit_node] ; + Procdesc.set_start_node proc_desc start_node ; + Procdesc.set_exit_node proc_desc exit_node ; + proc_desc + + +let sequence (gens : node_generator list) : node_generator = + fun create_node set_succs -> + let blocks = List.map ~f:(fun g -> g create_node set_succs) gens in + (* NOTE: Possible optimization: fuse successive stmt nodes, by concatenating instructions. *) + let rec connect n = function + | [] -> + n + | p :: qs -> + set_succs n.exit_node [p.start_node] ; + connect p qs + in + match blocks with + | [] -> + let n = create_node (Procdesc.Node.Stmt_node Procdesc.Node.MethodBody) [] in + {start_node= n; exit_node= n} + | n :: ns -> + let p = connect n ns in + {start_node= n.start_node; exit_node= p.exit_node} + + +(** Substitutes a fresh logical variable for any subexpression that is not an operator, a logical +variable or a constant. Also returns assignments from subexpressions to the logical variables +that replaced them. The goal is to get a Sil.Prune condition in a shape friendly to symbolic +execution. *) +let pure_exp e : Exp.t * Sil.instr list = + let rec pluck = + let open Exp in + let open Sequence.Generator in + function + | UnOp (_, e, _) -> + pluck e + | BinOp (_, e1, e2) -> + all_unit [pluck e1; pluck e2] + | Var _ | Const _ -> + return () + | e -> + yield e + in + let es = Sequence.to_list (Sequence.Generator.run (pluck e)) in + let es = List.dedup_and_sort ~compare:Exp.compare es in + let pairs = List.map ~f:(fun e -> (e, Ident.create_fresh Ident.knormal)) es in + let subst = List.map ~f:(function e, id -> (e, Exp.Var id)) pairs in + let e' = Sil.exp_replace_exp subst e in + let mk_load (e, id) = Sil.Load (id, e, ToplUtils.any_type, sourcefile_location) in + let loads = List.map ~f:mk_load pairs in + (e', loads) + + +let gen_if (cond : Exp.t) (true_branch : node_generator) (false_branch : node_generator) : + node_generator = + fun create_node set_succs -> + let start_node = create_node (Procdesc.Node.Stmt_node Procdesc.Node.MethodBody) [] in + let exit_node = create_node (Procdesc.Node.Stmt_node Procdesc.Node.MethodBody) [] in + let {start_node= true_start_node; exit_node= true_exit_node} = + true_branch create_node set_succs + in + let {start_node= false_start_node; exit_node= false_exit_node} = + false_branch create_node set_succs + in + (* NOTE: Symbolic execution works with non-pure prune expressions but it generates symbolic + states from which abstraction then removes too much information. *) + let cond, preamble = pure_exp cond in + let prune_true = + let node_type = Procdesc.Node.Prune_node (true, Sil.Ik_if, PruneNodeKind_MethodBody) in + let instr = Sil.Prune (cond, sourcefile_location, true, Sil.Ik_if) in + create_node node_type (preamble @ [instr]) + in + let prune_false = + let node_type = Procdesc.Node.Prune_node (false, Sil.Ik_if, PruneNodeKind_MethodBody) in + let instr = + Sil.Prune (Exp.UnOp (Unop.LNot, cond, None), sourcefile_location, false, Sil.Ik_if) + in + create_node node_type (preamble @ [instr]) + in + set_succs start_node [prune_true; prune_false] ; + set_succs prune_true [true_start_node] ; + set_succs prune_false [false_start_node] ; + set_succs true_exit_node [exit_node] ; + set_succs false_exit_node [exit_node] ; + {start_node; exit_node} + + +let stmt_node instrs : node_generator = + fun create_node _set_succs -> + let node = create_node (Procdesc.Node.Stmt_node Procdesc.Node.MethodBody) instrs in + {start_node= node; exit_node= node} + + +let sil_assign lhs rhs = + let tempvar = Ident.create_fresh Ident.knormal in + [ Sil.Load (tempvar, rhs, ToplUtils.any_type, sourcefile_location) + ; Sil.Store (lhs, ToplUtils.any_type, Exp.Var tempvar, sourcefile_location) ] + + +let assign lhs rhs : node_generator = stmt_node (sil_assign lhs rhs) + +let simple_call function_name : node_generator = + let ret_id = Ident.create_fresh Ident.knormal in + stmt_node [ToplUtils.topl_call ret_id Tvoid sourcefile_location function_name []] + + +let gen_maybe_call ret_id : node_generator = + stmt_node [ToplUtils.topl_call ret_id (Tint IBool) sourcefile_location ToplName.maybe []] + + +let constant (x : int) : Exp.t = Exp.int (IntLit.of_int x) + +let arguments_count proc_name = List.length (Typ.Procname.get_parameters proc_name) + +(* NOTE: The order of parameters must correspond to what gets generated by [call_save_args]. *) +let generate_save_args _automaton proc_name = + let n = arguments_count proc_name in + let local_var = ToplUtils.local_var proc_name in + procedure proc_name + (sequence + ( assign (ToplUtils.static_var ToplName.retval) (local_var (ToplName.arg 0)) + :: List.init (n - 1) ~f:(fun i -> + assign + (ToplUtils.static_var (ToplName.saved_arg i)) + (local_var (ToplName.arg (i + 1))) ) )) + + +let generate_execute automaton proc_name = + let n = ToplAutomaton.vcount automaton in + let call_execute_state i = simple_call (ToplName.execute_state i) in + procedure proc_name (sequence (List.init n ~f:call_execute_state)) + + +let generate_execute_state automaton proc_name = + let state : ToplAutomaton.vindex = + let re = Str.regexp "execute_state_\\([0-9]*\\)$" in + let mname = Typ.Procname.get_method proc_name in + if Str.string_match re mname 0 then int_of_string (Str.matched_group 1 mname) + else L.die InternalError "Topl.Monitor.generate_execute_state called for %s" mname + in + let condition maybe t : Exp.t = + let conjunct variable pattern = + let open ToplAst in + match pattern with + | Ignore | SaveInRegister _ -> + [] (* true *) + | EqualToRegister i -> + [Exp.eq variable (ToplUtils.static_var (ToplName.reg i))] + | EqualToConstant ct -> + [Exp.eq variable ct] + in + let label = (ToplAutomaton.transition automaton t).label in + let all_conjuncts = + let arg_conjunct i pattern = + conjunct (ToplUtils.static_var (ToplName.saved_arg i)) pattern + in + List.concat + ( Option.value_map ~default:[] ~f:(fun x -> [x]) maybe + :: [ToplUtils.static_var (ToplName.transition t)] + :: conjunct (ToplUtils.static_var ToplName.retval) label.ToplAst.return + :: Option.value_map ~default:[] ~f:(List.mapi ~f:arg_conjunct) label.ToplAst.arguments ) + in + Exp.and_nary all_conjuncts + in + let skip : node_generator = sequence [] in + let action t : node_generator = + let step variable pattern = + match pattern with + | ToplAst.SaveInRegister i -> + assign (ToplUtils.static_var (ToplName.reg i)) variable + | _ -> + skip + in + let transition = ToplAutomaton.transition automaton t in + let all_actions = + let arg_action i pattern = step (ToplUtils.static_var (ToplName.saved_arg i)) pattern in + stmt_node + [ Sil.Store + ( ToplUtils.static_var ToplName.state + , Typ.mk (Tint IInt) + , Exp.int (IntLit.of_int transition.target) + , sourcefile_location ) ] + :: step (ToplUtils.static_var ToplName.retval) transition.label.ToplAst.return + :: Option.value_map ~default:[] ~f:(List.mapi ~f:arg_action) + transition.label.ToplAst.arguments + in + sequence all_actions + in + let branch_for_right_state : node_generator = + let check_transition_maybe t (false_branch : node_generator) : node_generator = + let tempid = Ident.create_fresh Ident.knormal in + let tempvar = Exp.Var tempid in + sequence [gen_maybe_call tempid; gen_if (condition (Some tempvar) t) (action t) false_branch] + in + let check_transition t (false_branch : node_generator) : node_generator = + gen_if (condition None t) (action t) false_branch + in + let transitions = ToplAutomaton.outgoing automaton state in + let fold f init = List.fold_right ~init ~f transitions in + fold check_transition_maybe (fold check_transition skip) + in + let body = + gen_if + (Exp.eq (ToplUtils.static_var ToplName.state) (constant state)) + branch_for_right_state skip + in + procedure proc_name body + + +(** INV: For the code generated here, biabduction infers the spec "returned value can be anything" *) +let generate_maybe _automaton proc_name = procedure proc_name (sequence []) + +let name_matches re proc_name = Str.string_match re (Typ.Procname.get_method proc_name) 0 + +let has_name s = name_matches (Str.regexp (s ^ "$")) + +let is_save_args = has_name ToplName.save_args + +let is_execute = has_name ToplName.execute + +let is_execute_state = has_name "execute_state_[0-9]*" + +let is_maybe = has_name ToplName.maybe + +let maybe_synthesize_it automaton proc_name = + if ToplUtils.is_synthesized proc_name then + if is_save_args proc_name then Some (generate_save_args automaton proc_name) + else if is_execute proc_name then Some (generate_execute automaton proc_name) + else if is_execute_state proc_name then Some (generate_execute_state automaton proc_name) + else if is_maybe proc_name then Some (generate_maybe automaton proc_name) + else + L.die InternalError + "TOPL instrumentation introduced a call to a method that is not generated" + else None + + +let generate automaton proc_name = + IList.force_until_first_some + [ lazy (Typ.Procname.Hash.find_opt cfg proc_name) + ; lazy (maybe_synthesize_it automaton proc_name) ] diff --git a/infer/src/topl/ToplMonitor.mli b/infer/src/topl/ToplMonitor.mli new file mode 100644 index 000000000..01238e9a5 --- /dev/null +++ b/infer/src/topl/ToplMonitor.mli @@ -0,0 +1,20 @@ +(* + * Copyright (c) 2019-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +val generate : ToplAutomaton.t -> Typ.Procname.t -> Procdesc.t option +(** [generate automaton proc_name] returns a CFG, provided that [proc_name] is a recognized procedure name *) + +val sourcefile : SourceFile.t +(** For debug. *) + +val cfg : Cfg.t +(** For debug. This datastructure accumulates all the procedures that were synthesized by the +current process. If the implementation is correct, then different processes synthesize the same +procedures, given the same set of Topl properties. However, for debug, we print the datastructure +in a filename that contains the PID, which is why [sourcefile] is exposed. *) diff --git a/infer/src/topl/ToplName.ml b/infer/src/topl/ToplName.ml new file mode 100644 index 000000000..d11592157 --- /dev/null +++ b/infer/src/topl/ToplName.ml @@ -0,0 +1,32 @@ +(* + * Copyright (c) 2019-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +let p = Printf.sprintf + +let topl_property = "topl.Property" + +let transition = p "transition%d" + +let arg = p "arg%d" + +let retval = "retval" + +let saved_arg = p "savedArg%d" + +let reg = p "reg_%s" + +let state = "state" + +let maybe = "maybe" + +let execute = "execute" + +let execute_state = p "execute_state_%d" + +let save_args = "saveArgs" diff --git a/infer/src/topl/ToplName.mli b/infer/src/topl/ToplName.mli new file mode 100644 index 000000000..4caa02d74 --- /dev/null +++ b/infer/src/topl/ToplName.mli @@ -0,0 +1,30 @@ +(* + * Copyright (c) 2019-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +val topl_property : string + +val transition : int -> string + +val arg : int -> string + +val retval : string + +val saved_arg : int -> string + +val reg : string -> string + +val state : string + +val maybe : string + +val execute : string + +val execute_state : int -> string + +val save_args : string diff --git a/infer/src/topl/ToplParser.mly b/infer/src/topl/ToplParser.mly index 2bd5913e8..46e52085e 100644 --- a/infer/src/topl/ToplParser.mly +++ b/infer/src/topl/ToplParser.mly @@ -17,10 +17,15 @@ if is_guard i then ToplAst.EqualToRegister j else ToplAst.SaveInRegister j %} +(* Switching and common tokens *) +%token INTEGER +%token STRING +%token GT +%token LT + +(* TOPL tokens *) %token INDENT (* The lexer uses this token only internally. *) -%token CONSTANT %token ID -%token STRING %token ARROW %token ASGN %token COLON @@ -35,6 +40,10 @@ %token RP %token STAR +(* SIL tokens *) +%token FALSE +%token TRUE + %start properties %% @@ -47,7 +56,7 @@ one_property: message: MESSAGE s=STRING { s } -prefix: PREFIX c=CONSTANT { c } +prefix: PREFIX s=STRING { s } transition: source=ID ARROW target=ID COLON label=label @@ -63,14 +72,19 @@ call_pattern: p=procedure_pattern a=arguments_pattern? { (p, a) } procedure_pattern: i=ID { i } - | c=CONSTANT { c } + | s=STRING { s } | STAR { ".*" } arguments_pattern: LP a=separated_list(COMMA, value_pattern) RP { a } value_pattern: i=ID { value_pattern_of_id i } - | c=CONSTANT { ToplAst.EqualToConstant c } | STAR { ToplAst.Ignore } + | LT e=sil_expression GT { ToplAst.EqualToConstant e } + +sil_expression: + TRUE { Exp.one } + | FALSE { Exp.zero } + | x=INTEGER { Exp.int (IntLit.of_int x) } %% diff --git a/infer/src/topl/ToplUtils.ml b/infer/src/topl/ToplUtils.ml new file mode 100644 index 000000000..21f667b41 --- /dev/null +++ b/infer/src/topl/ToplUtils.ml @@ -0,0 +1,52 @@ +(* + * Copyright (c) 2019-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +let any_type : Typ.t = + let classname = Typ.mk (Tstruct Typ.Name.Java.java_lang_object) in + Typ.mk (Tptr (classname, Pk_pointer)) + + +let topl_class_name : Typ.Name.t = Typ.Name.Java.from_string ToplName.topl_property + +let topl_class_typ = Typ.mk (Tstruct topl_class_name) + +let topl_call ret_id (ret_typ : Typ.desc) loc name arg_ts : Sil.instr = + let e_fun = + let ret_typ = Some Typ.Name.Java.Split.void in + let args_typ = List.map arg_ts ~f:(fun _ -> Typ.Name.Java.Split.java_lang_object) in + Exp.Const + (Const.Cfun + (Typ.Procname.Java + (Typ.Procname.Java.make topl_class_name ret_typ name args_typ Typ.Procname.Java.Static))) + in + Sil.Call ((ret_id, Typ.mk ret_typ), e_fun, arg_ts, loc, CallFlags.default) + + +let topl_class_exp = + let class_name = Mangled.from_string ToplName.topl_property in + let var_name = Pvar.mk_global class_name in + Exp.Lvar var_name + + +let static_var x : Exp.t = + Exp.Lfield (topl_class_exp, Typ.Fieldname.Java.from_string x, topl_class_typ) + + +let local_var proc_name x : Exp.t = Exp.Lvar (Pvar.mk (Mangled.from_string x) proc_name) + +let is_synthesized = function + | Typ.Procname.Java j -> + String.equal ToplName.topl_property (Typ.Procname.Java.get_class_name j) + | _ -> + false + + +let debug fmt = + Logging.debug Analysis Verbose "ToplTrace: " ; + Logging.debug Analysis Verbose fmt diff --git a/infer/src/topl/ToplUtils.mli b/infer/src/topl/ToplUtils.mli new file mode 100644 index 000000000..e5cff9f24 --- /dev/null +++ b/infer/src/topl/ToplUtils.mli @@ -0,0 +1,31 @@ +(* + * Copyright (c) 2019-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +(* This file collects several utilities that are used both for instrumentation (Topl) and for +code generation (ToplMonitor). *) + +val any_type : Typ.t +(** For now, Topl is untyped. The "any" type is simulated with "java.lang.Object". *) + +val topl_class_name : Typ.Name.t + +val topl_class_typ : Typ.t + +val static_var : string -> Exp.t + +val local_var : Typ.Procname.t -> string -> Exp.t + +val topl_call : + Ident.t -> Typ.desc -> Location.t -> string -> (Exp.t * Typ.t) sexp_list -> Sil.instr +(** Call a TOPL function; that is, a static member of "topl.Property" with java.lang.Object arguments +and return [ret_id] of type [ret_typ].*) + +val is_synthesized : Typ.Procname.t -> bool + +val debug : ('a, Format.formatter, unit) IStd.format -> 'a diff --git a/infer/tests/codetoanalyze/cpp/frontend/include_header/include_only.cpp.dot b/infer/tests/codetoanalyze/cpp/frontend/include_header/include_only.cpp.dot index e69de29bb..4a75d017b 100644 --- a/infer/tests/codetoanalyze/cpp/frontend/include_header/include_only.cpp.dot +++ b/infer/tests/codetoanalyze/cpp/frontend/include_header/include_only.cpp.dot @@ -0,0 +1,3 @@ +/* @generated */ +digraph cfg { +} diff --git a/infer/tests/codetoanalyze/java/topl/iterators.topl b/infer/tests/codetoanalyze/java/topl/iterators.topl index 92cc32c51..4ffad0d1b 100644 --- a/infer/tests/codetoanalyze/java/topl/iterators.topl +++ b/infer/tests/codetoanalyze/java/topl/iterators.topl @@ -1,5 +1,6 @@ property HasNext - prefix + prefix "Iterator" + prefix "List" start -> start: * start -> invalid: I = iterator(*) invalid -> valid: = hasNext(i) // treat as shorthand for = hasNext if (b) @@ -7,7 +8,8 @@ property HasNext invalid -> error: next(i) property UnsafeIterator - prefix + prefix "Collection" + prefix "Iterator" start -> iterating: I = iterator(C) iterating -> modified: remove(c, *) iterating -> modified: add(c, *) @@ -22,7 +24,7 @@ property UnsafeMapIterator updated -> error: next(i) property SkipIndexAfterRemove - prefix + prefix "ArrayList" start -> start: * start -> removed: remove(Collection, Index) removed -> ok: get(collection, index) diff --git a/infer/tests/codetoanalyze/java/topl/tomcat.topl b/infer/tests/codetoanalyze/java/topl/tomcat.topl index bc09a9a12..b7d3e4413 100644 --- a/infer/tests/codetoanalyze/java/topl/tomcat.topl +++ b/infer/tests/codetoanalyze/java/topl/tomcat.topl @@ -1,11 +1,11 @@ property ForwardUncommitted message "A ServletResponse was forwarded before being committed." // TODO assume InterleavedResponse_Weak - prefix - prefix - prefix + prefix "javax.servlet" + prefix "javax.servlet.{ServletOutputStream,ServletResponse}" + prefix "java.io.PrintWriter" start -> start: * - start -> tracking: R = > + start -> tracking: R = "ServletResponse.\" tracking -> ok: flushBuffer(r) tracking -> gotWriter: W = getWriter(r) gotWriter -> ok: flush(w) @@ -13,9 +13,9 @@ property ForwardUncommitted tracking -> gotStream: S = getOutputStream(r) gotStream -> ok: flush(s) gotStream -> ok: flushBuffer(r) - tracking -> error: (*, *, r) - gotWriter -> error: (*, *, r) - gotStream -> error: (*, *, r) + tracking -> error: "RequestDispatcher.forward"(*, *, r) + gotWriter -> error: "RequestDispatcher.forward"(*, *, r) + gotStream -> error: "RequestDispatcher.forward"(*, *, r) // The documentation of ServletResponse.getOutputStream says that "either this // method getWriter may be called to write to the body, not both." So, @@ -23,7 +23,7 @@ property ForwardUncommitted // broken, which is why we also have the weaker version InterleavedResponse2. property InterleavedResponse1 message "A ServletResponse was asked for both a writer and a stream." - prefix + prefix "javax.servlet.ServletResponse" start -> start: * start -> gotWriter: W = getWriter(R) start -> gotStream: S = getOutputStream(R) @@ -33,7 +33,7 @@ property InterleavedResponse1 property InterleavedResponse2 // vertex names: w = got writer; W = used writer; similarly for s, S message "Incompatible methods for putting data into a response were used." - prefix + prefix "javax.servlet.ServletResponse" start -> start: * start -> w: W = getWriter(R) start -> s: S = getOutputStream(R) diff --git a/infer/tests/codetoanalyze/objc/frontend/property/PropertyCustomAccessor.m.dot b/infer/tests/codetoanalyze/objc/frontend/property/PropertyCustomAccessor.m.dot index e69de29bb..4a75d017b 100644 --- a/infer/tests/codetoanalyze/objc/frontend/property/PropertyCustomAccessor.m.dot +++ b/infer/tests/codetoanalyze/objc/frontend/property/PropertyCustomAccessor.m.dot @@ -0,0 +1,3 @@ +/* @generated */ +digraph cfg { +} diff --git a/infer/tests/codetoanalyze/objc/frontend/property/aclass.m.dot b/infer/tests/codetoanalyze/objc/frontend/property/aclass.m.dot index e69de29bb..4a75d017b 100644 --- a/infer/tests/codetoanalyze/objc/frontend/property/aclass.m.dot +++ b/infer/tests/codetoanalyze/objc/frontend/property/aclass.m.dot @@ -0,0 +1,3 @@ +/* @generated */ +digraph cfg { +} diff --git a/infer/tests/codetoanalyze/objc/frontend/property_in_protocol/Test.m.dot b/infer/tests/codetoanalyze/objc/frontend/property_in_protocol/Test.m.dot index e69de29bb..4a75d017b 100644 --- a/infer/tests/codetoanalyze/objc/frontend/property_in_protocol/Test.m.dot +++ b/infer/tests/codetoanalyze/objc/frontend/property_in_protocol/Test.m.dot @@ -0,0 +1,3 @@ +/* @generated */ +digraph cfg { +}