diff --git a/.gitignore b/.gitignore index b02e40e73..6d1091b06 100644 --- a/.gitignore +++ b/.gitignore @@ -70,6 +70,9 @@ duplicates.txt /infer/tests/.idea/runConfigurations /infer/tests/.idea/copyright/profiles_settings.xml +# VS code +.vscode + # Eclipse settings files ocaml.prefs /infer/.paths diff --git a/infer/man/man1/infer-full.txt b/infer/man/man1/infer-full.txt index 43f734922..3915b21e3 100644 --- a/infer/man/man1/infer-full.txt +++ b/infer/man/man1/infer-full.txt @@ -1224,6 +1224,13 @@ INTERNAL OPTIONS --max-nesting-reset Cancel the effect of --max-nesting. + --memcached + Activates: EXPERIMENTAL: Use memcached caching summaries during + analysis. (Conversely: --no-memcached) + + --memcached-size-mb int + EXPERIMENTAL: Default memcached size in megabytes. (default: 2048) + --method-decls-info method_decls_info.json Specifies the file containing the method declarations info (eg. start line, end line, class, method name, etc.) when Infer is run diff --git a/infer/src/backend/InferAnalyze.ml b/infer/src/backend/InferAnalyze.ml index 45bf74381..b0342ed1b 100644 --- a/infer/src/backend/InferAnalyze.ml +++ b/infer/src/backend/InferAnalyze.ml @@ -20,13 +20,15 @@ let clear_caches () = (** Create tasks to analyze an execution environment *) let analyze_source_file : SourceFile.t Tasks.doer = fun source_file -> + if Config.memcached then Memcached.connect () ; DB.Results_dir.init source_file ; let exe_env = Exe_env.mk () in L.task_progress SourceFile.pp source_file ~f:(fun () -> (* clear cache for each source file to avoid it growing unboundedly *) clear_caches () ; Callbacks.analyze_file exe_env source_file ; - if Config.write_html then Printer.write_all_html_files source_file ) + if Config.write_html then Printer.write_all_html_files source_file ) ; + if Config.memcached then Memcached.disconnect () let output_json_makefile_stats clusters = diff --git a/infer/src/backend/Summary.ml b/infer/src/backend/Summary.ml index 80933ca35..808f3d223 100644 --- a/infer/src/backend/Summary.ml +++ b/infer/src/backend/Summary.ml @@ -260,3 +260,12 @@ let reset_all ~filter () = Serialization.write_to_file summary_serializer filename ~data:blank_summary ) in Procedures.get_all ~filter () |> List.iter ~f:reset + + +module SummaryValue = struct + type nonrec t = t option + + let label = "summary" +end + +module SummaryServer = Memcached.Make (SummaryValue) diff --git a/infer/src/backend/Summary.mli b/infer/src/backend/Summary.mli index e0869bfcd..a7936623a 100644 --- a/infer/src/backend/Summary.mli +++ b/infer/src/backend/Summary.mli @@ -107,3 +107,7 @@ val store : t -> unit (** Save summary for the procedure into the spec database *) val reset_all : filter:Filtering.procedures_filter -> unit -> unit + +module SummaryValue : Memcached.Value with type t = t option + +module SummaryServer : Memcached.Server with module Value = SummaryValue diff --git a/infer/src/backend/ondemand.ml b/infer/src/backend/ondemand.ml index 3ed4c7da2..8bf32761c 100644 --- a/infer/src/backend/ondemand.ml +++ b/infer/src/backend/ondemand.ml @@ -228,17 +228,38 @@ let analyze_proc ?caller_pdesc callee_pdesc = Some (run_proc_analysis analyze_proc ~caller_pdesc callee_pdesc) +let hash_procname proc_name = Typ.Procname.to_unique_id proc_name |> Utils.string_crc_hex32 + +let memcache_get proc_name = + if not Config.memcached then None + else + let key = hash_procname proc_name in + Summary.SummaryServer.get ~key + + +let memcache_set proc_name summ = + if Config.memcached then + let key = hash_procname proc_name in + Summary.SummaryServer.set ~key summ + + let analyze_proc_desc ~caller_pdesc callee_pdesc = let callee_pname = Procdesc.get_proc_name callee_pdesc in if is_active callee_pname then None else let cache = Lazy.force cached_results in try Typ.Procname.Hash.find cache callee_pname with Caml.Not_found -> - let summary_option = - let proc_attributes = Procdesc.get_attributes callee_pdesc in - if should_be_analyzed proc_attributes then analyze_proc ~caller_pdesc callee_pdesc - else Summary.get callee_pname + let summary_option, update_memcached = + match memcache_get callee_pname with + | Some summ_opt -> + (summ_opt, false) + | None -> + let proc_attributes = Procdesc.get_attributes callee_pdesc in + if should_be_analyzed proc_attributes then + (analyze_proc ~caller_pdesc callee_pdesc, true) + else (Summary.get callee_pname, true) in + if update_memcached then memcache_set callee_pname summary_option ; Typ.Procname.Hash.add cache callee_pname summary_option ; summary_option @@ -259,15 +280,20 @@ let analyze_proc_name ?caller_pdesc callee_pname = else let cache = Lazy.force cached_results in try Typ.Procname.Hash.find cache callee_pname with Caml.Not_found -> - let summary_option = - if procedure_should_be_analyzed callee_pname then - match get_proc_desc callee_pname with - | Some callee_pdesc -> - analyze_proc ?caller_pdesc callee_pdesc - | None -> - Summary.get callee_pname - else Summary.get callee_pname + let summary_option, update_memcached = + match memcache_get callee_pname with + | Some summ_opt -> + (summ_opt, false) + | None -> + if procedure_should_be_analyzed callee_pname then + match get_proc_desc callee_pname with + | Some callee_pdesc -> + (analyze_proc ?caller_pdesc callee_pdesc, true) + | None -> + (Summary.get callee_pname, true) + else (Summary.get callee_pname, true) in + if update_memcached then memcache_set callee_pname summary_option ; Typ.Procname.Hash.add cache callee_pname summary_option ; summary_option diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index aa99bd93b..3bc7d963d 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -1567,6 +1567,16 @@ and method_decls_info = method name, etc.) when Infer is run Test Determinator mode with $(b,--test-determinator)." +and memcached = + CLOpt.mk_bool ~long:"memcached" ~default:false + "EXPERIMENTAL: Use memcached caching summaries during analysis." + + +and memcached_size_mb = + CLOpt.mk_int ~long:"memcached-size-mb" ~default:2048 + "EXPERIMENTAL: Default memcached size in megabytes." + + and merge = CLOpt.mk_bool ~deprecated:["merge"] ~long:"merge" ~in_help:InferCommand.[(Analyze, manual_buck_flavors)] @@ -2722,6 +2732,10 @@ and max_nesting = !max_nesting and method_decls_info = !method_decls_info +and memcached = !memcached + +and memcached_size_mb = !memcached_size_mb + and merge = !merge and ml_buckets = !ml_buckets diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli index e2ff0f215..51cd5a3f2 100644 --- a/infer/src/base/Config.mli +++ b/infer/src/base/Config.mli @@ -472,6 +472,10 @@ val max_nesting : int option val method_decls_info : string option +val memcached : bool + +val memcached_size_mb : int + val merge : bool val ml_buckets : diff --git a/infer/src/base/Memcached.ml b/infer/src/base/Memcached.ml new file mode 100644 index 000000000..5888a6a35 --- /dev/null +++ b/infer/src/base/Memcached.ml @@ -0,0 +1,190 @@ +(* + * Copyright (c) 2018-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) +open! IStd +module L = Logging + +(** unix socket name, always relative to the master [results_dir] *) +let memcached_socket_relative = "memcached.socket" + +(** find the results_dir of the top-level infer process *) +let results_dir = + Sys.getenv Config.infer_top_results_dir_env_var |> Option.value ~default:Config.results_dir + + +(** log file for memcached *) +let memcached_log = results_dir ^ "/memcached.log" + +(** binary name -- assumed to be on path *) +let memcached_bin = "memcached" + +(** shell to use for redirecting memcached's output to the log *) +let shell = "sh" + +type server = {input: In_channel.t; output: Out_channel.t} + +(* Unix socket paths have a historical length limit of ~100 chars (!?*@&*$). However, this applies + to the argument passed in the system call to create the socket. Thus a workaround is to cd into + the parent dir of the socket and then create it, hence this function. *) +let in_results_dir ~f = + let cwd = Unix.getcwd () in + let () = Unix.chdir results_dir in + let res = f () in + let () = Unix.chdir cwd in + res + + +let fail_on response_line = L.die InternalError "Unexpected server response: %s" response_line + +let send server str = Out_channel.output_string server.output str + +let eol = "\r\n" + +let send_eol server = send server eol + +let flush_server server = Out_channel.flush server.output + +let send_line server str = send server str ; send_eol server ; flush_server server + +let recv_line server = + match In_channel.input_line ~fix_win_eol:true server.input with + | None -> + fail_on "No response" + | Some line -> + line + + +let expect_line server lines = + match recv_line server with + | x when List.exists lines ~f:(String.equal x) -> + () + | response -> + fail_on response + + +(** socket to memcached server *) +let server : server option ref = ref None + +let with_server ~f = + match !server with + | None -> + L.die InternalError "Asked to perform socket operation without live connection.@." + | Some s -> + f s + + +let flush_all () = with_server ~f:(fun s -> send_line s "flush_all") + +let disconnect () = + with_server ~f:(fun s -> + server := None ; + Unix.shutdown_connection s.input ; + (* For some reason the below sometimes throws -- wrong docs? *) + (* In_channel.close s.input ; *) + Out_channel.close s.output ) + + +let connect () = + if Option.is_some !server then + L.die InternalError "Asked to connect memcached while already connected.@." ; + in_results_dir ~f:(fun () -> + let input, output = Unix.open_connection (ADDR_UNIX memcached_socket_relative) in + server := Some {input; output} ) + + +let stats server = + let rec aux acc = match recv_line server with "END" -> List.rev acc | l -> aux (l :: acc) in + send_line server "stats" ; aux [] + + +let stop pid () = + connect () ; + let stats = with_server ~f:stats in + disconnect () ; + Signal.send Signal.term (`Pid pid) |> ignore ; + Unix.wait (`Pid pid) |> ignore ; + in_results_dir ~f:(fun () -> Unix.remove memcached_socket_relative) ; + Out_channel.(with_file memcached_log ~append:true ~f:(fun ch -> output_lines ch stats)) + + +let start () = + let socket_exists () = + in_results_dir ~f:(fun () -> Sys.file_exists_exn memcached_socket_relative) + in + if Option.is_some !server then + L.die InternalError "Connection is open but asked to start memcached.@." ; + (* Unix sockets can be shadowed, so avoid creating a new socket/server if there is one already *) + if socket_exists () then () + else + let verbosity = match Config.debug_level_analysis with 0 -> "" | 1 -> "-v" | _ -> "-vv" in + let cmd = + Printf.sprintf "exec %s %s -Bascii -C -m%d -s%s > %s 2>&1" memcached_bin verbosity + Config.memcached_size_mb memcached_socket_relative memcached_log + in + let pid = in_results_dir ~f:(Unix.fork_exec ~prog:shell ~argv:[shell; "-c"; cmd]) in + (* Wait until socket is open -- NB this waits for 0.05s not 0.05ns *) + while not (socket_exists ()) do + Unix.nanosleep 0.05 |> ignore + done ; + Epilogues.register ~description:"Shutdown Memcached daemon" ~f:(stop pid) + + +module type Value = sig + type t + + val label : string +end + +module type Server = sig + module Value : Value + + val get : key:string -> Value.t option + + val set : key:string -> Value.t -> unit +end + +module Make (V : Value) : Server with module Value = V = struct + module Value = V + + let set_ = + let buffer = ref (Bytes.create 1024) in + let rec try_to_buffer value = + try Marshal.to_buffer !buffer 0 (Bytes.length !buffer) value [] with Failure _ -> + (* double buffer length *) + buffer := Bytes.create (2 * Bytes.length !buffer) ; + try_to_buffer value + in + fun server ~key value -> + let value_length = try_to_buffer value in + Printf.fprintf server.output "set %s:%s 0 0 %d%s" Value.label key value_length eol ; + Out_channel.output server.output ~buf:!buffer ~pos:0 ~len:value_length ; + send_eol server ; + flush_server server ; + expect_line server ["STORED"; "SERVER_ERROR object too large for cache"] + + + let get_ server ~key = + Printf.fprintf server.output "get %s:%s%s" Value.label key eol ; + flush_server server ; + let value_line = recv_line server in + match String.split value_line ~on:' ' with + | ["END"] -> + None + | ["VALUE"; _key'; _flags; _bytes] -> + let value : Value.t = Marshal.from_channel server.input in + (* eat up the trailing eol *) + expect_line server [""] ; + expect_line server ["END"] ; + Some value + | _ -> + fail_on value_line + + + let get ~key = with_server ~f:(get_ ~key) + + (* TODO: do this on background thread to avoid blocking on the response *) + let set ~key value = with_server ~f:(fun s -> set_ s ~key value) +end diff --git a/infer/src/base/Memcached.mli b/infer/src/base/Memcached.mli new file mode 100644 index 000000000..c68649a96 --- /dev/null +++ b/infer/src/base/Memcached.mli @@ -0,0 +1,42 @@ +(* + * Copyright (c) 2018-present, Facebook, Inc. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) +open! IStd + +(** Interface for managing a (single) memcached daemon and getting/setting OCaml values *) + +val connect : unit -> unit +(** connect to a running memcached server -- only call this from processes which do not fork *) + +val disconnect : unit -> unit +(** disconnect after having connected first *) + +val start : unit -> unit +(** start a memcached daemon and set up an epilogue to kill it on exit -- only for top-level *) + +val flush_all : unit -> unit +(** empty the cache *) + +(** type to marshal, plus a unique label that will be colon-prepended to a key, + roughly signifying a table *) +module type Value = sig + type t + + val label : string +end + +module type Server = sig + module Value : Value + + val get : key:string -> Value.t option + (** get a value, [None] means no [key] exists *) + + val set : key:string -> Value.t -> unit + (** set a [key]/value pair. NB we swallow failures due to exceeding max value size currently. + This will need to be changed before memcached is used as a primary store. *) +end + +module Make (V : Value) : Server with module Value = V diff --git a/infer/src/infer.ml b/infer/src/infer.ml index 3caffb15c..c71a16a33 100644 --- a/infer/src/infer.ml +++ b/infer/src/infer.ml @@ -51,7 +51,10 @@ let setup () = ResultsDir.assert_results_dir "please run an infer analysis first" | Events -> ResultsDir.assert_results_dir "have you run infer before?" ) ; - if CLOpt.is_originator then ( RunState.add_run_to_sequence () ; RunState.store () ) ; + if CLOpt.is_originator then ( + RunState.add_run_to_sequence () ; + RunState.store () ; + if Config.memcached then Memcached.start () ) ; () diff --git a/infer/src/integration/Diff.ml b/infer/src/integration/Diff.ml index 2fd3f6b13..4d5eea436 100644 --- a/infer/src/integration/Diff.ml +++ b/infer/src/integration/Diff.ml @@ -73,6 +73,7 @@ let diff driver_mode = let current_report, current_costs = save_report Current in (* Some files in the current checkout may be deleted in the old checkout. If we kept the results of the previous capture and analysis around, we would report issues on these files again in the previous checkout, which is wrong. Do not do anything too smart for now and just delete all results from the analysis of the current checkout. *) ResultsDir.delete_capture_and_analysis_data () ; + if Config.memcached then ( Memcached.(connect () ; flush_all () ; disconnect ()) ) ; (* TODO(t15553258) bail if nothing to analyze (configurable, some people might care about bugs fixed more than about time to analyze) *) checkout Previous ;