From a66f6cd736617dc46493c4322af07e28e6dc166a Mon Sep 17 00:00:00 2001 From: Fernando Gasperi Jabalera Date: Mon, 24 Feb 2020 03:33:52 -0800 Subject: [PATCH] [scheduler][restart] Quantify wasted work Summary: Count the time used by the `RestartScheduler` for analysis (useful) and the time wasted because a a worker was not able to take a lock to have a metric to compare different versions of the scheduler. The wasted time is not actually count but it can be calculated by substracting useful time from the total time. This was implemented like these to avoid substractions that may make the floating point calculations more complicated. Reviewed By: ngorogiannis Differential Revision: D19969960 fbshipit-source-id: 68a1132ca --- infer/src/backend/BackendStats.ml | 102 +++++++++++++++--------- infer/src/backend/BackendStats.mli | 8 +- infer/src/backend/ExecutionDuration.ml | 63 +++++++++++++++ infer/src/backend/ExecutionDuration.mli | 28 +++++++ infer/src/backend/ProcLocker.ml | 18 ++--- infer/src/backend/RestartScheduler.ml | 40 +++++++++- 6 files changed, 206 insertions(+), 53 deletions(-) create mode 100644 infer/src/backend/ExecutionDuration.ml create mode 100644 infer/src/backend/ExecutionDuration.mli diff --git a/infer/src/backend/BackendStats.ml b/infer/src/backend/BackendStats.ml index 9565f4945..aa9815cf5 100644 --- a/infer/src/backend/BackendStats.ml +++ b/infer/src/backend/BackendStats.ml @@ -21,8 +21,10 @@ include struct ; mutable ondemand_procs_analyzed: int ; mutable ondemand_local_cache_hits: int ; mutable ondemand_local_cache_misses: int - ; mutable proc_locker_lock_sys_time: float - ; mutable proc_locker_unlock_sys_time: float } + ; mutable proc_locker_lock_time: ExecutionDuration.t + ; mutable proc_locker_unlock_time: ExecutionDuration.t + ; mutable restart_scheduler_useful_time: ExecutionDuration.t + ; mutable restart_scheduler_total_time: ExecutionDuration.t } [@@deriving fields] end @@ -35,27 +37,25 @@ let global_stats = ; ondemand_procs_analyzed= 0 ; ondemand_local_cache_hits= 0 ; ondemand_local_cache_misses= 0 - ; proc_locker_lock_sys_time= 0. - ; proc_locker_unlock_sys_time= 0. } + ; proc_locker_lock_time= ExecutionDuration.zero + ; proc_locker_unlock_time= ExecutionDuration.zero + ; restart_scheduler_useful_time= ExecutionDuration.zero + ; restart_scheduler_total_time= ExecutionDuration.zero } let get () = global_stats -let incr field = +let update_with field ~f = match Field.setter field with | None -> L.die InternalError "incr on non-mutable field %s" (Field.name field) | Some set -> - set global_stats (Field.get field global_stats + 1) + set global_stats (f (Field.get field global_stats)) -let add field seconds_count = - match Field.setter field with - | None -> - L.die InternalError "add on non-mutable field %s" (Field.name field) - | Some set -> - set global_stats (Field.get field global_stats +. seconds_count) +let incr field = update_with field ~f:(( + ) 1) +let add field exe_duration = update_with field ~f:(ExecutionDuration.add exe_duration) let incr_summary_file_try_load () = incr Fields.summary_file_try_load @@ -73,12 +73,20 @@ let incr_ondemand_local_cache_hits () = incr Fields.ondemand_local_cache_hits let incr_ondemand_local_cache_misses () = incr Fields.ondemand_local_cache_misses -let add_to_proc_locker_lock_sys_time seconds_count = - add Fields.proc_locker_lock_sys_time seconds_count +let add_to_proc_locker_lock_time execution_duration = + add Fields.proc_locker_lock_time execution_duration + + +let add_to_proc_locker_unlock_time execution_duration = + add Fields.proc_locker_unlock_time execution_duration + + +let add_to_restart_scheduler_useful_time execution_duration = + add Fields.restart_scheduler_useful_time execution_duration -let add_to_proc_locker_unlock_sys_time seconds_count = - add Fields.proc_locker_unlock_sys_time seconds_count +let add_to_restart_scheduler_total_time execution_duration = + add Fields.restart_scheduler_total_time execution_duration let copy from ~into : unit = @@ -90,14 +98,16 @@ let copy from ~into : unit = ; ondemand_procs_analyzed ; ondemand_local_cache_hits ; ondemand_local_cache_misses - ; proc_locker_lock_sys_time - ; proc_locker_unlock_sys_time } = + ; proc_locker_lock_time + ; proc_locker_unlock_time + ; restart_scheduler_useful_time + ; restart_scheduler_total_time } = from in Fields.Direct.set_all_mutable_fields into ~summary_file_try_load ~summary_read_from_disk ~summary_cache_hits ~summary_cache_misses ~summary_has_model_queries ~ondemand_procs_analyzed - ~ondemand_local_cache_hits ~ondemand_local_cache_misses ~proc_locker_lock_sys_time - ~proc_locker_unlock_sys_time + ~ondemand_local_cache_hits ~ondemand_local_cache_misses ~proc_locker_lock_time + ~proc_locker_unlock_time ~restart_scheduler_useful_time ~restart_scheduler_total_time let merge stats1 stats2 = @@ -110,9 +120,16 @@ let merge stats1 stats2 = ; ondemand_local_cache_hits= stats1.ondemand_local_cache_hits + stats2.ondemand_local_cache_hits ; ondemand_local_cache_misses= stats1.ondemand_local_cache_misses + stats2.ondemand_local_cache_misses - ; proc_locker_lock_sys_time= stats1.proc_locker_lock_sys_time +. stats2.proc_locker_lock_sys_time - ; proc_locker_unlock_sys_time= - stats1.proc_locker_unlock_sys_time +. stats2.proc_locker_unlock_sys_time } + ; proc_locker_lock_time= + ExecutionDuration.add stats1.proc_locker_lock_time stats2.proc_locker_lock_time + ; proc_locker_unlock_time= + ExecutionDuration.add stats1.proc_locker_unlock_time stats2.proc_locker_unlock_time + ; restart_scheduler_useful_time= + ExecutionDuration.add stats1.restart_scheduler_useful_time + stats2.restart_scheduler_useful_time + ; restart_scheduler_total_time= + ExecutionDuration.add stats1.restart_scheduler_total_time stats2.restart_scheduler_total_time + } let initial = @@ -124,8 +141,10 @@ let initial = ; ondemand_procs_analyzed= 0 ; ondemand_local_cache_hits= 0 ; ondemand_local_cache_misses= 0 - ; proc_locker_lock_sys_time= 0. - ; proc_locker_unlock_sys_time= 0. } + ; proc_locker_lock_time= ExecutionDuration.zero + ; proc_locker_unlock_time= ExecutionDuration.zero + ; restart_scheduler_useful_time= ExecutionDuration.zero + ; restart_scheduler_total_time= ExecutionDuration.zero } let reset () = copy initial ~into:global_stats @@ -138,8 +157,10 @@ let pp f stats = let pp_int_field stats f field = F.fprintf f "%s= %d@;" (Field.name field) (Field.get field stats) in - let pp_float_field stats f field = - F.fprintf f "%s= %.8f@;" (Field.name field) (Field.get field stats) + let pp_execution_duration_field stats f field = + let field_value = Field.get field stats in + let field_name = Field.name field in + F.fprintf f "%a@;" (ExecutionDuration.pp ~field:field_name) field_value in let pp_cache_hits stats cache_misses f cache_hits_field = let cache_hits = Field.get cache_hits_field stats in @@ -154,27 +175,36 @@ let pp f stats = ~ondemand_procs_analyzed:(pp_int_field stats f) ~ondemand_local_cache_hits:(pp_cache_hits stats stats.ondemand_local_cache_misses f) ~ondemand_local_cache_misses:(pp_int_field stats f) - ~proc_locker_lock_sys_time:(pp_float_field stats f) - ~proc_locker_unlock_sys_time:(pp_float_field stats f) + ~proc_locker_lock_time:(pp_execution_duration_field stats f) + ~proc_locker_unlock_time:(pp_execution_duration_field stats f) + ~restart_scheduler_useful_time:(pp_execution_duration_field stats f) + ~restart_scheduler_total_time:(pp_execution_duration_field stats f) in F.fprintf f "@[Backend stats:@\n@[ %t@]@]@." (pp_stats stats) let log_to_scuba stats = let create_counter field = - LogEntry.mk_count ~label:("backend_stats." ^ Field.name field) ~value:(Field.get field stats) + [LogEntry.mk_count ~label:("backend_stats." ^ Field.name field) ~value:(Field.get field stats)] in - let create_float_counter field = - LogEntry.mk_count - ~label:("backend_stats." ^ Field.name field) - ~value:(Field.get field stats |> Int.of_float) + let secs_to_ms s = s *. 1000. |> Float.to_int in + let create_time_entry field = + let field_value = Field.get field stats in + [ LogEntry.mk_time + ~label:("backend_stats." ^ Field.name field ^ "_sys") + ~duration_ms:(ExecutionDuration.sys_time field_value |> secs_to_ms) + ; LogEntry.mk_time + ~label:("backend_stats." ^ Field.name field ^ "_user") + ~duration_ms:(ExecutionDuration.user_time field_value |> secs_to_ms) ] in let entries = Fields.to_list ~summary_file_try_load:create_counter ~summary_read_from_disk:create_counter ~summary_cache_hits:create_counter ~summary_cache_misses:create_counter ~summary_has_model_queries:create_counter ~ondemand_procs_analyzed:create_counter ~ondemand_local_cache_hits:create_counter ~ondemand_local_cache_misses:create_counter - ~proc_locker_lock_sys_time:create_float_counter - ~proc_locker_unlock_sys_time:create_float_counter + ~proc_locker_lock_time:create_time_entry ~proc_locker_unlock_time:create_time_entry + ~restart_scheduler_useful_time:create_time_entry + ~restart_scheduler_total_time:create_time_entry + |> List.concat in ScubaLogging.log_many entries diff --git a/infer/src/backend/BackendStats.mli b/infer/src/backend/BackendStats.mli index 5eab4f877..b32d28aca 100644 --- a/infer/src/backend/BackendStats.mli +++ b/infer/src/backend/BackendStats.mli @@ -31,9 +31,13 @@ val incr_ondemand_local_cache_hits : unit -> unit val incr_ondemand_local_cache_misses : unit -> unit -val add_to_proc_locker_lock_sys_time : float -> unit +val add_to_proc_locker_lock_time : ExecutionDuration.t -> unit -val add_to_proc_locker_unlock_sys_time : float -> unit +val add_to_proc_locker_unlock_time : ExecutionDuration.t -> unit + +val add_to_restart_scheduler_total_time : ExecutionDuration.t -> unit + +val add_to_restart_scheduler_useful_time : ExecutionDuration.t -> unit val reset : unit -> unit (** reset all stats *) diff --git a/infer/src/backend/ExecutionDuration.ml b/infer/src/backend/ExecutionDuration.ml new file mode 100644 index 000000000..565209464 --- /dev/null +++ b/infer/src/backend/ExecutionDuration.ml @@ -0,0 +1,63 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd +module F = Format + +module Duration = struct + type t = {time: float; comp: float} + + let zero = {time= 0.; comp= 0.} + + (* https://en.wikipedia.org/wiki/Kahan_summation_algorithm *) + let neumaier_sum_elapsed d elapsed = + let time = d.time +. elapsed in + let comp = + if Float.(abs d.time >. abs elapsed) then d.comp +. (d.time -. time +. elapsed) + else d.comp +. (elapsed -. time +. d.time) + in + {time; comp} + + + let since from ~now = {time= now -. from; comp= 0.} + + let add d1 d2 = neumaier_sum_elapsed {d1 with comp= d1.comp +. d2.comp} d2.time + + let secs d = d.time +. d.comp +end + +type t = {utime: Duration.t; stime: Duration.t} + +type 'a evaluation_result = {result: 'a; execution_duration: t} + +let zero = {utime= Duration.zero; stime= Duration.zero} + +let since (from : Unix.process_times) = + let now = Unix.times () in + { utime= Duration.since from.tms_utime ~now:now.tms_utime + ; stime= Duration.since from.tms_stime ~now:now.tms_stime } + + +let add exe_d1 exe_d2 = + {utime= Duration.add exe_d1.utime exe_d2.utime; stime= Duration.add exe_d1.stime exe_d2.stime} + + +let add_duration_since exe_duration (from : Unix.process_times) = add exe_duration (since from) + +let user_time exe_duration = Duration.secs exe_duration.utime + +let sys_time exe_duration = Duration.secs exe_duration.stime + +let pp ~field fmt exe_duration = + F.fprintf fmt "%s_utime= %.8f@;%s_stime= %.8f" field (user_time exe_duration) field + (sys_time exe_duration) + + +let timed_evaluate ~f = + let start_time = Unix.times () in + let result = f () in + {result; execution_duration= since start_time} diff --git a/infer/src/backend/ExecutionDuration.mli b/infer/src/backend/ExecutionDuration.mli new file mode 100644 index 000000000..f6938b529 --- /dev/null +++ b/infer/src/backend/ExecutionDuration.mli @@ -0,0 +1,28 @@ +(* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + *) + +open! IStd + +type t + +type 'a evaluation_result = {result: 'a; execution_duration: t} + +val zero : t + +val since : Unix.process_times -> t + +val add_duration_since : t -> Unix.process_times -> t + +val add : t -> t -> t + +val user_time : t -> float + +val sys_time : t -> float + +val pp : field:string -> Format.formatter -> t -> unit + +val timed_evaluate : f:(unit -> 'a) -> 'a evaluation_result diff --git a/infer/src/backend/ProcLocker.ml b/infer/src/backend/ProcLocker.ml index 5cbe014b5..e69323820 100644 --- a/infer/src/backend/ProcLocker.ml +++ b/infer/src/backend/ProcLocker.ml @@ -6,20 +6,16 @@ *) open! IStd -module L = Logging exception UnlockNotLocked of Procname.t -let log_lock_systime = BackendStats.add_to_proc_locker_lock_sys_time +let log_lock_time = BackendStats.add_to_proc_locker_lock_time -let log_unlock_systime = BackendStats.add_to_proc_locker_unlock_sys_time +let log_unlock_time = BackendStats.add_to_proc_locker_unlock_time -let record_sys_time_of ~f ~log_f = - let start_time = Unix.times () in - let result = f () in - let end_time = Unix.times () in - let sys_time_spent = end_time.tms_stime -. start_time.tms_stime in - log_f sys_time_spent ; result +let record_time_of ~f ~log_f = + let ExecutionDuration.{result; execution_duration} = ExecutionDuration.timed_evaluate ~f in + log_f execution_duration ; result let locks_dir = Config.procnames_locks_dir @@ -35,13 +31,13 @@ let clean () = () let filename_from pname = locks_dir ^/ Procname.to_filename pname let unlock pname = - record_sys_time_of ~log_f:log_unlock_systime ~f:(fun () -> + record_time_of ~log_f:log_unlock_time ~f:(fun () -> try Unix.unlink (filename_from pname) with Unix.Unix_error (Unix.ENOENT, _, _) -> raise (UnlockNotLocked pname) ) let try_lock pname = - record_sys_time_of ~log_f:log_lock_systime ~f:(fun () -> + record_time_of ~log_f:log_lock_time ~f:(fun () -> try Unix.symlink ~target:locks_target ~link_name:(filename_from pname) ; true diff --git a/infer/src/backend/RestartScheduler.ml b/infer/src/backend/RestartScheduler.ml index 7d934a245..9e9d46f35 100644 --- a/infer/src/backend/RestartScheduler.ml +++ b/infer/src/backend/RestartScheduler.ml @@ -53,11 +53,37 @@ let if_restart_scheduler f = else match Config.scheduler with File | SyntacticCallGraph -> () | Restart -> f () +type locked_proc = + {pname: Procname.t; start: Unix.process_times; mutable callees_useful: ExecutionDuration.t} + let locked_procs = Stack.create () -let record_locked_proc (pname : Procname.t) = Stack.push locked_procs pname +let record_locked_proc (pname : Procname.t) = + Stack.push locked_procs {pname; start= Unix.times (); callees_useful= ExecutionDuration.zero} + + +let add_to_useful_time (from : Unix.process_times) = + BackendStats.add_to_restart_scheduler_useful_time (ExecutionDuration.since from) + + +let add_to_useful_exe_duration exe_duration = + BackendStats.add_to_restart_scheduler_useful_time exe_duration + + +let add_to_total_time (from : Unix.process_times) = + BackendStats.add_to_restart_scheduler_total_time (ExecutionDuration.since from) + + +let unlock_all () = + Stack.until_empty locked_procs (fun {pname; start; callees_useful} -> + ( match Stack.top locked_procs with + | Some caller -> + caller.callees_useful <- ExecutionDuration.add caller.callees_useful callees_useful + | None -> + add_to_useful_exe_duration callees_useful ; + add_to_total_time start ) ; + ProcLocker.unlock pname ) -let unlock_all () = Stack.until_empty locked_procs ProcLocker.unlock let lock_exn pname = if_restart_scheduler (fun () -> @@ -73,10 +99,16 @@ let unlock pname = | None -> L.die InternalError "Trying to unlock %s but it does not appear to be locked.@." (Procname.to_string pname) - | Some stack_pname when not (Procname.equal pname stack_pname) -> + | Some {pname= stack_pname} when not (Procname.equal pname stack_pname) -> L.die InternalError "Trying to unlock %s but top of stack is %s.@." (Procname.to_string pname) (Procname.to_string stack_pname) - | Some _ -> + | Some {start} -> + ( match Stack.top locked_procs with + | Some caller -> + caller.callees_useful <- + ExecutionDuration.add_duration_since caller.callees_useful start + | None -> + add_to_useful_time start ; add_to_total_time start ) ; ProcLocker.unlock pname )