[scheduler][restart] Quantify wasted work

Summary: Count the time used by the `RestartScheduler` for analysis (useful) and the time wasted because a a worker was not able to take a lock to have a metric to compare different versions of the scheduler. The wasted time is not actually count but it can be calculated by substracting useful time from the total time. This was implemented like these to avoid substractions that may make the floating point calculations more complicated.

Reviewed By: ngorogiannis

Differential Revision: D19969960

fbshipit-source-id: 68a1132ca
master
Fernando Gasperi Jabalera 5 years ago committed by Facebook Github Bot
parent 5c5609591e
commit a66f6cd736

@ -21,8 +21,10 @@ include struct
; mutable ondemand_procs_analyzed: int
; mutable ondemand_local_cache_hits: int
; mutable ondemand_local_cache_misses: int
; mutable proc_locker_lock_sys_time: float
; mutable proc_locker_unlock_sys_time: float }
; mutable proc_locker_lock_time: ExecutionDuration.t
; mutable proc_locker_unlock_time: ExecutionDuration.t
; mutable restart_scheduler_useful_time: ExecutionDuration.t
; mutable restart_scheduler_total_time: ExecutionDuration.t }
[@@deriving fields]
end
@ -35,27 +37,25 @@ let global_stats =
; ondemand_procs_analyzed= 0
; ondemand_local_cache_hits= 0
; ondemand_local_cache_misses= 0
; proc_locker_lock_sys_time= 0.
; proc_locker_unlock_sys_time= 0. }
; proc_locker_lock_time= ExecutionDuration.zero
; proc_locker_unlock_time= ExecutionDuration.zero
; restart_scheduler_useful_time= ExecutionDuration.zero
; restart_scheduler_total_time= ExecutionDuration.zero }
let get () = global_stats
let incr field =
let update_with field ~f =
match Field.setter field with
| None ->
L.die InternalError "incr on non-mutable field %s" (Field.name field)
| Some set ->
set global_stats (Field.get field global_stats + 1)
set global_stats (f (Field.get field global_stats))
let add field seconds_count =
match Field.setter field with
| None ->
L.die InternalError "add on non-mutable field %s" (Field.name field)
| Some set ->
set global_stats (Field.get field global_stats +. seconds_count)
let incr field = update_with field ~f:(( + ) 1)
let add field exe_duration = update_with field ~f:(ExecutionDuration.add exe_duration)
let incr_summary_file_try_load () = incr Fields.summary_file_try_load
@ -73,12 +73,20 @@ let incr_ondemand_local_cache_hits () = incr Fields.ondemand_local_cache_hits
let incr_ondemand_local_cache_misses () = incr Fields.ondemand_local_cache_misses
let add_to_proc_locker_lock_sys_time seconds_count =
add Fields.proc_locker_lock_sys_time seconds_count
let add_to_proc_locker_lock_time execution_duration =
add Fields.proc_locker_lock_time execution_duration
let add_to_proc_locker_unlock_time execution_duration =
add Fields.proc_locker_unlock_time execution_duration
let add_to_restart_scheduler_useful_time execution_duration =
add Fields.restart_scheduler_useful_time execution_duration
let add_to_proc_locker_unlock_sys_time seconds_count =
add Fields.proc_locker_unlock_sys_time seconds_count
let add_to_restart_scheduler_total_time execution_duration =
add Fields.restart_scheduler_total_time execution_duration
let copy from ~into : unit =
@ -90,14 +98,16 @@ let copy from ~into : unit =
; ondemand_procs_analyzed
; ondemand_local_cache_hits
; ondemand_local_cache_misses
; proc_locker_lock_sys_time
; proc_locker_unlock_sys_time } =
; proc_locker_lock_time
; proc_locker_unlock_time
; restart_scheduler_useful_time
; restart_scheduler_total_time } =
from
in
Fields.Direct.set_all_mutable_fields into ~summary_file_try_load ~summary_read_from_disk
~summary_cache_hits ~summary_cache_misses ~summary_has_model_queries ~ondemand_procs_analyzed
~ondemand_local_cache_hits ~ondemand_local_cache_misses ~proc_locker_lock_sys_time
~proc_locker_unlock_sys_time
~ondemand_local_cache_hits ~ondemand_local_cache_misses ~proc_locker_lock_time
~proc_locker_unlock_time ~restart_scheduler_useful_time ~restart_scheduler_total_time
let merge stats1 stats2 =
@ -110,9 +120,16 @@ let merge stats1 stats2 =
; ondemand_local_cache_hits= stats1.ondemand_local_cache_hits + stats2.ondemand_local_cache_hits
; ondemand_local_cache_misses=
stats1.ondemand_local_cache_misses + stats2.ondemand_local_cache_misses
; proc_locker_lock_sys_time= stats1.proc_locker_lock_sys_time +. stats2.proc_locker_lock_sys_time
; proc_locker_unlock_sys_time=
stats1.proc_locker_unlock_sys_time +. stats2.proc_locker_unlock_sys_time }
; proc_locker_lock_time=
ExecutionDuration.add stats1.proc_locker_lock_time stats2.proc_locker_lock_time
; proc_locker_unlock_time=
ExecutionDuration.add stats1.proc_locker_unlock_time stats2.proc_locker_unlock_time
; restart_scheduler_useful_time=
ExecutionDuration.add stats1.restart_scheduler_useful_time
stats2.restart_scheduler_useful_time
; restart_scheduler_total_time=
ExecutionDuration.add stats1.restart_scheduler_total_time stats2.restart_scheduler_total_time
}
let initial =
@ -124,8 +141,10 @@ let initial =
; ondemand_procs_analyzed= 0
; ondemand_local_cache_hits= 0
; ondemand_local_cache_misses= 0
; proc_locker_lock_sys_time= 0.
; proc_locker_unlock_sys_time= 0. }
; proc_locker_lock_time= ExecutionDuration.zero
; proc_locker_unlock_time= ExecutionDuration.zero
; restart_scheduler_useful_time= ExecutionDuration.zero
; restart_scheduler_total_time= ExecutionDuration.zero }
let reset () = copy initial ~into:global_stats
@ -138,8 +157,10 @@ let pp f stats =
let pp_int_field stats f field =
F.fprintf f "%s= %d@;" (Field.name field) (Field.get field stats)
in
let pp_float_field stats f field =
F.fprintf f "%s= %.8f@;" (Field.name field) (Field.get field stats)
let pp_execution_duration_field stats f field =
let field_value = Field.get field stats in
let field_name = Field.name field in
F.fprintf f "%a@;" (ExecutionDuration.pp ~field:field_name) field_value
in
let pp_cache_hits stats cache_misses f cache_hits_field =
let cache_hits = Field.get cache_hits_field stats in
@ -154,27 +175,36 @@ let pp f stats =
~ondemand_procs_analyzed:(pp_int_field stats f)
~ondemand_local_cache_hits:(pp_cache_hits stats stats.ondemand_local_cache_misses f)
~ondemand_local_cache_misses:(pp_int_field stats f)
~proc_locker_lock_sys_time:(pp_float_field stats f)
~proc_locker_unlock_sys_time:(pp_float_field stats f)
~proc_locker_lock_time:(pp_execution_duration_field stats f)
~proc_locker_unlock_time:(pp_execution_duration_field stats f)
~restart_scheduler_useful_time:(pp_execution_duration_field stats f)
~restart_scheduler_total_time:(pp_execution_duration_field stats f)
in
F.fprintf f "@[Backend stats:@\n@[<v2> %t@]@]@." (pp_stats stats)
let log_to_scuba stats =
let create_counter field =
LogEntry.mk_count ~label:("backend_stats." ^ Field.name field) ~value:(Field.get field stats)
[LogEntry.mk_count ~label:("backend_stats." ^ Field.name field) ~value:(Field.get field stats)]
in
let create_float_counter field =
LogEntry.mk_count
~label:("backend_stats." ^ Field.name field)
~value:(Field.get field stats |> Int.of_float)
let secs_to_ms s = s *. 1000. |> Float.to_int in
let create_time_entry field =
let field_value = Field.get field stats in
[ LogEntry.mk_time
~label:("backend_stats." ^ Field.name field ^ "_sys")
~duration_ms:(ExecutionDuration.sys_time field_value |> secs_to_ms)
; LogEntry.mk_time
~label:("backend_stats." ^ Field.name field ^ "_user")
~duration_ms:(ExecutionDuration.user_time field_value |> secs_to_ms) ]
in
let entries =
Fields.to_list ~summary_file_try_load:create_counter ~summary_read_from_disk:create_counter
~summary_cache_hits:create_counter ~summary_cache_misses:create_counter
~summary_has_model_queries:create_counter ~ondemand_procs_analyzed:create_counter
~ondemand_local_cache_hits:create_counter ~ondemand_local_cache_misses:create_counter
~proc_locker_lock_sys_time:create_float_counter
~proc_locker_unlock_sys_time:create_float_counter
~proc_locker_lock_time:create_time_entry ~proc_locker_unlock_time:create_time_entry
~restart_scheduler_useful_time:create_time_entry
~restart_scheduler_total_time:create_time_entry
|> List.concat
in
ScubaLogging.log_many entries

@ -31,9 +31,13 @@ val incr_ondemand_local_cache_hits : unit -> unit
val incr_ondemand_local_cache_misses : unit -> unit
val add_to_proc_locker_lock_sys_time : float -> unit
val add_to_proc_locker_lock_time : ExecutionDuration.t -> unit
val add_to_proc_locker_unlock_sys_time : float -> unit
val add_to_proc_locker_unlock_time : ExecutionDuration.t -> unit
val add_to_restart_scheduler_total_time : ExecutionDuration.t -> unit
val add_to_restart_scheduler_useful_time : ExecutionDuration.t -> unit
val reset : unit -> unit
(** reset all stats *)

@ -0,0 +1,63 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module Duration = struct
type t = {time: float; comp: float}
let zero = {time= 0.; comp= 0.}
(* https://en.wikipedia.org/wiki/Kahan_summation_algorithm *)
let neumaier_sum_elapsed d elapsed =
let time = d.time +. elapsed in
let comp =
if Float.(abs d.time >. abs elapsed) then d.comp +. (d.time -. time +. elapsed)
else d.comp +. (elapsed -. time +. d.time)
in
{time; comp}
let since from ~now = {time= now -. from; comp= 0.}
let add d1 d2 = neumaier_sum_elapsed {d1 with comp= d1.comp +. d2.comp} d2.time
let secs d = d.time +. d.comp
end
type t = {utime: Duration.t; stime: Duration.t}
type 'a evaluation_result = {result: 'a; execution_duration: t}
let zero = {utime= Duration.zero; stime= Duration.zero}
let since (from : Unix.process_times) =
let now = Unix.times () in
{ utime= Duration.since from.tms_utime ~now:now.tms_utime
; stime= Duration.since from.tms_stime ~now:now.tms_stime }
let add exe_d1 exe_d2 =
{utime= Duration.add exe_d1.utime exe_d2.utime; stime= Duration.add exe_d1.stime exe_d2.stime}
let add_duration_since exe_duration (from : Unix.process_times) = add exe_duration (since from)
let user_time exe_duration = Duration.secs exe_duration.utime
let sys_time exe_duration = Duration.secs exe_duration.stime
let pp ~field fmt exe_duration =
F.fprintf fmt "%s_utime= %.8f@;%s_stime= %.8f" field (user_time exe_duration) field
(sys_time exe_duration)
let timed_evaluate ~f =
let start_time = Unix.times () in
let result = f () in
{result; execution_duration= since start_time}

@ -0,0 +1,28 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
type t
type 'a evaluation_result = {result: 'a; execution_duration: t}
val zero : t
val since : Unix.process_times -> t
val add_duration_since : t -> Unix.process_times -> t
val add : t -> t -> t
val user_time : t -> float
val sys_time : t -> float
val pp : field:string -> Format.formatter -> t -> unit
val timed_evaluate : f:(unit -> 'a) -> 'a evaluation_result

@ -6,20 +6,16 @@
*)
open! IStd
module L = Logging
exception UnlockNotLocked of Procname.t
let log_lock_systime = BackendStats.add_to_proc_locker_lock_sys_time
let log_lock_time = BackendStats.add_to_proc_locker_lock_time
let log_unlock_systime = BackendStats.add_to_proc_locker_unlock_sys_time
let log_unlock_time = BackendStats.add_to_proc_locker_unlock_time
let record_sys_time_of ~f ~log_f =
let start_time = Unix.times () in
let result = f () in
let end_time = Unix.times () in
let sys_time_spent = end_time.tms_stime -. start_time.tms_stime in
log_f sys_time_spent ; result
let record_time_of ~f ~log_f =
let ExecutionDuration.{result; execution_duration} = ExecutionDuration.timed_evaluate ~f in
log_f execution_duration ; result
let locks_dir = Config.procnames_locks_dir
@ -35,13 +31,13 @@ let clean () = ()
let filename_from pname = locks_dir ^/ Procname.to_filename pname
let unlock pname =
record_sys_time_of ~log_f:log_unlock_systime ~f:(fun () ->
record_time_of ~log_f:log_unlock_time ~f:(fun () ->
try Unix.unlink (filename_from pname)
with Unix.Unix_error (Unix.ENOENT, _, _) -> raise (UnlockNotLocked pname) )
let try_lock pname =
record_sys_time_of ~log_f:log_lock_systime ~f:(fun () ->
record_time_of ~log_f:log_lock_time ~f:(fun () ->
try
Unix.symlink ~target:locks_target ~link_name:(filename_from pname) ;
true

@ -53,11 +53,37 @@ let if_restart_scheduler f =
else match Config.scheduler with File | SyntacticCallGraph -> () | Restart -> f ()
type locked_proc =
{pname: Procname.t; start: Unix.process_times; mutable callees_useful: ExecutionDuration.t}
let locked_procs = Stack.create ()
let record_locked_proc (pname : Procname.t) = Stack.push locked_procs pname
let record_locked_proc (pname : Procname.t) =
Stack.push locked_procs {pname; start= Unix.times (); callees_useful= ExecutionDuration.zero}
let add_to_useful_time (from : Unix.process_times) =
BackendStats.add_to_restart_scheduler_useful_time (ExecutionDuration.since from)
let add_to_useful_exe_duration exe_duration =
BackendStats.add_to_restart_scheduler_useful_time exe_duration
let add_to_total_time (from : Unix.process_times) =
BackendStats.add_to_restart_scheduler_total_time (ExecutionDuration.since from)
let unlock_all () =
Stack.until_empty locked_procs (fun {pname; start; callees_useful} ->
( match Stack.top locked_procs with
| Some caller ->
caller.callees_useful <- ExecutionDuration.add caller.callees_useful callees_useful
| None ->
add_to_useful_exe_duration callees_useful ;
add_to_total_time start ) ;
ProcLocker.unlock pname )
let unlock_all () = Stack.until_empty locked_procs ProcLocker.unlock
let lock_exn pname =
if_restart_scheduler (fun () ->
@ -73,10 +99,16 @@ let unlock pname =
| None ->
L.die InternalError "Trying to unlock %s but it does not appear to be locked.@."
(Procname.to_string pname)
| Some stack_pname when not (Procname.equal pname stack_pname) ->
| Some {pname= stack_pname} when not (Procname.equal pname stack_pname) ->
L.die InternalError "Trying to unlock %s but top of stack is %s.@."
(Procname.to_string pname) (Procname.to_string stack_pname)
| Some _ ->
| Some {start} ->
( match Stack.top locked_procs with
| Some caller ->
caller.callees_useful <-
ExecutionDuration.add_duration_since caller.callees_useful start
| None ->
add_to_useful_time start ; add_to_total_time start ) ;
ProcLocker.unlock pname )

Loading…
Cancel
Save