[scheduler][restart] Quantify wasted work

Summary: Count the time used by the `RestartScheduler` for analysis (useful) and the time wasted because a a worker was not able to take a lock to have a metric to compare different versions of the scheduler. The wasted time is not actually count but it can be calculated by substracting useful time from the total time. This was implemented like these to avoid substractions that may make the floating point calculations more complicated.

Reviewed By: ngorogiannis

Differential Revision: D19969960

fbshipit-source-id: 68a1132ca
master
Fernando Gasperi Jabalera 5 years ago committed by Facebook Github Bot
parent 5c5609591e
commit a66f6cd736

@ -21,8 +21,10 @@ include struct
; mutable ondemand_procs_analyzed: int ; mutable ondemand_procs_analyzed: int
; mutable ondemand_local_cache_hits: int ; mutable ondemand_local_cache_hits: int
; mutable ondemand_local_cache_misses: int ; mutable ondemand_local_cache_misses: int
; mutable proc_locker_lock_sys_time: float ; mutable proc_locker_lock_time: ExecutionDuration.t
; mutable proc_locker_unlock_sys_time: float } ; mutable proc_locker_unlock_time: ExecutionDuration.t
; mutable restart_scheduler_useful_time: ExecutionDuration.t
; mutable restart_scheduler_total_time: ExecutionDuration.t }
[@@deriving fields] [@@deriving fields]
end end
@ -35,27 +37,25 @@ let global_stats =
; ondemand_procs_analyzed= 0 ; ondemand_procs_analyzed= 0
; ondemand_local_cache_hits= 0 ; ondemand_local_cache_hits= 0
; ondemand_local_cache_misses= 0 ; ondemand_local_cache_misses= 0
; proc_locker_lock_sys_time= 0. ; proc_locker_lock_time= ExecutionDuration.zero
; proc_locker_unlock_sys_time= 0. } ; proc_locker_unlock_time= ExecutionDuration.zero
; restart_scheduler_useful_time= ExecutionDuration.zero
; restart_scheduler_total_time= ExecutionDuration.zero }
let get () = global_stats let get () = global_stats
let incr field = let update_with field ~f =
match Field.setter field with match Field.setter field with
| None -> | None ->
L.die InternalError "incr on non-mutable field %s" (Field.name field) L.die InternalError "incr on non-mutable field %s" (Field.name field)
| Some set -> | Some set ->
set global_stats (Field.get field global_stats + 1) set global_stats (f (Field.get field global_stats))
let add field seconds_count = let incr field = update_with field ~f:(( + ) 1)
match Field.setter field with
| None ->
L.die InternalError "add on non-mutable field %s" (Field.name field)
| Some set ->
set global_stats (Field.get field global_stats +. seconds_count)
let add field exe_duration = update_with field ~f:(ExecutionDuration.add exe_duration)
let incr_summary_file_try_load () = incr Fields.summary_file_try_load let incr_summary_file_try_load () = incr Fields.summary_file_try_load
@ -73,12 +73,20 @@ let incr_ondemand_local_cache_hits () = incr Fields.ondemand_local_cache_hits
let incr_ondemand_local_cache_misses () = incr Fields.ondemand_local_cache_misses let incr_ondemand_local_cache_misses () = incr Fields.ondemand_local_cache_misses
let add_to_proc_locker_lock_sys_time seconds_count = let add_to_proc_locker_lock_time execution_duration =
add Fields.proc_locker_lock_sys_time seconds_count add Fields.proc_locker_lock_time execution_duration
let add_to_proc_locker_unlock_time execution_duration =
add Fields.proc_locker_unlock_time execution_duration
let add_to_restart_scheduler_useful_time execution_duration =
add Fields.restart_scheduler_useful_time execution_duration
let add_to_proc_locker_unlock_sys_time seconds_count = let add_to_restart_scheduler_total_time execution_duration =
add Fields.proc_locker_unlock_sys_time seconds_count add Fields.restart_scheduler_total_time execution_duration
let copy from ~into : unit = let copy from ~into : unit =
@ -90,14 +98,16 @@ let copy from ~into : unit =
; ondemand_procs_analyzed ; ondemand_procs_analyzed
; ondemand_local_cache_hits ; ondemand_local_cache_hits
; ondemand_local_cache_misses ; ondemand_local_cache_misses
; proc_locker_lock_sys_time ; proc_locker_lock_time
; proc_locker_unlock_sys_time } = ; proc_locker_unlock_time
; restart_scheduler_useful_time
; restart_scheduler_total_time } =
from from
in in
Fields.Direct.set_all_mutable_fields into ~summary_file_try_load ~summary_read_from_disk Fields.Direct.set_all_mutable_fields into ~summary_file_try_load ~summary_read_from_disk
~summary_cache_hits ~summary_cache_misses ~summary_has_model_queries ~ondemand_procs_analyzed ~summary_cache_hits ~summary_cache_misses ~summary_has_model_queries ~ondemand_procs_analyzed
~ondemand_local_cache_hits ~ondemand_local_cache_misses ~proc_locker_lock_sys_time ~ondemand_local_cache_hits ~ondemand_local_cache_misses ~proc_locker_lock_time
~proc_locker_unlock_sys_time ~proc_locker_unlock_time ~restart_scheduler_useful_time ~restart_scheduler_total_time
let merge stats1 stats2 = let merge stats1 stats2 =
@ -110,9 +120,16 @@ let merge stats1 stats2 =
; ondemand_local_cache_hits= stats1.ondemand_local_cache_hits + stats2.ondemand_local_cache_hits ; ondemand_local_cache_hits= stats1.ondemand_local_cache_hits + stats2.ondemand_local_cache_hits
; ondemand_local_cache_misses= ; ondemand_local_cache_misses=
stats1.ondemand_local_cache_misses + stats2.ondemand_local_cache_misses stats1.ondemand_local_cache_misses + stats2.ondemand_local_cache_misses
; proc_locker_lock_sys_time= stats1.proc_locker_lock_sys_time +. stats2.proc_locker_lock_sys_time ; proc_locker_lock_time=
; proc_locker_unlock_sys_time= ExecutionDuration.add stats1.proc_locker_lock_time stats2.proc_locker_lock_time
stats1.proc_locker_unlock_sys_time +. stats2.proc_locker_unlock_sys_time } ; proc_locker_unlock_time=
ExecutionDuration.add stats1.proc_locker_unlock_time stats2.proc_locker_unlock_time
; restart_scheduler_useful_time=
ExecutionDuration.add stats1.restart_scheduler_useful_time
stats2.restart_scheduler_useful_time
; restart_scheduler_total_time=
ExecutionDuration.add stats1.restart_scheduler_total_time stats2.restart_scheduler_total_time
}
let initial = let initial =
@ -124,8 +141,10 @@ let initial =
; ondemand_procs_analyzed= 0 ; ondemand_procs_analyzed= 0
; ondemand_local_cache_hits= 0 ; ondemand_local_cache_hits= 0
; ondemand_local_cache_misses= 0 ; ondemand_local_cache_misses= 0
; proc_locker_lock_sys_time= 0. ; proc_locker_lock_time= ExecutionDuration.zero
; proc_locker_unlock_sys_time= 0. } ; proc_locker_unlock_time= ExecutionDuration.zero
; restart_scheduler_useful_time= ExecutionDuration.zero
; restart_scheduler_total_time= ExecutionDuration.zero }
let reset () = copy initial ~into:global_stats let reset () = copy initial ~into:global_stats
@ -138,8 +157,10 @@ let pp f stats =
let pp_int_field stats f field = let pp_int_field stats f field =
F.fprintf f "%s= %d@;" (Field.name field) (Field.get field stats) F.fprintf f "%s= %d@;" (Field.name field) (Field.get field stats)
in in
let pp_float_field stats f field = let pp_execution_duration_field stats f field =
F.fprintf f "%s= %.8f@;" (Field.name field) (Field.get field stats) let field_value = Field.get field stats in
let field_name = Field.name field in
F.fprintf f "%a@;" (ExecutionDuration.pp ~field:field_name) field_value
in in
let pp_cache_hits stats cache_misses f cache_hits_field = let pp_cache_hits stats cache_misses f cache_hits_field =
let cache_hits = Field.get cache_hits_field stats in let cache_hits = Field.get cache_hits_field stats in
@ -154,27 +175,36 @@ let pp f stats =
~ondemand_procs_analyzed:(pp_int_field stats f) ~ondemand_procs_analyzed:(pp_int_field stats f)
~ondemand_local_cache_hits:(pp_cache_hits stats stats.ondemand_local_cache_misses f) ~ondemand_local_cache_hits:(pp_cache_hits stats stats.ondemand_local_cache_misses f)
~ondemand_local_cache_misses:(pp_int_field stats f) ~ondemand_local_cache_misses:(pp_int_field stats f)
~proc_locker_lock_sys_time:(pp_float_field stats f) ~proc_locker_lock_time:(pp_execution_duration_field stats f)
~proc_locker_unlock_sys_time:(pp_float_field stats f) ~proc_locker_unlock_time:(pp_execution_duration_field stats f)
~restart_scheduler_useful_time:(pp_execution_duration_field stats f)
~restart_scheduler_total_time:(pp_execution_duration_field stats f)
in in
F.fprintf f "@[Backend stats:@\n@[<v2> %t@]@]@." (pp_stats stats) F.fprintf f "@[Backend stats:@\n@[<v2> %t@]@]@." (pp_stats stats)
let log_to_scuba stats = let log_to_scuba stats =
let create_counter field = let create_counter field =
LogEntry.mk_count ~label:("backend_stats." ^ Field.name field) ~value:(Field.get field stats) [LogEntry.mk_count ~label:("backend_stats." ^ Field.name field) ~value:(Field.get field stats)]
in in
let create_float_counter field = let secs_to_ms s = s *. 1000. |> Float.to_int in
LogEntry.mk_count let create_time_entry field =
~label:("backend_stats." ^ Field.name field) let field_value = Field.get field stats in
~value:(Field.get field stats |> Int.of_float) [ LogEntry.mk_time
~label:("backend_stats." ^ Field.name field ^ "_sys")
~duration_ms:(ExecutionDuration.sys_time field_value |> secs_to_ms)
; LogEntry.mk_time
~label:("backend_stats." ^ Field.name field ^ "_user")
~duration_ms:(ExecutionDuration.user_time field_value |> secs_to_ms) ]
in in
let entries = let entries =
Fields.to_list ~summary_file_try_load:create_counter ~summary_read_from_disk:create_counter Fields.to_list ~summary_file_try_load:create_counter ~summary_read_from_disk:create_counter
~summary_cache_hits:create_counter ~summary_cache_misses:create_counter ~summary_cache_hits:create_counter ~summary_cache_misses:create_counter
~summary_has_model_queries:create_counter ~ondemand_procs_analyzed:create_counter ~summary_has_model_queries:create_counter ~ondemand_procs_analyzed:create_counter
~ondemand_local_cache_hits:create_counter ~ondemand_local_cache_misses:create_counter ~ondemand_local_cache_hits:create_counter ~ondemand_local_cache_misses:create_counter
~proc_locker_lock_sys_time:create_float_counter ~proc_locker_lock_time:create_time_entry ~proc_locker_unlock_time:create_time_entry
~proc_locker_unlock_sys_time:create_float_counter ~restart_scheduler_useful_time:create_time_entry
~restart_scheduler_total_time:create_time_entry
|> List.concat
in in
ScubaLogging.log_many entries ScubaLogging.log_many entries

@ -31,9 +31,13 @@ val incr_ondemand_local_cache_hits : unit -> unit
val incr_ondemand_local_cache_misses : unit -> unit val incr_ondemand_local_cache_misses : unit -> unit
val add_to_proc_locker_lock_sys_time : float -> unit val add_to_proc_locker_lock_time : ExecutionDuration.t -> unit
val add_to_proc_locker_unlock_sys_time : float -> unit val add_to_proc_locker_unlock_time : ExecutionDuration.t -> unit
val add_to_restart_scheduler_total_time : ExecutionDuration.t -> unit
val add_to_restart_scheduler_useful_time : ExecutionDuration.t -> unit
val reset : unit -> unit val reset : unit -> unit
(** reset all stats *) (** reset all stats *)

@ -0,0 +1,63 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
module F = Format
module Duration = struct
type t = {time: float; comp: float}
let zero = {time= 0.; comp= 0.}
(* https://en.wikipedia.org/wiki/Kahan_summation_algorithm *)
let neumaier_sum_elapsed d elapsed =
let time = d.time +. elapsed in
let comp =
if Float.(abs d.time >. abs elapsed) then d.comp +. (d.time -. time +. elapsed)
else d.comp +. (elapsed -. time +. d.time)
in
{time; comp}
let since from ~now = {time= now -. from; comp= 0.}
let add d1 d2 = neumaier_sum_elapsed {d1 with comp= d1.comp +. d2.comp} d2.time
let secs d = d.time +. d.comp
end
type t = {utime: Duration.t; stime: Duration.t}
type 'a evaluation_result = {result: 'a; execution_duration: t}
let zero = {utime= Duration.zero; stime= Duration.zero}
let since (from : Unix.process_times) =
let now = Unix.times () in
{ utime= Duration.since from.tms_utime ~now:now.tms_utime
; stime= Duration.since from.tms_stime ~now:now.tms_stime }
let add exe_d1 exe_d2 =
{utime= Duration.add exe_d1.utime exe_d2.utime; stime= Duration.add exe_d1.stime exe_d2.stime}
let add_duration_since exe_duration (from : Unix.process_times) = add exe_duration (since from)
let user_time exe_duration = Duration.secs exe_duration.utime
let sys_time exe_duration = Duration.secs exe_duration.stime
let pp ~field fmt exe_duration =
F.fprintf fmt "%s_utime= %.8f@;%s_stime= %.8f" field (user_time exe_duration) field
(sys_time exe_duration)
let timed_evaluate ~f =
let start_time = Unix.times () in
let result = f () in
{result; execution_duration= since start_time}

@ -0,0 +1,28 @@
(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
open! IStd
type t
type 'a evaluation_result = {result: 'a; execution_duration: t}
val zero : t
val since : Unix.process_times -> t
val add_duration_since : t -> Unix.process_times -> t
val add : t -> t -> t
val user_time : t -> float
val sys_time : t -> float
val pp : field:string -> Format.formatter -> t -> unit
val timed_evaluate : f:(unit -> 'a) -> 'a evaluation_result

@ -6,20 +6,16 @@
*) *)
open! IStd open! IStd
module L = Logging
exception UnlockNotLocked of Procname.t exception UnlockNotLocked of Procname.t
let log_lock_systime = BackendStats.add_to_proc_locker_lock_sys_time let log_lock_time = BackendStats.add_to_proc_locker_lock_time
let log_unlock_systime = BackendStats.add_to_proc_locker_unlock_sys_time let log_unlock_time = BackendStats.add_to_proc_locker_unlock_time
let record_sys_time_of ~f ~log_f = let record_time_of ~f ~log_f =
let start_time = Unix.times () in let ExecutionDuration.{result; execution_duration} = ExecutionDuration.timed_evaluate ~f in
let result = f () in log_f execution_duration ; result
let end_time = Unix.times () in
let sys_time_spent = end_time.tms_stime -. start_time.tms_stime in
log_f sys_time_spent ; result
let locks_dir = Config.procnames_locks_dir let locks_dir = Config.procnames_locks_dir
@ -35,13 +31,13 @@ let clean () = ()
let filename_from pname = locks_dir ^/ Procname.to_filename pname let filename_from pname = locks_dir ^/ Procname.to_filename pname
let unlock pname = let unlock pname =
record_sys_time_of ~log_f:log_unlock_systime ~f:(fun () -> record_time_of ~log_f:log_unlock_time ~f:(fun () ->
try Unix.unlink (filename_from pname) try Unix.unlink (filename_from pname)
with Unix.Unix_error (Unix.ENOENT, _, _) -> raise (UnlockNotLocked pname) ) with Unix.Unix_error (Unix.ENOENT, _, _) -> raise (UnlockNotLocked pname) )
let try_lock pname = let try_lock pname =
record_sys_time_of ~log_f:log_lock_systime ~f:(fun () -> record_time_of ~log_f:log_lock_time ~f:(fun () ->
try try
Unix.symlink ~target:locks_target ~link_name:(filename_from pname) ; Unix.symlink ~target:locks_target ~link_name:(filename_from pname) ;
true true

@ -53,11 +53,37 @@ let if_restart_scheduler f =
else match Config.scheduler with File | SyntacticCallGraph -> () | Restart -> f () else match Config.scheduler with File | SyntacticCallGraph -> () | Restart -> f ()
type locked_proc =
{pname: Procname.t; start: Unix.process_times; mutable callees_useful: ExecutionDuration.t}
let locked_procs = Stack.create () let locked_procs = Stack.create ()
let record_locked_proc (pname : Procname.t) = Stack.push locked_procs pname let record_locked_proc (pname : Procname.t) =
Stack.push locked_procs {pname; start= Unix.times (); callees_useful= ExecutionDuration.zero}
let add_to_useful_time (from : Unix.process_times) =
BackendStats.add_to_restart_scheduler_useful_time (ExecutionDuration.since from)
let add_to_useful_exe_duration exe_duration =
BackendStats.add_to_restart_scheduler_useful_time exe_duration
let add_to_total_time (from : Unix.process_times) =
BackendStats.add_to_restart_scheduler_total_time (ExecutionDuration.since from)
let unlock_all () =
Stack.until_empty locked_procs (fun {pname; start; callees_useful} ->
( match Stack.top locked_procs with
| Some caller ->
caller.callees_useful <- ExecutionDuration.add caller.callees_useful callees_useful
| None ->
add_to_useful_exe_duration callees_useful ;
add_to_total_time start ) ;
ProcLocker.unlock pname )
let unlock_all () = Stack.until_empty locked_procs ProcLocker.unlock
let lock_exn pname = let lock_exn pname =
if_restart_scheduler (fun () -> if_restart_scheduler (fun () ->
@ -73,10 +99,16 @@ let unlock pname =
| None -> | None ->
L.die InternalError "Trying to unlock %s but it does not appear to be locked.@." L.die InternalError "Trying to unlock %s but it does not appear to be locked.@."
(Procname.to_string pname) (Procname.to_string pname)
| Some stack_pname when not (Procname.equal pname stack_pname) -> | Some {pname= stack_pname} when not (Procname.equal pname stack_pname) ->
L.die InternalError "Trying to unlock %s but top of stack is %s.@." L.die InternalError "Trying to unlock %s but top of stack is %s.@."
(Procname.to_string pname) (Procname.to_string stack_pname) (Procname.to_string pname) (Procname.to_string stack_pname)
| Some _ -> | Some {start} ->
( match Stack.top locked_procs with
| Some caller ->
caller.callees_useful <-
ExecutionDuration.add_duration_since caller.callees_useful start
| None ->
add_to_useful_time start ; add_to_total_time start ) ;
ProcLocker.unlock pname ) ProcLocker.unlock pname )

Loading…
Cancel
Save