From 7e902f241d5c569529ea28238fa482fbc3d83022 Mon Sep 17 00:00:00 2001
From: Sungkeun Cho <scho@fb.com>
Date: Mon, 1 Jun 2020 05:50:37 -0700
Subject: [PATCH] [infer] Explicitly exit when SIGINT is given

Summary:
There were two problems: (1) `Signal.Expert.handle` does not call exit by itself; (2) it calls `flush` inside, which introduced deadlock, result in zombie processes.

This diff changes overall processes of the signal handler.

1. It uses `Caml.Sys.set_signal` instead of `Signal.Expert.handle`.
2. Inside the signal handler it raises an exception, then which is catched in `uncaught_exception_handler` of `Config.ml`. Epilogues are executed there.

Reviewed By: jvillard

Differential Revision: D21769246

fbshipit-source-id: cecd998c6
---
 infer/src/base/Epilogues.ml         | 13 ++++---------
 infer/src/base/ProcessPool.ml       |  8 ++++++++
 infer/src/base/ProcessPoolState.ml  |  2 ++
 infer/src/base/ProcessPoolState.mli |  2 ++
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/infer/src/base/Epilogues.ml b/infer/src/base/Epilogues.ml
index 82ec3f7fa..490bd8eff 100644
--- a/infer/src/base/Epilogues.ml
+++ b/infer/src/base/Epilogues.ml
@@ -38,16 +38,11 @@ let run () =
   late ()
 
 
-(* Run the epilogues when we get SIGINT (Control-C). *)
-let () =
-  let run_epilogues_on_signal s =
-    F.eprintf "*** %s: Caught %s, time to die@."
-      (Filename.basename Sys.executable_name)
-      (Signal.to_string s) ;
-    run ()
-  in
-  Signal.Expert.handle Signal.int run_epilogues_on_signal
+(** Raised when we are interrupted by SIGINT *)
+exception Sigint
 
+(* Raise a specific exception when we get SIGINT (Control-C). *)
+let () = Caml.Sys.(set_signal sigint (Signal_handle (fun _ -> raise Sigint)))
 
 let reset () =
   (early_callback := fun () -> ()) ;
diff --git a/infer/src/base/ProcessPool.ml b/infer/src/base/ProcessPool.ml
index 57c595ea2..e41ea2890 100644
--- a/infer/src/base/ProcessPool.ml
+++ b/infer/src/base/ProcessPool.ml
@@ -189,6 +189,7 @@ let killall pool ~slot status =
       try Unix.wait (`Pid pid) |> ignore
       with Unix.Unix_error (ECHILD, _, _) ->
         (* some children may have died already, it's fine *) () ) ;
+  ProcessPoolState.has_running_children := false ;
   L.die InternalError "Subprocess %d: %s" slot status
 
 
@@ -322,6 +323,7 @@ let wait_all pool =
             (* Collect all children errors and die only at the end to avoid creating zombies. *)
             (slot, status) :: errors )
   in
+  ProcessPoolState.has_running_children := false ;
   ( if not (List.is_empty errors) then
     let pp_error f (slot, status) =
       F.fprintf f "Error in infer subprocess %d: %s@." slot
@@ -444,6 +446,12 @@ let create :
         let child_pipe = List.nth_exn children_pipes slot in
         fork_child ~file_lock ~child_prelude ~slot child_pipe ~f ~epilogue:child_epilogue )
   in
+  ProcessPoolState.has_running_children := true ;
+  Epilogues.register ~description:"Wait children processes exit" ~f:(fun () ->
+      if !ProcessPoolState.has_running_children then (
+        Array.iter slots ~f:(fun {pid} ->
+            ignore (Unix.wait (`Pid pid) : Pid.t * Unix.Exit_or_signal.t) ) ;
+        ProcessPoolState.has_running_children := false ) ) ;
   (* we have forked the child processes and are now in the parent *)
   let children_updates = List.map children_pipes ~f:(fun (pipe_child_r, _) -> pipe_child_r) in
   let children_states = Array.create ~len:jobs Initializing in
diff --git a/infer/src/base/ProcessPoolState.ml b/infer/src/base/ProcessPoolState.ml
index c9b22c2eb..d8aac5398 100644
--- a/infer/src/base/ProcessPoolState.ml
+++ b/infer/src/base/ProcessPoolState.ml
@@ -17,3 +17,5 @@ let pid = ref (lazy (Unix.getpid ()))
 let reset_pid () = pid := lazy (Unix.getpid ())
 
 let get_pid () = Lazy.force !pid
+
+let has_running_children = ref false
diff --git a/infer/src/base/ProcessPoolState.mli b/infer/src/base/ProcessPoolState.mli
index b5e845c2d..0b9db78f2 100644
--- a/infer/src/base/ProcessPoolState.mli
+++ b/infer/src/base/ProcessPoolState.mli
@@ -17,3 +17,5 @@ val update_status : (Mtime.t -> string -> unit) ref
 val get_pid : unit -> Pid.t
 
 val reset_pid : unit -> unit
+
+val has_running_children : bool ref