diff options
Diffstat (limited to 'src/main/cpp')
-rw-r--r-- | src/main/cpp/blaze.cc | 30 | ||||
-rw-r--r-- | src/main/cpp/blaze_util.cc | 45 | ||||
-rw-r--r-- | src/main/cpp/blaze_util.h | 24 | ||||
-rw-r--r-- | src/main/cpp/blaze_util_darwin.cc | 15 | ||||
-rw-r--r-- | src/main/cpp/blaze_util_freebsd.cc | 15 | ||||
-rw-r--r-- | src/main/cpp/blaze_util_linux.cc | 19 | ||||
-rw-r--r-- | src/main/cpp/blaze_util_platform.h | 10 | ||||
-rw-r--r-- | src/main/cpp/blaze_util_posix.cc | 24 | ||||
-rw-r--r-- | src/main/cpp/blaze_util_windows.cc | 23 |
9 files changed, 145 insertions, 60 deletions
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc index 6809b0b7f7..3571b236d2 100644 --- a/src/main/cpp/blaze.cc +++ b/src/main/cpp/blaze.cc @@ -731,9 +731,8 @@ static void StartServerAndConnect(const WorkspaceLayout *workspace_layout, // disaster. int server_pid = GetServerPid(server_dir); if (server_pid > 0) { - if (VerifyServerProcess(server_pid, globals->options->output_base, - globals->options->install_base)) { - if (KillServerProcess(server_pid)) { + if (VerifyServerProcess(server_pid, globals->options->output_base)) { + if (KillServerProcess(server_pid, globals->options->output_base)) { fprintf(stderr, "Killed non-responsive server process (pid=%d)\n", server_pid); SetRestartReasonIfNotSet(SERVER_UNRESPONSIVE); @@ -1458,8 +1457,7 @@ bool GrpcBlazeServer::Connect() { return false; } - if (!VerifyServerProcess(server_pid, globals->options->output_base, - globals->options->install_base)) { + if (!VerifyServerProcess(server_pid, globals->options->output_base)) { return false; } @@ -1587,11 +1585,13 @@ void GrpcBlazeServer::KillRunningServer() { while (reader->Read(&response)) { } - // Kill the server process for good measure (if we know the server PID) + // Wait for the server process to terminate (if we know the server PID). + // If it does not terminate itself gracefully within 1m, terminate it. if (globals->server_pid > 0 && - VerifyServerProcess(globals->server_pid, globals->options->output_base, - globals->options->install_base)) { - KillServerProcess(globals->server_pid); + !AwaitServerProcessTermination(globals->server_pid, + globals->options->output_base, + kPostShutdownGracePeriodSeconds)) { + KillServerProcess(globals->server_pid, globals->options->output_base); } connected_ = false; @@ -1599,6 +1599,7 @@ void GrpcBlazeServer::KillRunningServer() { unsigned int GrpcBlazeServer::Communicate() { assert(connected_); + assert(globals->server_pid > 0); vector<string> arg_vector; string command = globals->option_processor->GetCommand(); @@ -1643,6 +1644,7 @@ unsigned int GrpcBlazeServer::Communicate() { int exit_code = -1; bool finished = false; bool finished_warning_emitted = false; + bool termination_expected = false; while (reader->Read(&response)) { if (finished && !finished_warning_emitted) { @@ -1659,6 +1661,7 @@ unsigned int GrpcBlazeServer::Communicate() { if (response.finished()) { exit_code = response.exit_code(); + termination_expected = response.termination_expected(); finished = true; } @@ -1694,6 +1697,15 @@ unsigned int GrpcBlazeServer::Communicate() { } } + // If the server has shut down, but does not terminate itself within a 1m + // grace period, terminate it. + if (termination_expected && + !AwaitServerProcessTermination(globals->server_pid, + globals->options->output_base, + kPostShutdownGracePeriodSeconds)) { + KillServerProcess(globals->server_pid, globals->options->output_base); + } + SendAction(CancelThreadAction::JOIN); cancel_thread.join(); diff --git a/src/main/cpp/blaze_util.cc b/src/main/cpp/blaze_util.cc index 2831ead175..be3e0ebfd9 100644 --- a/src/main/cpp/blaze_util.cc +++ b/src/main/cpp/blaze_util.cc @@ -37,6 +37,10 @@ using std::vector; const char kServerPidFile[] = "server.pid.txt"; +const unsigned int kPostShutdownGracePeriodSeconds = 60; + +const unsigned int kPostKillGracePeriodSeconds = 10; + string MakeAbsolute(const string &path) { if (path.empty()) { return blaze_util::GetCwd(); @@ -163,4 +167,45 @@ bool IsArg(const string& arg) { && (arg != "-help") && (arg != "-h"); } +void LogWait(unsigned int elapsed_seconds, unsigned int wait_seconds) { + SigPrintf("WARNING: Waiting for server process to terminate " + "(waited %d seconds, waiting at most %d)\n", + elapsed_seconds, wait_seconds); +} + +bool AwaitServerProcessTermination(int pid, const string& output_base, + unsigned int wait_seconds) { + uint64_t st = GetMillisecondsMonotonic(); + const unsigned int first_seconds = 5; + bool logged_first = false; + const unsigned int second_seconds = 10; + bool logged_second = false; + const unsigned int third_seconds = 30; + bool logged_third = false; + + while (VerifyServerProcess(pid, output_base)) { + TrySleep(100); + uint64_t elapsed_millis = GetMillisecondsMonotonic() - st; + if (!logged_first && elapsed_millis > first_seconds * 1000) { + LogWait(first_seconds, wait_seconds); + logged_first = true; + } + if (!logged_second && elapsed_millis > second_seconds * 1000) { + LogWait(second_seconds, wait_seconds); + logged_second = true; + } + if (!logged_third && elapsed_millis > third_seconds * 1000) { + LogWait(third_seconds, wait_seconds); + logged_third = true; + } + if (elapsed_millis > wait_seconds * 1000) { + SigPrintf("INFO: Waited %d seconds for server process (pid=%d) to" + " terminate.\n", + wait_seconds, pid); + return false; + } + } + return true; +} + } // namespace blaze diff --git a/src/main/cpp/blaze_util.h b/src/main/cpp/blaze_util.h index 4c8d6c291e..f2c3a2f187 100644 --- a/src/main/cpp/blaze_util.h +++ b/src/main/cpp/blaze_util.h @@ -52,13 +52,14 @@ bool GetNullaryOption(const char *arg, const char *key); // Searches for 'key' in 'args' using GetUnaryOption. Arguments found after '--' // are omitted from the search. -// Returns true iff key is a flag in args. +// Returns the value of the 'key' flag iff it occurs in args. +// Returns NULL otherwise. const char* SearchUnaryOption(const std::vector<std::string>& args, const char* key); // Searches for 'key' in 'args' using GetNullaryOption. Arguments found after // '--' are omitted from the search. -// Returns the value of the 'key' flag iff it occurs in args. +// Returns true iff key is a flag in args. bool SearchNullaryOption(const std::vector<std::string>& args, const char* key); @@ -82,6 +83,25 @@ bool CheckJavaVersionIsAtLeast(const std::string &jvm_version, // Returns true iff arg is a valid command line argument for bazel. bool IsArg(const std::string& arg); +// Wait to see if the server process terminates. Checks the server's status +// immediately, and repeats the check every 100ms until approximately +// wait_seconds elapses or the server process terminates. Returns true if a +// check sees that the server process terminated. Logs to stderr after 5, 10, +// and 30 seconds if the wait lasts that long. +bool AwaitServerProcessTermination(int pid, const std::string& output_base, + unsigned int wait_seconds); + +// The number of seconds the client will wait for the server process to +// terminate itself after the client receives the final response from a command +// that shuts down the server. After waiting this time, if the server process +// remains, the client will forcibly kill the server. +extern const unsigned int kPostShutdownGracePeriodSeconds; + +// The number of seconds the client will wait for the server process to +// terminate after the client forcibly kills the server. After waiting this +// time, if the server process remains, the client will die. +extern const unsigned int kPostKillGracePeriodSeconds; + // Returns the string representation of `value`. // Workaround for mingw where std::to_string is not implemented. // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52015. diff --git a/src/main/cpp/blaze_util_darwin.cc b/src/main/cpp/blaze_util_darwin.cc index 075b48a1e0..008b721e0a 100644 --- a/src/main/cpp/blaze_util_darwin.cc +++ b/src/main/cpp/blaze_util_darwin.cc @@ -196,16 +196,11 @@ void WriteSystemSpecificProcessIdentifier( const string& server_dir, pid_t server_pid) { } -bool VerifyServerProcess( - int pid, const string& output_base, const string& install_base) { - // TODO(lberki): This might accidentally kill an unrelated process if the - // server died and the PID got reused. - return true; -} - -bool KillServerProcess(int pid) { - killpg(pid, SIGKILL); - return true; +bool VerifyServerProcess(int pid, const string &output_base) { + // TODO(lberki): This only checks for the process's existence, not whether + // its start time matches. Therefore this might accidentally kill an + // unrelated process if the server died and the PID got reused. + return killpg(pid, 0) == 0; } // Sets a flag on path to exclude the path from Apple's automatic backup service diff --git a/src/main/cpp/blaze_util_freebsd.cc b/src/main/cpp/blaze_util_freebsd.cc index 213af024e5..4ff2278b54 100644 --- a/src/main/cpp/blaze_util_freebsd.cc +++ b/src/main/cpp/blaze_util_freebsd.cc @@ -152,16 +152,11 @@ void WriteSystemSpecificProcessIdentifier( const string& server_dir, pid_t server_pid) { } -bool VerifyServerProcess( - int pid, const string& output_base, const string& install_base) { - // TODO(lberki): This might accidentally kill an unrelated process if the - // server died and the PID got reused. - return true; -} - -bool KillServerProcess(int pid) { - killpg(pid, SIGKILL); - return true; +bool VerifyServerProcess(int pid, const string &output_base) { + // TODO(lberki): This only checks for the process's existence, not whether + // its start time matches. Therefore this might accidentally kill an + // unrelated process if the server died and the PID got reused. + return killpg(pid, 0) == 0; } // Not supported. diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc index 2e5e07c04f..72eccb2d9a 100644 --- a/src/main/cpp/blaze_util_linux.cc +++ b/src/main/cpp/blaze_util_linux.cc @@ -234,8 +234,7 @@ void WriteSystemSpecificProcessIdentifier( // On Linux we use a combination of PID and start time to identify the server // process. That is supposed to be unique unless one can start more processes // than there are PIDs available within a single jiffy. -bool VerifyServerProcess( - int pid, const string& output_base, const string& install_base) { +bool VerifyServerProcess(int pid, const string& output_base) { string start_time; if (!GetStartTime(ToString(pid), &start_time)) { // Cannot read PID file from /proc . Process died meantime, all is good. No @@ -253,22 +252,6 @@ bool VerifyServerProcess( return !file_present || recorded_start_time == start_time; } -bool KillServerProcess(int pid) { - // Kill the process and make sure it's dead before proceeding. - killpg(pid, SIGKILL); - int check_killed_retries = 10; - while (killpg(pid, 0) == 0) { - if (check_killed_retries-- > 0) { - sleep(1); - } else { - die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, - "Attempted to kill stale blaze server process (pid=%d) using " - "SIGKILL, but it did not die in a timely fashion.", pid); - } - } - return true; -} - // Not supported. void ExcludePathFromBackup(const string &path) { } diff --git a/src/main/cpp/blaze_util_platform.h b/src/main/cpp/blaze_util_platform.h index 05eb772062..b842ae8385 100644 --- a/src/main/cpp/blaze_util_platform.h +++ b/src/main/cpp/blaze_util_platform.h @@ -168,13 +168,17 @@ uint64_t AcquireLock(const std::string& output_base, bool batch_mode, void ReleaseLock(BlazeLock* blaze_lock); // Verifies whether the server process still exists. Returns true if it does. -bool VerifyServerProcess(int pid, const std::string& output_base, - const std::string& install_base); +bool VerifyServerProcess(int pid, const std::string& output_base); // Kills a server process based on its PID. // Returns true if the server process was found and killed. // WARNING! This function can be called from a signal handler! -bool KillServerProcess(int pid); +bool KillServerProcess(int pid, const std::string& output_base); + +// Wait for approximately the specified number of milliseconds. The actual +// amount of time waited may be more or less because of interrupts or system +// clock resolution. +void TrySleep(unsigned int milliseconds); // Mark path as being excluded from backups (if supported by operating system). void ExcludePathFromBackup(const std::string& path); diff --git a/src/main/cpp/blaze_util_posix.cc b/src/main/cpp/blaze_util_posix.cc index af0ce17385..f5c7241db8 100644 --- a/src/main/cpp/blaze_util_posix.cc +++ b/src/main/cpp/blaze_util_posix.cc @@ -72,7 +72,9 @@ static void handler(int signum) { "\n%s caught third interrupt signal; killed.\n\n", SignalHandler::Get().GetGlobals()->options->product_name.c_str()); if (SignalHandler::Get().GetGlobals()->server_pid != -1) { - KillServerProcess(SignalHandler::Get().GetGlobals()->server_pid); + KillServerProcess( + SignalHandler::Get().GetGlobals()->server_pid, + SignalHandler::Get().GetGlobals()->options->output_base); } _exit(1); } @@ -630,6 +632,26 @@ void ReleaseLock(BlazeLock* blaze_lock) { close(blaze_lock->lockfd); } +bool KillServerProcess(int pid, const string& output_base) { + // Kill the process and make sure it's dead before proceeding. + killpg(pid, SIGKILL); + if (!AwaitServerProcessTermination(pid, output_base, + kPostKillGracePeriodSeconds)) { + die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Attempted to kill stale server process (pid=%d) using " + "SIGKILL, but it did not die in a timely fashion.", + pid); + } + return true; +} + +void TrySleep(unsigned int milliseconds) { + unsigned int seconds_part = milliseconds / 1000; + unsigned int nanoseconds_part = (milliseconds % 1000) * 1000 * 1000; + struct timespec sleeptime = {seconds_part, nanoseconds_part}; + nanosleep(&sleeptime, NULL); +} + string GetUserName() { string user = GetEnv("USER"); if (!user.empty()) { diff --git a/src/main/cpp/blaze_util_windows.cc b/src/main/cpp/blaze_util_windows.cc index a5bcf45450..802b3c2395 100644 --- a/src/main/cpp/blaze_util_windows.cc +++ b/src/main/cpp/blaze_util_windows.cc @@ -122,7 +122,9 @@ BOOL WINAPI ConsoleCtrlHandler(_In_ DWORD ctrlType) { "\n%s caught third Ctrl+C handler signal; killed.\n\n", SignalHandler::Get().GetGlobals()->options->product_name.c_str()); if (SignalHandler::Get().GetGlobals()->server_pid != -1) { - KillServerProcess(SignalHandler::Get().GetGlobals()->server_pid); + KillServerProcess( + SignalHandler::Get().GetGlobals()->server_pid, + SignalHandler::Get().GetGlobals()->options->output_base); } _exit(1); } @@ -171,7 +173,9 @@ static void handler(int signum) { "\n%s caught third interrupt signal; killed.\n\n", SignalHandler::Get().GetGlobals()->options->product_name.c_str()); if (SignalHandler::Get().GetGlobals()->server_pid != -1) { - KillServerProcess(SignalHandler::Get().GetGlobals()->server_pid); + KillServerProcess( + SignalHandler::Get().GetGlobals()->server_pid, + SignalHandler::Get().GetGlobals()->options->output_base); } _exit(1); } @@ -1018,8 +1022,7 @@ bool CompareAbsolutePaths(const string& a, const string& b) { // On Windows (and Linux) we use a combination of PID and start time to identify // the server process. That is supposed to be unique unless one can start more // processes than there are PIDs available within a single jiffy. -bool VerifyServerProcess( - int pid, const string& output_base, const string& install_base) { +bool VerifyServerProcess(int pid, const string& output_base) { AutoHandle process( ::OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid)); if (!process.IsValid()) { @@ -1046,7 +1049,7 @@ bool VerifyServerProcess( return !file_present || recorded_start_time == ToString(start_time); } -bool KillServerProcess(int pid) { +bool KillServerProcess(int pid, const string& output_base) { AutoHandle process(::OpenProcess( PROCESS_TERMINATE | PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid)); DWORD exitcode = 0; @@ -1058,12 +1061,18 @@ bool KillServerProcess(int pid) { } BOOL result = TerminateProcess(process, /*uExitCode*/ 0); - if (!result) { - blaze_util::PrintError("Cannot terminate server process with PID %d", pid); + if (!result || !AwaitServerProcessTermination(pid, output_base, + kPostKillGracePeriodSeconds)) { + pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Cannot terminate server process with PID %d", pid); } return result; } +void TrySleep(unsigned int milliseconds) { + Sleep(milliseconds); +} + // Not supported. void ExcludePathFromBackup(const string &path) { } |