aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--src/main/cpp/blaze.cc30
-rw-r--r--src/main/cpp/blaze_util.cc45
-rw-r--r--src/main/cpp/blaze_util.h24
-rw-r--r--src/main/cpp/blaze_util_darwin.cc15
-rw-r--r--src/main/cpp/blaze_util_freebsd.cc15
-rw-r--r--src/main/cpp/blaze_util_linux.cc19
-rw-r--r--src/main/cpp/blaze_util_platform.h10
-rw-r--r--src/main/cpp/blaze_util_posix.cc24
-rw-r--r--src/main/cpp/blaze_util_windows.cc23
-rw-r--r--src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java10
-rw-r--r--src/main/protobuf/command_server.proto4
11 files changed, 154 insertions, 65 deletions
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 6809b0b7f7..3571b236d2 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -731,9 +731,8 @@ static void StartServerAndConnect(const WorkspaceLayout *workspace_layout,
// disaster.
int server_pid = GetServerPid(server_dir);
if (server_pid > 0) {
- if (VerifyServerProcess(server_pid, globals->options->output_base,
- globals->options->install_base)) {
- if (KillServerProcess(server_pid)) {
+ if (VerifyServerProcess(server_pid, globals->options->output_base)) {
+ if (KillServerProcess(server_pid, globals->options->output_base)) {
fprintf(stderr, "Killed non-responsive server process (pid=%d)\n",
server_pid);
SetRestartReasonIfNotSet(SERVER_UNRESPONSIVE);
@@ -1458,8 +1457,7 @@ bool GrpcBlazeServer::Connect() {
return false;
}
- if (!VerifyServerProcess(server_pid, globals->options->output_base,
- globals->options->install_base)) {
+ if (!VerifyServerProcess(server_pid, globals->options->output_base)) {
return false;
}
@@ -1587,11 +1585,13 @@ void GrpcBlazeServer::KillRunningServer() {
while (reader->Read(&response)) {
}
- // Kill the server process for good measure (if we know the server PID)
+ // Wait for the server process to terminate (if we know the server PID).
+ // If it does not terminate itself gracefully within 1m, terminate it.
if (globals->server_pid > 0 &&
- VerifyServerProcess(globals->server_pid, globals->options->output_base,
- globals->options->install_base)) {
- KillServerProcess(globals->server_pid);
+ !AwaitServerProcessTermination(globals->server_pid,
+ globals->options->output_base,
+ kPostShutdownGracePeriodSeconds)) {
+ KillServerProcess(globals->server_pid, globals->options->output_base);
}
connected_ = false;
@@ -1599,6 +1599,7 @@ void GrpcBlazeServer::KillRunningServer() {
unsigned int GrpcBlazeServer::Communicate() {
assert(connected_);
+ assert(globals->server_pid > 0);
vector<string> arg_vector;
string command = globals->option_processor->GetCommand();
@@ -1643,6 +1644,7 @@ unsigned int GrpcBlazeServer::Communicate() {
int exit_code = -1;
bool finished = false;
bool finished_warning_emitted = false;
+ bool termination_expected = false;
while (reader->Read(&response)) {
if (finished && !finished_warning_emitted) {
@@ -1659,6 +1661,7 @@ unsigned int GrpcBlazeServer::Communicate() {
if (response.finished()) {
exit_code = response.exit_code();
+ termination_expected = response.termination_expected();
finished = true;
}
@@ -1694,6 +1697,15 @@ unsigned int GrpcBlazeServer::Communicate() {
}
}
+ // If the server has shut down, but does not terminate itself within a 1m
+ // grace period, terminate it.
+ if (termination_expected &&
+ !AwaitServerProcessTermination(globals->server_pid,
+ globals->options->output_base,
+ kPostShutdownGracePeriodSeconds)) {
+ KillServerProcess(globals->server_pid, globals->options->output_base);
+ }
+
SendAction(CancelThreadAction::JOIN);
cancel_thread.join();
diff --git a/src/main/cpp/blaze_util.cc b/src/main/cpp/blaze_util.cc
index 2831ead175..be3e0ebfd9 100644
--- a/src/main/cpp/blaze_util.cc
+++ b/src/main/cpp/blaze_util.cc
@@ -37,6 +37,10 @@ using std::vector;
const char kServerPidFile[] = "server.pid.txt";
+const unsigned int kPostShutdownGracePeriodSeconds = 60;
+
+const unsigned int kPostKillGracePeriodSeconds = 10;
+
string MakeAbsolute(const string &path) {
if (path.empty()) {
return blaze_util::GetCwd();
@@ -163,4 +167,45 @@ bool IsArg(const string& arg) {
&& (arg != "-help") && (arg != "-h");
}
+void LogWait(unsigned int elapsed_seconds, unsigned int wait_seconds) {
+ SigPrintf("WARNING: Waiting for server process to terminate "
+ "(waited %d seconds, waiting at most %d)\n",
+ elapsed_seconds, wait_seconds);
+}
+
+bool AwaitServerProcessTermination(int pid, const string& output_base,
+ unsigned int wait_seconds) {
+ uint64_t st = GetMillisecondsMonotonic();
+ const unsigned int first_seconds = 5;
+ bool logged_first = false;
+ const unsigned int second_seconds = 10;
+ bool logged_second = false;
+ const unsigned int third_seconds = 30;
+ bool logged_third = false;
+
+ while (VerifyServerProcess(pid, output_base)) {
+ TrySleep(100);
+ uint64_t elapsed_millis = GetMillisecondsMonotonic() - st;
+ if (!logged_first && elapsed_millis > first_seconds * 1000) {
+ LogWait(first_seconds, wait_seconds);
+ logged_first = true;
+ }
+ if (!logged_second && elapsed_millis > second_seconds * 1000) {
+ LogWait(second_seconds, wait_seconds);
+ logged_second = true;
+ }
+ if (!logged_third && elapsed_millis > third_seconds * 1000) {
+ LogWait(third_seconds, wait_seconds);
+ logged_third = true;
+ }
+ if (elapsed_millis > wait_seconds * 1000) {
+ SigPrintf("INFO: Waited %d seconds for server process (pid=%d) to"
+ " terminate.\n",
+ wait_seconds, pid);
+ return false;
+ }
+ }
+ return true;
+}
+
} // namespace blaze
diff --git a/src/main/cpp/blaze_util.h b/src/main/cpp/blaze_util.h
index 4c8d6c291e..f2c3a2f187 100644
--- a/src/main/cpp/blaze_util.h
+++ b/src/main/cpp/blaze_util.h
@@ -52,13 +52,14 @@ bool GetNullaryOption(const char *arg, const char *key);
// Searches for 'key' in 'args' using GetUnaryOption. Arguments found after '--'
// are omitted from the search.
-// Returns true iff key is a flag in args.
+// Returns the value of the 'key' flag iff it occurs in args.
+// Returns NULL otherwise.
const char* SearchUnaryOption(const std::vector<std::string>& args,
const char* key);
// Searches for 'key' in 'args' using GetNullaryOption. Arguments found after
// '--' are omitted from the search.
-// Returns the value of the 'key' flag iff it occurs in args.
+// Returns true iff key is a flag in args.
bool SearchNullaryOption(const std::vector<std::string>& args,
const char* key);
@@ -82,6 +83,25 @@ bool CheckJavaVersionIsAtLeast(const std::string &jvm_version,
// Returns true iff arg is a valid command line argument for bazel.
bool IsArg(const std::string& arg);
+// Wait to see if the server process terminates. Checks the server's status
+// immediately, and repeats the check every 100ms until approximately
+// wait_seconds elapses or the server process terminates. Returns true if a
+// check sees that the server process terminated. Logs to stderr after 5, 10,
+// and 30 seconds if the wait lasts that long.
+bool AwaitServerProcessTermination(int pid, const std::string& output_base,
+ unsigned int wait_seconds);
+
+// The number of seconds the client will wait for the server process to
+// terminate itself after the client receives the final response from a command
+// that shuts down the server. After waiting this time, if the server process
+// remains, the client will forcibly kill the server.
+extern const unsigned int kPostShutdownGracePeriodSeconds;
+
+// The number of seconds the client will wait for the server process to
+// terminate after the client forcibly kills the server. After waiting this
+// time, if the server process remains, the client will die.
+extern const unsigned int kPostKillGracePeriodSeconds;
+
// Returns the string representation of `value`.
// Workaround for mingw where std::to_string is not implemented.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52015.
diff --git a/src/main/cpp/blaze_util_darwin.cc b/src/main/cpp/blaze_util_darwin.cc
index 075b48a1e0..008b721e0a 100644
--- a/src/main/cpp/blaze_util_darwin.cc
+++ b/src/main/cpp/blaze_util_darwin.cc
@@ -196,16 +196,11 @@ void WriteSystemSpecificProcessIdentifier(
const string& server_dir, pid_t server_pid) {
}
-bool VerifyServerProcess(
- int pid, const string& output_base, const string& install_base) {
- // TODO(lberki): This might accidentally kill an unrelated process if the
- // server died and the PID got reused.
- return true;
-}
-
-bool KillServerProcess(int pid) {
- killpg(pid, SIGKILL);
- return true;
+bool VerifyServerProcess(int pid, const string &output_base) {
+ // TODO(lberki): This only checks for the process's existence, not whether
+ // its start time matches. Therefore this might accidentally kill an
+ // unrelated process if the server died and the PID got reused.
+ return killpg(pid, 0) == 0;
}
// Sets a flag on path to exclude the path from Apple's automatic backup service
diff --git a/src/main/cpp/blaze_util_freebsd.cc b/src/main/cpp/blaze_util_freebsd.cc
index 213af024e5..4ff2278b54 100644
--- a/src/main/cpp/blaze_util_freebsd.cc
+++ b/src/main/cpp/blaze_util_freebsd.cc
@@ -152,16 +152,11 @@ void WriteSystemSpecificProcessIdentifier(
const string& server_dir, pid_t server_pid) {
}
-bool VerifyServerProcess(
- int pid, const string& output_base, const string& install_base) {
- // TODO(lberki): This might accidentally kill an unrelated process if the
- // server died and the PID got reused.
- return true;
-}
-
-bool KillServerProcess(int pid) {
- killpg(pid, SIGKILL);
- return true;
+bool VerifyServerProcess(int pid, const string &output_base) {
+ // TODO(lberki): This only checks for the process's existence, not whether
+ // its start time matches. Therefore this might accidentally kill an
+ // unrelated process if the server died and the PID got reused.
+ return killpg(pid, 0) == 0;
}
// Not supported.
diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc
index 2e5e07c04f..72eccb2d9a 100644
--- a/src/main/cpp/blaze_util_linux.cc
+++ b/src/main/cpp/blaze_util_linux.cc
@@ -234,8 +234,7 @@ void WriteSystemSpecificProcessIdentifier(
// On Linux we use a combination of PID and start time to identify the server
// process. That is supposed to be unique unless one can start more processes
// than there are PIDs available within a single jiffy.
-bool VerifyServerProcess(
- int pid, const string& output_base, const string& install_base) {
+bool VerifyServerProcess(int pid, const string& output_base) {
string start_time;
if (!GetStartTime(ToString(pid), &start_time)) {
// Cannot read PID file from /proc . Process died meantime, all is good. No
@@ -253,22 +252,6 @@ bool VerifyServerProcess(
return !file_present || recorded_start_time == start_time;
}
-bool KillServerProcess(int pid) {
- // Kill the process and make sure it's dead before proceeding.
- killpg(pid, SIGKILL);
- int check_killed_retries = 10;
- while (killpg(pid, 0) == 0) {
- if (check_killed_retries-- > 0) {
- sleep(1);
- } else {
- die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "Attempted to kill stale blaze server process (pid=%d) using "
- "SIGKILL, but it did not die in a timely fashion.", pid);
- }
- }
- return true;
-}
-
// Not supported.
void ExcludePathFromBackup(const string &path) {
}
diff --git a/src/main/cpp/blaze_util_platform.h b/src/main/cpp/blaze_util_platform.h
index 05eb772062..b842ae8385 100644
--- a/src/main/cpp/blaze_util_platform.h
+++ b/src/main/cpp/blaze_util_platform.h
@@ -168,13 +168,17 @@ uint64_t AcquireLock(const std::string& output_base, bool batch_mode,
void ReleaseLock(BlazeLock* blaze_lock);
// Verifies whether the server process still exists. Returns true if it does.
-bool VerifyServerProcess(int pid, const std::string& output_base,
- const std::string& install_base);
+bool VerifyServerProcess(int pid, const std::string& output_base);
// Kills a server process based on its PID.
// Returns true if the server process was found and killed.
// WARNING! This function can be called from a signal handler!
-bool KillServerProcess(int pid);
+bool KillServerProcess(int pid, const std::string& output_base);
+
+// Wait for approximately the specified number of milliseconds. The actual
+// amount of time waited may be more or less because of interrupts or system
+// clock resolution.
+void TrySleep(unsigned int milliseconds);
// Mark path as being excluded from backups (if supported by operating system).
void ExcludePathFromBackup(const std::string& path);
diff --git a/src/main/cpp/blaze_util_posix.cc b/src/main/cpp/blaze_util_posix.cc
index af0ce17385..f5c7241db8 100644
--- a/src/main/cpp/blaze_util_posix.cc
+++ b/src/main/cpp/blaze_util_posix.cc
@@ -72,7 +72,9 @@ static void handler(int signum) {
"\n%s caught third interrupt signal; killed.\n\n",
SignalHandler::Get().GetGlobals()->options->product_name.c_str());
if (SignalHandler::Get().GetGlobals()->server_pid != -1) {
- KillServerProcess(SignalHandler::Get().GetGlobals()->server_pid);
+ KillServerProcess(
+ SignalHandler::Get().GetGlobals()->server_pid,
+ SignalHandler::Get().GetGlobals()->options->output_base);
}
_exit(1);
}
@@ -630,6 +632,26 @@ void ReleaseLock(BlazeLock* blaze_lock) {
close(blaze_lock->lockfd);
}
+bool KillServerProcess(int pid, const string& output_base) {
+ // Kill the process and make sure it's dead before proceeding.
+ killpg(pid, SIGKILL);
+ if (!AwaitServerProcessTermination(pid, output_base,
+ kPostKillGracePeriodSeconds)) {
+ die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "Attempted to kill stale server process (pid=%d) using "
+ "SIGKILL, but it did not die in a timely fashion.",
+ pid);
+ }
+ return true;
+}
+
+void TrySleep(unsigned int milliseconds) {
+ unsigned int seconds_part = milliseconds / 1000;
+ unsigned int nanoseconds_part = (milliseconds % 1000) * 1000 * 1000;
+ struct timespec sleeptime = {seconds_part, nanoseconds_part};
+ nanosleep(&sleeptime, NULL);
+}
+
string GetUserName() {
string user = GetEnv("USER");
if (!user.empty()) {
diff --git a/src/main/cpp/blaze_util_windows.cc b/src/main/cpp/blaze_util_windows.cc
index a5bcf45450..802b3c2395 100644
--- a/src/main/cpp/blaze_util_windows.cc
+++ b/src/main/cpp/blaze_util_windows.cc
@@ -122,7 +122,9 @@ BOOL WINAPI ConsoleCtrlHandler(_In_ DWORD ctrlType) {
"\n%s caught third Ctrl+C handler signal; killed.\n\n",
SignalHandler::Get().GetGlobals()->options->product_name.c_str());
if (SignalHandler::Get().GetGlobals()->server_pid != -1) {
- KillServerProcess(SignalHandler::Get().GetGlobals()->server_pid);
+ KillServerProcess(
+ SignalHandler::Get().GetGlobals()->server_pid,
+ SignalHandler::Get().GetGlobals()->options->output_base);
}
_exit(1);
}
@@ -171,7 +173,9 @@ static void handler(int signum) {
"\n%s caught third interrupt signal; killed.\n\n",
SignalHandler::Get().GetGlobals()->options->product_name.c_str());
if (SignalHandler::Get().GetGlobals()->server_pid != -1) {
- KillServerProcess(SignalHandler::Get().GetGlobals()->server_pid);
+ KillServerProcess(
+ SignalHandler::Get().GetGlobals()->server_pid,
+ SignalHandler::Get().GetGlobals()->options->output_base);
}
_exit(1);
}
@@ -1018,8 +1022,7 @@ bool CompareAbsolutePaths(const string& a, const string& b) {
// On Windows (and Linux) we use a combination of PID and start time to identify
// the server process. That is supposed to be unique unless one can start more
// processes than there are PIDs available within a single jiffy.
-bool VerifyServerProcess(
- int pid, const string& output_base, const string& install_base) {
+bool VerifyServerProcess(int pid, const string& output_base) {
AutoHandle process(
::OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid));
if (!process.IsValid()) {
@@ -1046,7 +1049,7 @@ bool VerifyServerProcess(
return !file_present || recorded_start_time == ToString(start_time);
}
-bool KillServerProcess(int pid) {
+bool KillServerProcess(int pid, const string& output_base) {
AutoHandle process(::OpenProcess(
PROCESS_TERMINATE | PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid));
DWORD exitcode = 0;
@@ -1058,12 +1061,18 @@ bool KillServerProcess(int pid) {
}
BOOL result = TerminateProcess(process, /*uExitCode*/ 0);
- if (!result) {
- blaze_util::PrintError("Cannot terminate server process with PID %d", pid);
+ if (!result || !AwaitServerProcessTermination(pid, output_base,
+ kPostKillGracePeriodSeconds)) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "Cannot terminate server process with PID %d", pid);
}
return result;
}
+void TrySleep(unsigned int milliseconds) {
+ Sleep(milliseconds);
+}
+
// Not supported.
void ExcludePathFromBackup(const string &path) {
}
diff --git a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java
index 032da198d3..6abef0e4ac 100644
--- a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java
+++ b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java
@@ -860,12 +860,17 @@ public class GrpcServerImpl implements RPCServer {
// the cancel request won't find the thread to interrupt)
Thread.interrupted();
+ boolean shutdown = commandExecutor.shutdown();
+ if (shutdown) {
+ server.shutdown();
+ }
RunResponse response =
RunResponse.newBuilder()
.setCookie(responseCookie)
.setCommandId(commandId)
.setFinished(true)
.setExitCode(exitCode)
+ .setTerminationExpected(shutdown)
.build();
try {
@@ -876,11 +881,6 @@ public class GrpcServerImpl implements RPCServer {
log.info(String.format("Client cancelled command %s just right before its end: %s",
commandId, e.getMessage()));
}
-
- if (commandExecutor.shutdown()) {
- pidFileWatcherThread.signalShutdown();
- server.shutdown();
- }
}
private final CommandServerGrpc.CommandServerImplBase commandServer =
diff --git a/src/main/protobuf/command_server.proto b/src/main/protobuf/command_server.proto
index 3dfedd4c91..79f44b9930 100644
--- a/src/main/protobuf/command_server.proto
+++ b/src/main/protobuf/command_server.proto
@@ -77,6 +77,10 @@ message RunResponse {
// as soon as possible. This is not required to be set (non-empty) on every
// response.
string command_id = 6;
+
+ // Whether the command has shut down the server; if set, the client should
+ // wait until the server process dies before finishing.
+ bool termination_expected = 7;
}
// Passed to CommandServer.cancel to initiate graceful cancellation of an