diff options
author | 2016-08-12 18:05:29 +0000 | |
---|---|---|
committer | 2016-08-16 15:20:22 +0000 | |
commit | 641c1327f08de4122a360a327e02101aa00f7394 (patch) | |
tree | f67788c28caf7fccba2b0f8705db6f23a06ff805 | |
parent | c780d63ca2ded5bc7e62e7305339c3edcae06bf7 (diff) |
Be more lenient with slow-to-react servers.
Increase the ping timeout on connect from five to ten seconds. This gives
servers that may be suffering from GC pressure or other ailments extra time to
respond.
On the other end, wait for orphaned servers to really die before proceeding.
This prevents race conditions around the delivery of SIGKILL and the starting
of the new server.
This may make us fail more slowly when the server is struggling; however, it
should give us better determinism, and these conditions should be rare.
--
MOS_MIGRATED_REVID=130118918
-rw-r--r-- | src/main/cpp/blaze.cc | 2 | ||||
-rw-r--r-- | src/main/cpp/blaze_util_linux.cc | 11 |
2 files changed, 12 insertions, 1 deletion
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc index 7fb3511a0a..74ac64fe86 100644 --- a/src/main/cpp/blaze.cc +++ b/src/main/cpp/blaze.cc @@ -1941,7 +1941,7 @@ bool GrpcBlazeServer::Connect() { grpc::ClientContext context; context.set_deadline( - std::chrono::system_clock::now() + std::chrono::seconds(5)); + std::chrono::system_clock::now() + std::chrono::seconds(10)); command_server::PingRequest request; command_server::PingResponse response; diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc index 27ab344159..82142d5b06 100644 --- a/src/main/cpp/blaze_util_linux.cc +++ b/src/main/cpp/blaze_util_linux.cc @@ -250,7 +250,18 @@ bool KillServerProcess( return false; } + // Kill the process and make sure it's dead before proceeding. killpg(pid, SIGKILL); + int check_killed_retries = 10; + while (killpg(pid, 0) == 0) { + if (check_killed_retries-- > 0) { + sleep(1); + } else { + die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Attempted to kill stale blaze server process (pid=%d) using " + "SIGKILL, but it did not die in a timely fashion.", pid); + } + } return true; } |