aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/cpp/blaze.cc
diff options
context:
space:
mode:
author: Lukacs Berki <lberki@google.com> 2017-01-11 09:08:54 +0000
committer: Marcel Hlopko <hlopko@google.com> 2017-01-11 10:02:29 +0000
commit: 10dd6380591704ffd63dcb43c86af66ba678931a (patch)
tree: ed8ceadaf7a047ea7995b6f56ec5e01efdbb7467 /src/main/cpp/blaze.cc
parent: 5163ec2c0db8fbdc7f367bb4aca5b6d7ae6d80b0 (diff)
Kill the server when the server.pid.txt file cannot be found and start a new one.
This prevents Bazel from getting stuck when a gRPC-only server is used in a client, then an AF_UNIX-only one, then a gRPC-only one again. -- PiperOrigin-RevId: 144176526 MOS_MIGRATED_REVID=144176526
Diffstat (limited to 'src/main/cpp/blaze.cc')
-rw-r--r-- src/main/cpp/blaze.cc 72
1 files changed, 48 insertions, 24 deletions
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index d5948ab38f..9958d0438e 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -245,6 +245,7 @@ class GrpcBlazeServer : public BlazeServer {
// actions from.
blaze_util::IPipe *pipe_;
+ bool TryConnect(command_server::CommandServer::Stub* client);
void CancelThread();
void SendAction(CancelThreadAction action);
void SendCancelMessage();
@@ -1372,6 +1373,29 @@ GrpcBlazeServer::~GrpcBlazeServer() {
pipe_ = NULL;
}
+bool GrpcBlazeServer::TryConnect(command_server::CommandServer::Stub* client) {
+ grpc::ClientContext context;
+ context.set_deadline(
+ std::chrono::system_clock::now() +
+ std::chrono::seconds(connect_timeout_secs_));
+
+ command_server::PingRequest request;
+ command_server::PingResponse response;
+ request.set_cookie(request_cookie_);
+
+ debug_log("Trying to connect to server (timeout: %d secs)...",
+ connect_timeout_secs_);
+ grpc::Status status = client->Ping(&context, request, &response);
+
+ if (!status.ok() || response.cookie() != response_cookie_) {
+ debug_log("Connection to server failed: %s",
+ status.error_message().c_str());
+ return false;
+ }
+
+ return true;
+}
+
bool GrpcBlazeServer::Connect() {
assert(!connected_);
@@ -1409,34 +1433,34 @@ bool GrpcBlazeServer::Connect() {
std::unique_ptr<command_server::CommandServer::Stub> client(
command_server::CommandServer::NewStub(channel));
- grpc::ClientContext context;
- context.set_deadline(
- std::chrono::system_clock::now() +
- std::chrono::seconds(connect_timeout_secs_));
-
- command_server::PingRequest request;
- command_server::PingResponse response;
- request.set_cookie(request_cookie_);
-
- debug_log("Trying to connect to server (timeout: %d secs)...",
- connect_timeout_secs_);
- grpc::Status status = client->Ping(&context, request, &response);
-
- if (!status.ok() || response.cookie() != response_cookie_) {
- debug_log("Connection to server failed: %s",
- status.error_message().c_str());
+ if (!TryConnect(client.get())) {
return false;
}
+ this->client_ = std::move(client);
+ connected_ = true;
+
globals->server_pid = GetServerPid(server_dir);
if (globals->server_pid <= 0) {
- pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "can't get PID of existing server (server dir=%s)",
- server_dir.c_str());
+ fprintf(stderr, "Can't get PID of existing server (server dir=%s). "
+ "Shutting it down and starting a new one...\n",
+ server_dir.c_str());
+ // This means that we have a server we could connect to but without a PID
+ // file, which in turn means that something went wrong before. Kill the
+ // server so that we can start with as clean a slate as possible. This may
+ // happen if someone (e.g. a client or server that's very old and uses an
+ // AF_UNIX socket instead of gRPC) deletes the server.pid.txt file.
+ KillRunningServer();
+ // Then wait until it actually dies
+ do {
+ auto next_attempt_time(
+ std::chrono::system_clock::now() + std::chrono::milliseconds(1000));
+ std::this_thread::sleep_until(next_attempt_time);
+ } while (TryConnect(client_.get()));
+
+ return false;
}
- this->client_ = std::move(client);
- connected_ = true;
return true;
}
@@ -1533,7 +1557,6 @@ void GrpcBlazeServer::SendCancelMessage() {
// This will wait indefinitely until the server shuts down
void GrpcBlazeServer::KillRunningServer() {
assert(connected_);
- assert(globals->server_pid > 0);
grpc::ClientContext context;
command_server::RunRequest request;
@@ -1548,8 +1571,9 @@ void GrpcBlazeServer::KillRunningServer() {
while (reader->Read(&response)) {}
- // Kill the server process for good measure.
- if (VerifyServerProcess(globals->server_pid, globals->options->output_base,
+ // Kill the server process for good measure (if we know the server PID)
+ if (globals->server_pid > 0 &&
+ VerifyServerProcess(globals->server_pid, globals->options->output_base,
globals->options->install_base)) {
KillServerProcess(globals->server_pid);
}