aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main
diff options
context:
space:
mode:
authorGravatar lberki <lberki@google.com>2017-05-10 04:30:39 -0400
committerGravatar Kristina Chodorow <kchodorow@google.com>2017-05-10 13:37:17 -0400
commitf2014a17a2541d98cf319c322ac461cec8082945 (patch)
tree27ba9844779411170c98ced7d479e1f9eb63ff5a /src/main
parent0f0ccc4fc8229c1860a9c9b58089d6cfb2ee971f (diff)
Update ExecuteDaemon() on POSIX systems so that the client writes the PID file and not the server.
This is so that the server does as few things as possible before exec() (preferably, nothing) so that we don't accidentally call malloc() which would make it possible to deadlock if the server spawned multiple threads before ExecuteDaemon(). RELNOTES: None. PiperOrigin-RevId: 155603273
Diffstat (limited to 'src/main')
-rw-r--r--src/main/cpp/blaze_util_darwin.cc3
-rw-r--r--src/main/cpp/blaze_util_freebsd.cc3
-rw-r--r--src/main/cpp/blaze_util_linux.cc11
-rw-r--r--src/main/cpp/blaze_util_posix.cc120
4 files changed, 99 insertions, 38 deletions
diff --git a/src/main/cpp/blaze_util_darwin.cc b/src/main/cpp/blaze_util_darwin.cc
index 63bfcec7c8..0bb523f9f9 100644
--- a/src/main/cpp/blaze_util_darwin.cc
+++ b/src/main/cpp/blaze_util_darwin.cc
@@ -183,7 +183,8 @@ string GetDefaultHostJavabase() {
return javabase.substr(0, javabase.length()-1);
}
-void WriteSystemSpecificProcessIdentifier(const string& server_dir) {
+void WriteSystemSpecificProcessIdentifier(
+ const string& server_dir, pid_t server_pid) {
}
bool VerifyServerProcess(
diff --git a/src/main/cpp/blaze_util_freebsd.cc b/src/main/cpp/blaze_util_freebsd.cc
index 713b791ee1..2855332ecf 100644
--- a/src/main/cpp/blaze_util_freebsd.cc
+++ b/src/main/cpp/blaze_util_freebsd.cc
@@ -149,7 +149,8 @@ string GetDefaultHostJavabase() {
return !javahome.empty() ? javahome : "/usr/local/openjdk8";
}
-void WriteSystemSpecificProcessIdentifier(const string& server_dir) {
+void WriteSystemSpecificProcessIdentifier(
+ const string& server_dir, pid_t server_pid) {
}
bool VerifyServerProcess(
diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc
index 61c95afe23..9f2b544dd8 100644
--- a/src/main/cpp/blaze_util_linux.cc
+++ b/src/main/cpp/blaze_util_linux.cc
@@ -214,13 +214,14 @@ static bool GetStartTime(const string& pid, string* start_time) {
return true;
}
-void WriteSystemSpecificProcessIdentifier(const string& server_dir) {
- string pid = ToString(getpid());
+void WriteSystemSpecificProcessIdentifier(
+ const string& server_dir, pid_t server_pid) {
+ string pid_string = ToString(server_pid);
string start_time;
- if (!GetStartTime(pid, &start_time)) {
+ if (!GetStartTime(pid_string, &start_time)) {
pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "Cannot get start time of process %s", pid.c_str());
+ "Cannot get start time of process %s", pid_string.c_str());
}
string start_time_file = blaze_util::JoinPath(server_dir, "server.starttime");
@@ -247,7 +248,7 @@ bool VerifyServerProcess(
blaze_util::JoinPath(output_base, "server/server.starttime"),
&recorded_start_time);
- // If start time file got deleted, but PID file didn't, assume taht this is an
+ // If start time file got deleted, but PID file didn't, assume that this is an
// old Blaze process that doesn't know how to write start time files yet.
return !file_present || recorded_start_time == start_time;
}
diff --git a/src/main/cpp/blaze_util_posix.cc b/src/main/cpp/blaze_util_posix.cc
index 30e9353e59..418bfa5685 100644
--- a/src/main/cpp/blaze_util_posix.cc
+++ b/src/main/cpp/blaze_util_posix.cc
@@ -16,11 +16,13 @@
#include <errno.h>
#include <fcntl.h>
#include <limits.h> // PATH_MAX
+#include <poll.h>
#include <pwd.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <sys/ioctl.h>
+#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -235,68 +237,124 @@ static void Daemonize(const string& daemon_output) {
(void) dup(STDOUT_FILENO); // stderr (2>&1)
}
-class PipeBlazeServerStartup : public BlazeServerStartup {
+// Notifies the client about the death of the server process by keeping a socket
+// open in the server. If the server dies for any reason, the socket will be
+// closed, which can be detected by the client.
+class SocketBlazeServerStartup : public BlazeServerStartup {
public:
- PipeBlazeServerStartup(int pipe_fd);
- virtual ~PipeBlazeServerStartup();
+ SocketBlazeServerStartup(int pipe_fd);
+ virtual ~SocketBlazeServerStartup();
virtual bool IsStillAlive();
private:
- int pipe_fd;
+ int fd;
};
-PipeBlazeServerStartup::PipeBlazeServerStartup(int pipe_fd) {
- this->pipe_fd = pipe_fd;
- if (fcntl(pipe_fd, F_SETFL, O_NONBLOCK | fcntl(pipe_fd, F_GETFL))) {
- pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "Failed: fcntl to enable O_NONBLOCK on pipe");
+SocketBlazeServerStartup::SocketBlazeServerStartup(int fd)
+ : fd(fd) {
+}
+
+SocketBlazeServerStartup::~SocketBlazeServerStartup() {
+ close(fd);
+}
+
+bool SocketBlazeServerStartup::IsStillAlive() {
+ struct pollfd pfd;
+ pfd.fd = fd;
+ pfd.events = POLLIN;
+ int result;
+ do {
+ result = poll(&pfd, 1, 0);
+ } while (result < 0 && errno == EINTR);
+ if (result == 0) {
+ // Timeout, server is still alive
+ return true;
+ } else {
+ // Whether it's an error or pfd.revents & POLLHUP > 0, we assume child is
+ // dead.
+ return false;
}
}
-PipeBlazeServerStartup::~PipeBlazeServerStartup() {
- close(pipe_fd);
+static void ReadFromFdWithRetryEintr(
+ int fd, void *buf, size_t count, const char* error_message) {
+ ssize_t result;
+ do {
+ result = read(fd, buf, count);
+ } while (result < 0 && errno == EINTR);
+ if (result != count) {
+ pdie(blaze_exit_code::INTERNAL_ERROR, "%s", error_message);
+ }
}
-bool PipeBlazeServerStartup::IsStillAlive() {
- char c;
- return read(this->pipe_fd, &c, 1) == -1 && errno == EAGAIN;
+
+static void WriteToFdWithRetryEintr(
+ int fd, void *buf, size_t count, const char* error_message) {
+ ssize_t result;
+ do {
+ // Ideally, we'd use send(..., MSG_NOSIGNAL), but that's not available on
+ // Darwin.
+ result = write(fd, buf, count);
+ } while (result < 0 && errno == EINTR);
+ if (result != count) {
+ pdie(blaze_exit_code::INTERNAL_ERROR, "%s", error_message);
+ }
}
-void WriteSystemSpecificProcessIdentifier(const string& server_dir);
+void WriteSystemSpecificProcessIdentifier(
+ const string& server_dir, pid_t server_pid);
void ExecuteDaemon(const string& exe,
const std::vector<string>& args_vector,
const string& daemon_output, const string& server_dir,
BlazeServerStartup** server_startup) {
int fds[2];
- if (pipe(fds)) {
- pdie(blaze_exit_code::INTERNAL_ERROR, "pipe creation failed");
+
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
+ pdie(blaze_exit_code::INTERNAL_ERROR, "socket creation failed");
}
+
CheckSingleThreaded();
int child = fork();
if (child == -1) {
pdie(blaze_exit_code::INTERNAL_ERROR, "fork() failed");
- } else if (child > 0) { // we're the parent
- close(fds[1]); // parent keeps only the reading side
+ } else if (child > 0) {
+ // Parent process (i.e. the client)
+ close(fds[1]); // parent keeps one side...
int unused_status;
waitpid(child, &unused_status, 0); // child double-forks
- *server_startup = new PipeBlazeServerStartup(fds[0]);
+ pid_t server_pid;
+ ReadFromFdWithRetryEintr(fds[0], &server_pid, sizeof server_pid,
+ "cannot read server PID from server");
+ string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
+ if (!blaze_util::WriteFile(ToString(server_pid), pid_file)) {
+ pdie(blaze_exit_code::INTERNAL_ERROR, "cannot write PID file");
+ }
+
+ WriteSystemSpecificProcessIdentifier(server_dir, server_pid);
+ char dummy = 'a';
+ WriteToFdWithRetryEintr(fds[0], &dummy, 1,
+ "cannot notify server about having written PID file");
+ *server_startup = new SocketBlazeServerStartup(fds[0]);
return;
- } else {
- close(fds[0]); // child keeps only the writing side
}
- Daemonize(daemon_output);
- string pid_string = GetProcessIdAsString();
- string pid_file = blaze_util::JoinPath(server_dir, kServerPidFile);
+ // Child process (i.e. the server)
+ close(fds[0]); // ...child keeps the other.
- if (!blaze_util::WriteFile(pid_string, pid_file)) {
- // The exit code does not matter because we are already in the daemonized
- // server. The output of this operation will end up in jvm.out .
- pdie(0, "Cannot write PID file");
- }
+ Daemonize(daemon_output);
- WriteSystemSpecificProcessIdentifier(server_dir);
+ pid_t server_pid = getpid();
+ WriteToFdWithRetryEintr(fds[1], &server_pid, sizeof server_pid,
+ "cannot communicate server PID to client");
+ // We wait until the client writes the PID file so that there is no race
+ // condition; the server expects the PID file to already be there so that
+ // it can read it and know its own PID (see the ctor GrpcServerImpl) and so
+ // that it can kill itself if the PID file is deleted (see
+ // GrpcServerImpl.PidFileWatcherThread)
+ char dummy;
+ ReadFromFdWithRetryEintr(fds[1], &dummy, 1,
+ "cannot get PID file write acknowledgement from client");
ExecuteProgram(exe, args_vector);
pdie(0, "Cannot execute %s", exe.c_str());