aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Lukacs Berki <lberki@google.com>2016-05-02 11:47:40 +0000
committerGravatar Damien Martin-Guillerez <dmarting@google.com>2016-05-02 11:52:39 +0000
commit43e620bfcd20ea24f079b0aec5eacf2b0f6c8a4e (patch)
tree17512579b288f58cf1f3f101e5c44d225697c97f /src
parent29c4a950228a11201667c947f9843644e6a43145 (diff)
Linux-specific: check if the stray server process we are about to kill -9 is actually a server process.
This should be implemented for other OSes, too, but OS X seems to lack a procfs and it's not clear how to discover anything about a process based on its PID and of course, Windows is a wholly different cup of tea. More work for #930. -- MOS_MIGRATED_REVID=121262673
Diffstat (limited to 'src')
-rw-r--r--src/main/cpp/blaze.cc7
-rw-r--r--src/main/cpp/blaze_util_darwin.cc11
-rw-r--r--src/main/cpp/blaze_util_freebsd.cc10
-rw-r--r--src/main/cpp/blaze_util_linux.cc67
-rw-r--r--src/main/cpp/blaze_util_mingw.cc3
-rw-r--r--src/main/cpp/blaze_util_platform.h3
-rw-r--r--src/main/cpp/blaze_util_posix.cc13
7 files changed, 102 insertions, 12 deletions
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 5a696583b2..f30b3076bb 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -971,7 +971,6 @@ static void WriteFileToStreamOrDie(FILE *stream, const char *file_name) {
static int GetServerPid(const string &server_dir) {
// Note: there is no race here on startup since the server creates
// the pid file strictly before it binds the socket.
-
char buf[33];
// The server writes a file, but we need to handle old servers that still
@@ -1026,7 +1025,8 @@ static void StartServerAndConnect(BlazeServer *server) {
fprintf(stderr,
"Found non-responsive server process (pid=%d). Killing it.\n",
server_pid);
- KillServerProcess(server_pid, globals->options.output_base);
+ KillServerProcess(server_pid, globals->options.output_base,
+ globals->options.install_base);
}
SetScheduling(globals->options.batch_cpu_scheduling,
@@ -2014,7 +2014,8 @@ void GrpcBlazeServer::KillRunningServer() {
while (reader->Read(&response)) {}
// Kill the server process for good measure.
- KillServerProcess(globals->server_pid, globals->options.output_base);
+ KillServerProcess(globals->server_pid, globals->options.output_base,
+ globals->options.install_base);
connected_ = false;
}
diff --git a/src/main/cpp/blaze_util_darwin.cc b/src/main/cpp/blaze_util_darwin.cc
index 26db3d1e86..3487640e97 100644
--- a/src/main/cpp/blaze_util_darwin.cc
+++ b/src/main/cpp/blaze_util_darwin.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include <libproc.h>
+#include <signal.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/un.h>
@@ -120,4 +121,14 @@ string GetDefaultHostJavabase() {
return javabase.substr(0, javabase.length()-1);
}
+void WriteSystemSpecificProcessIdentifier(const string& server_dir) {
+}
+
+void KillServerProcess(
+ int pid, const string& output_base, const string& install_base) {
+ // TODO(lberki): This might accidentally kill an unrelated process if the
+ // server died and the PID got reused.
+ killpg(pid, SIGKILL);
+}
+
} // namespace blaze.
diff --git a/src/main/cpp/blaze_util_freebsd.cc b/src/main/cpp/blaze_util_freebsd.cc
index 0b87a10299..448f525e13 100644
--- a/src/main/cpp/blaze_util_freebsd.cc
+++ b/src/main/cpp/blaze_util_freebsd.cc
@@ -154,4 +154,14 @@ string GetDefaultHostJavabase() {
return "/usr/local/openjdk8";
}
+void WriteSystemSpecificProcessIdentifier(const string& server_dir) {
+}
+
+void KillServerProcess(
+ int pid, const string& output_base, const string& install_base) {
+ // TODO(lberki): This might accidentally kill an unrelated process if the
+ // server died and the PID got reused.
+ killpg(pid, SIGKILL);
+}
+
} // namespace blaze
diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc
index 1f1144c426..4be6fd3e5a 100644
--- a/src/main/cpp/blaze_util_linux.cc
+++ b/src/main/cpp/blaze_util_linux.cc
@@ -15,6 +15,7 @@
#include <errno.h> // errno, ENAMETOOLONG
#include <limits.h>
#include <pwd.h>
+#include <signal.h>
#include <string.h> // strerror
#include <sys/socket.h>
#include <sys/statfs.h>
@@ -34,6 +35,7 @@ namespace blaze {
using blaze_util::die;
using blaze_util::pdie;
using std::string;
+using std::vector;
string GetOutputRoot() {
char buf[2048];
@@ -180,4 +182,69 @@ string GetDefaultHostJavabase() {
return blaze_util::Dirname(blaze_util::Dirname(javac_dir));
}
+static bool GetStartTime(const string& pid, string* start_time) {
+ string statfile = "/proc/" + pid + "/stat";
+ string statline;
+
+ if (!ReadFile(statfile, &statline)) {
+ return false;
+ }
+
+ vector<string> stat_entries = blaze_util::Split(statline, ' ');
+ if (stat_entries.size() < 22) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "Format of stat file at %s is unknown", statfile.c_str());
+ }
+
+ // Start time since startup in jiffies. This combined with the PID should be
+ // unique.
+ *start_time = stat_entries[21];
+ return true;
+}
+
+void WriteSystemSpecificProcessIdentifier(const string& server_dir) {
+ string pid = ToString(getpid());
+
+ string start_time;
+ if (!GetStartTime(pid, &start_time)) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "Cannot get start time of process %s", pid.c_str());
+ }
+
+ string start_time_file = blaze_util::JoinPath(server_dir, "server.starttime");
+ if (!WriteFile(start_time, start_time_file)) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "Cannot write start time in server dir %s", server_dir.c_str());
+ }
+}
+
+// On Linux we use a combination of PID and start time to identify the server
+// process. That is supposed to be unique unless one can start more processes
+// than there are PIDs available within a single jiffy.
+void KillServerProcess(
+ int pid, const string& output_base, const string& install_base) {
+ string start_time;
+ if (!GetStartTime(ToString(pid), &start_time)) {
+ // Cannot read PID file from /proc . Process died in the meantime?
+ return;
+ }
+
+ string recorded_start_time;
+ if (!ReadFile(blaze_util::JoinPath(output_base, "server/server.starttime"),
+ &recorded_start_time)) {
+ // start time file got deleted, but PID file didn't. This is strange. Let's
+ // not kill a random process. Note that this makes Blaze unable to kill
+ // hung servers that do not write a server.starttime file.
+ return;
+ }
+
+ if (recorded_start_time != start_time) {
+ // This is a different process.
+ fprintf(stderr, "PID %d got reused. Not killing the process.\n", pid);
+ return;
+ }
+
+ killpg(pid, SIGKILL);
+}
+
} // namespace blaze
diff --git a/src/main/cpp/blaze_util_mingw.cc b/src/main/cpp/blaze_util_mingw.cc
index 27d13fc4a9..ce0b68457b 100644
--- a/src/main/cpp/blaze_util_mingw.cc
+++ b/src/main/cpp/blaze_util_mingw.cc
@@ -628,7 +628,8 @@ bool CompareAbsolutePaths(const string& a, const string& b) {
return a_real == b_real;
}
-void KillServerProcess(int pid, const string& output_base) {
+void KillServerProcess(
+ int pid, const string& output_base, const string& install_base) {
// Not implemented yet. TerminateProcess should work.
}
diff --git a/src/main/cpp/blaze_util_platform.h b/src/main/cpp/blaze_util_platform.h
index 09dda2eab7..461323a0e6 100644
--- a/src/main/cpp/blaze_util_platform.h
+++ b/src/main/cpp/blaze_util_platform.h
@@ -118,7 +118,8 @@ uint64_t AcquireLock(const string& output_base, bool batch_mode,
void ReleaseLock(BlazeLock* blaze_lock);
// Kills a server process based on its output base and PID.
-void KillServerProcess(int pid, const string& output_base);
+void KillServerProcess(
+ int pid, const string& output_base, const string& install_base);
} // namespace blaze
diff --git a/src/main/cpp/blaze_util_posix.cc b/src/main/cpp/blaze_util_posix.cc
index 5c28bd372b..112731f7ee 100644
--- a/src/main/cpp/blaze_util_posix.cc
+++ b/src/main/cpp/blaze_util_posix.cc
@@ -126,7 +126,10 @@ bool PipeBlazeServerStartup::IsStillAlive() {
return read(this->pipe_fd, &c, 1) == -1 && errno == EAGAIN;
}
-void ExecuteDaemon(const string& exe, const std::vector<string>& args_vector,
+void WriteSystemSpecificProcessIdentifier(const string& server_dir);
+
+void ExecuteDaemon(const string& exe,
+ const std::vector<string>& args_vector,
const string& daemon_output, const string& server_dir,
BlazeServerStartup** server_startup) {
int fds[2];
@@ -161,6 +164,8 @@ void ExecuteDaemon(const string& exe, const std::vector<string>& args_vector,
pdie(0, "Cannot write PID symlink");
}
+ WriteSystemSpecificProcessIdentifier(server_dir);
+
ExecuteProgram(exe, args_vector);
pdie(0, "Cannot execute %s", exe.c_str());
}
@@ -209,10 +214,4 @@ bool CompareAbsolutePaths(const string& a, const string& b) {
return a == b;
}
-void KillServerProcess(int pid, const string& output_base) {
- // TODO(lberki): This might accidentally kill an unrelated process if the
- // server died and the PID got reused.
- killpg(pid, SIGKILL);
-}
-
} // namespace blaze.