From 43e620bfcd20ea24f079b0aec5eacf2b0f6c8a4e Mon Sep 17 00:00:00 2001 From: Lukacs Berki Date: Mon, 2 May 2016 11:47:40 +0000 Subject: Linux-specific: check if the stray server process we are about to kill -9 is actually a server process. This should be implemented for other OSes, too, but OS X seems to lack a procfs and it's not clear how to discover anything about a process based on its PID and of course, Windows is a wholly different cup of tea. More work for #930. -- MOS_MIGRATED_REVID=121262673 --- src/main/cpp/blaze.cc | 7 ++-- src/main/cpp/blaze_util_darwin.cc | 11 +++++++ src/main/cpp/blaze_util_freebsd.cc | 10 ++++++ src/main/cpp/blaze_util_linux.cc | 67 ++++++++++++++++++++++++++++++++++++++ src/main/cpp/blaze_util_mingw.cc | 3 +- src/main/cpp/blaze_util_platform.h | 3 +- src/main/cpp/blaze_util_posix.cc | 13 ++++---- 7 files changed, 102 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc index 5a696583b2..f30b3076bb 100644 --- a/src/main/cpp/blaze.cc +++ b/src/main/cpp/blaze.cc @@ -971,7 +971,6 @@ static void WriteFileToStreamOrDie(FILE *stream, const char *file_name) { static int GetServerPid(const string &server_dir) { // Note: there is no race here on startup since the server creates // the pid file strictly before it binds the socket. - char buf[33]; // The server writes a file, but we need to handle old servers that still @@ -1026,7 +1025,8 @@ static void StartServerAndConnect(BlazeServer *server) { fprintf(stderr, "Found non-responsive server process (pid=%d). Killing it.\n", server_pid); - KillServerProcess(server_pid, globals->options.output_base); + KillServerProcess(server_pid, globals->options.output_base, + globals->options.install_base); } SetScheduling(globals->options.batch_cpu_scheduling, @@ -2014,7 +2014,8 @@ void GrpcBlazeServer::KillRunningServer() { while (reader->Read(&response)) {} // Kill the server process for good measure. - KillServerProcess(globals->server_pid, globals->options.output_base); + KillServerProcess(globals->server_pid, globals->options.output_base, + globals->options.install_base); connected_ = false; } diff --git a/src/main/cpp/blaze_util_darwin.cc b/src/main/cpp/blaze_util_darwin.cc index 26db3d1e86..3487640e97 100644 --- a/src/main/cpp/blaze_util_darwin.cc +++ b/src/main/cpp/blaze_util_darwin.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -120,4 +121,14 @@ string GetDefaultHostJavabase() { return javabase.substr(0, javabase.length()-1); } +void WriteSystemSpecificProcessIdentifier(const string& server_dir) { +} + +void KillServerProcess( + int pid, const string& output_base, const string& install_base) { + // TODO(lberki): This might accidentally kill an unrelated process if the + // server died and the PID got reused. + killpg(pid, SIGKILL); +} + } // namespace blaze. diff --git a/src/main/cpp/blaze_util_freebsd.cc b/src/main/cpp/blaze_util_freebsd.cc index 0b87a10299..448f525e13 100644 --- a/src/main/cpp/blaze_util_freebsd.cc +++ b/src/main/cpp/blaze_util_freebsd.cc @@ -154,4 +154,14 @@ string GetDefaultHostJavabase() { return "/usr/local/openjdk8"; } +void WriteSystemSpecificProcessIdentifier(const string& server_dir) { +} + +void KillServerProcess( + int pid, const string& output_base, const string& install_base) { + // TODO(lberki): This might accidentally kill an unrelated process if the + // server died and the PID got reused. + killpg(pid, SIGKILL); +} + } // namespace blaze diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc index 1f1144c426..4be6fd3e5a 100644 --- a/src/main/cpp/blaze_util_linux.cc +++ b/src/main/cpp/blaze_util_linux.cc @@ -15,6 +15,7 @@ #include // errno, ENAMETOOLONG #include #include +#include #include // strerror #include #include @@ -34,6 +35,7 @@ namespace blaze { using blaze_util::die; using blaze_util::pdie; using std::string; +using std::vector; string GetOutputRoot() { char buf[2048]; @@ -180,4 +182,69 @@ string GetDefaultHostJavabase() { return blaze_util::Dirname(blaze_util::Dirname(javac_dir)); } +static bool GetStartTime(const string& pid, string* start_time) { + string statfile = "/proc/" + pid + "/stat"; + string statline; + + if (!ReadFile(statfile, &statline)) { + return false; + } + + vector stat_entries = blaze_util::Split(statline, ' '); + if (stat_entries.size() < 22) { + pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Format of stat file at %s is unknown", statfile.c_str()); + } + + // Start time since startup in jiffies. This combined with the PID should be + // unique. + *start_time = stat_entries[21]; + return true; +} + +void WriteSystemSpecificProcessIdentifier(const string& server_dir) { + string pid = ToString(getpid()); + + string start_time; + if (!GetStartTime(pid, &start_time)) { + pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Cannot get start time of process %s", pid.c_str()); + } + + string start_time_file = blaze_util::JoinPath(server_dir, "server.starttime"); + if (!WriteFile(start_time, start_time_file)) { + pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Cannot write start time in server dir %s", server_dir.c_str()); + } +} + +// On Linux we use a combination of PID and start time to identify the server +// process. That is supposed to be unique unless one can start more processes +// than there are PIDs available within a single jiffy. +void KillServerProcess( + int pid, const string& output_base, const string& install_base) { + string start_time; + if (!GetStartTime(ToString(pid), &start_time)) { + // Cannot read PID file from /proc . Process died in the meantime? + return; + } + + string recorded_start_time; + if (!ReadFile(blaze_util::JoinPath(output_base, "server/server.starttime"), + &recorded_start_time)) { + // start time file got deleted, but PID file didn't. This is strange. Let's + // not kill a random process. Note that this makes Blaze unable to kill + // hung servers that do not write a server.starttime file. + return; + } + + if (recorded_start_time != start_time) { + // This is a different process. + fprintf(stderr, "PID %d got reused. Not killing the process.\n", pid); + return; + } + + killpg(pid, SIGKILL); +} + } // namespace blaze diff --git a/src/main/cpp/blaze_util_mingw.cc b/src/main/cpp/blaze_util_mingw.cc index 27d13fc4a9..ce0b68457b 100644 --- a/src/main/cpp/blaze_util_mingw.cc +++ b/src/main/cpp/blaze_util_mingw.cc @@ -628,7 +628,8 @@ bool CompareAbsolutePaths(const string& a, const string& b) { return a_real == b_real; } -void KillServerProcess(int pid, const string& output_base) { +void KillServerProcess( + int pid, const string& output_base, const string& install_base) { // Not implemented yet. TerminateProcess should work. } diff --git a/src/main/cpp/blaze_util_platform.h b/src/main/cpp/blaze_util_platform.h index 09dda2eab7..461323a0e6 100644 --- a/src/main/cpp/blaze_util_platform.h +++ b/src/main/cpp/blaze_util_platform.h @@ -118,7 +118,8 @@ uint64_t AcquireLock(const string& output_base, bool batch_mode, void ReleaseLock(BlazeLock* blaze_lock); // Kills a server process based on its output base and PID. -void KillServerProcess(int pid, const string& output_base); +void KillServerProcess( + int pid, const string& output_base, const string& install_base); } // namespace blaze diff --git a/src/main/cpp/blaze_util_posix.cc b/src/main/cpp/blaze_util_posix.cc index 5c28bd372b..112731f7ee 100644 --- a/src/main/cpp/blaze_util_posix.cc +++ b/src/main/cpp/blaze_util_posix.cc @@ -126,7 +126,10 @@ bool PipeBlazeServerStartup::IsStillAlive() { return read(this->pipe_fd, &c, 1) == -1 && errno == EAGAIN; } -void ExecuteDaemon(const string& exe, const std::vector& args_vector, +void WriteSystemSpecificProcessIdentifier(const string& server_dir); + +void ExecuteDaemon(const string& exe, + const std::vector& args_vector, const string& daemon_output, const string& server_dir, BlazeServerStartup** server_startup) { int fds[2]; @@ -161,6 +164,8 @@ void ExecuteDaemon(const string& exe, const std::vector& args_vector, pdie(0, "Cannot write PID symlink"); } + WriteSystemSpecificProcessIdentifier(server_dir); + ExecuteProgram(exe, args_vector); pdie(0, "Cannot execute %s", exe.c_str()); } @@ -209,10 +214,4 @@ bool CompareAbsolutePaths(const string& a, const string& b) { return a == b; } -void KillServerProcess(int pid, const string& output_base) { - // TODO(lberki): This might accidentally kill an unrelated process if the - // server died and the PID got reused. - killpg(pid, SIGKILL); -} - } // namespace blaze. -- cgit v1.2.3