diff options
author | Lukacs Berki <lberki@google.com> | 2016-05-02 11:47:40 +0000 |
---|---|---|
committer | Damien Martin-Guillerez <dmarting@google.com> | 2016-05-02 11:52:39 +0000 |
commit | 43e620bfcd20ea24f079b0aec5eacf2b0f6c8a4e (patch) | |
tree | 17512579b288f58cf1f3f101e5c44d225697c97f /src/main/cpp/blaze_util_linux.cc | |
parent | 29c4a950228a11201667c947f9843644e6a43145 (diff) |
Linux-specific: check if the stray server process we are about to kill -9 is actually a server process.
This should be implemented for other OSes, too. However, OS X seems to lack a procfs, so it's not clear how to discover anything about a process based on its PID, and Windows, of course, is a wholly different cup of tea.
More work for #930.
--
MOS_MIGRATED_REVID=121262673
Diffstat (limited to 'src/main/cpp/blaze_util_linux.cc')
-rw-r--r-- | src/main/cpp/blaze_util_linux.cc | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc index 1f1144c426..4be6fd3e5a 100644 --- a/src/main/cpp/blaze_util_linux.cc +++ b/src/main/cpp/blaze_util_linux.cc @@ -15,6 +15,7 @@ #include <errno.h> // errno, ENAMETOOLONG #include <limits.h> #include <pwd.h> +#include <signal.h> #include <string.h> // strerror #include <sys/socket.h> #include <sys/statfs.h> @@ -34,6 +35,7 @@ namespace blaze { using blaze_util::die; using blaze_util::pdie; using std::string; +using std::vector; string GetOutputRoot() { char buf[2048]; @@ -180,4 +182,69 @@ string GetDefaultHostJavabase() { return blaze_util::Dirname(blaze_util::Dirname(javac_dir)); } +static bool GetStartTime(const string& pid, string* start_time) { + string statfile = "/proc/" + pid + "/stat"; + string statline; + + if (!ReadFile(statfile, &statline)) { + return false; + } + + vector<string> stat_entries = blaze_util::Split(statline, ' '); + if (stat_entries.size() < 22) { + pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Format of stat file at %s is unknown", statfile.c_str()); + } + + // Start time since startup in jiffies. This combined with the PID should be + // unique. + *start_time = stat_entries[21]; + return true; +} + +void WriteSystemSpecificProcessIdentifier(const string& server_dir) { + string pid = ToString(getpid()); + + string start_time; + if (!GetStartTime(pid, &start_time)) { + pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Cannot get start time of process %s", pid.c_str()); + } + + string start_time_file = blaze_util::JoinPath(server_dir, "server.starttime"); + if (!WriteFile(start_time, start_time_file)) { + pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR, + "Cannot write start time in server dir %s", server_dir.c_str()); + } +} + +// On Linux we use a combination of PID and start time to identify the server +// process. That is supposed to be unique unless one can start more processes +// than there are PIDs available within a single jiffy. 
+void KillServerProcess( + int pid, const string& output_base, const string& install_base) { + string start_time; + if (!GetStartTime(ToString(pid), &start_time)) { + // Cannot read PID file from /proc . Process died in the meantime? + return; + } + + string recorded_start_time; + if (!ReadFile(blaze_util::JoinPath(output_base, "server/server.starttime"), + &recorded_start_time)) { + // start time file got deleted, but PID file didn't. This is strange. Let's + // not kill a random process. Note that this makes Blaze unable to kill + // hung servers that do not write a server.starttime file. + return; + } + + if (recorded_start_time != start_time) { + // This is a different process. + fprintf(stderr, "PID %d got reused. Not killing the process.\n", pid); + return; + } + + killpg(pid, SIGKILL); +} + } // namespace blaze |