aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/cpp/blaze_util_linux.cc
diff options
context:
space:
mode:
authorGravatar Lukacs Berki <lberki@google.com>2016-05-02 11:47:40 +0000
committerGravatar Damien Martin-Guillerez <dmarting@google.com>2016-05-02 11:52:39 +0000
commit43e620bfcd20ea24f079b0aec5eacf2b0f6c8a4e (patch)
tree17512579b288f58cf1f3f101e5c44d225697c97f /src/main/cpp/blaze_util_linux.cc
parent29c4a950228a11201667c947f9843644e6a43145 (diff)
Linux-specific: check if the stray server process we are about to kill -9 is actually a server process.
This should be implemented for other OSes, too, but OS X seems to lack a procfs and it's not clear how to discover anything about a process based on its PID and of course, Windows is a wholly different cup of tea. More work for #930. -- MOS_MIGRATED_REVID=121262673
Diffstat (limited to 'src/main/cpp/blaze_util_linux.cc')
-rw-r--r--src/main/cpp/blaze_util_linux.cc67
1 files changed, 67 insertions, 0 deletions
diff --git a/src/main/cpp/blaze_util_linux.cc b/src/main/cpp/blaze_util_linux.cc
index 1f1144c426..4be6fd3e5a 100644
--- a/src/main/cpp/blaze_util_linux.cc
+++ b/src/main/cpp/blaze_util_linux.cc
@@ -15,6 +15,7 @@
#include <errno.h> // errno, ENAMETOOLONG
#include <limits.h>
#include <pwd.h>
+#include <signal.h>
#include <string.h> // strerror
#include <sys/socket.h>
#include <sys/statfs.h>
@@ -34,6 +35,7 @@ namespace blaze {
using blaze_util::die;
using blaze_util::pdie;
using std::string;
+using std::vector;
string GetOutputRoot() {
char buf[2048];
@@ -180,4 +182,69 @@ string GetDefaultHostJavabase() {
return blaze_util::Dirname(blaze_util::Dirname(javac_dir));
}
+// Looks up the start time of the process with the given PID in
+// /proc/<pid>/stat and stores it, as the raw decimal string found there, in
+// *start_time.
+//
+// Returns false if the stat file cannot be read (for instance because no such
+// process exists any more). Dies if the file is readable but does not have the
+// expected layout.
+static bool GetStartTime(const string& pid, string* start_time) {
+  string statfile = "/proc/" + pid + "/stat";
+  string statline;
+
+  if (!ReadFile(statfile, &statline)) {
+    return false;
+  }
+
+  // The second field (comm) is the executable name wrapped in parentheses and
+  // may itself contain spaces or parentheses, so splitting the whole line on
+  // ' ' can shift every later field. Everything after the last ')' is plain
+  // space-separated data, so only that tail is split.
+  const string::size_type comm_end = statline.rfind(')');
+  const string::size_type tail_start =
+      comm_end == string::npos
+          ? string::npos
+          : statline.find_first_not_of(' ', comm_end + 1);
+
+  vector<string> stat_entries;
+  if (tail_start != string::npos) {
+    stat_entries = blaze_util::Split(statline.substr(tail_start), ' ');
+  }
+
+  // With pid and comm dropped, field 3 (state) is at index 0, which puts
+  // field 22 (starttime) at index 19.
+  const vector<string>::size_type kStartTimeIndex = 19;
+  if (stat_entries.size() <= kStartTimeIndex) {
+    // This is a parse failure, not a failed system call, so errno carries no
+    // useful information: use die() rather than pdie().
+    die(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+        "Format of stat file at %s is unknown", statfile.c_str());
+  }
+
+  // Start time since boot, in clock ticks (jiffies). This combined with the
+  // PID should be unique.
+  *start_time = stat_entries[kStartTimeIndex];
+  return true;
+}
+
+// Stores the start time of the calling process in the file
+// "server.starttime" inside server_dir. Both failure modes (start time not
+// obtainable, file not writable) are reported through pdie().
+void WriteSystemSpecificProcessIdentifier(const string& server_dir) {
+  const string own_pid = ToString(getpid());
+
+  string own_start_time;
+  const bool start_time_known = GetStartTime(own_pid, &own_start_time);
+  if (!start_time_known) {
+    pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+         "Cannot get start time of process %s", own_pid.c_str());
+  }
+
+  const string destination =
+      blaze_util::JoinPath(server_dir, "server.starttime");
+  if (WriteFile(own_start_time, destination)) {
+    return;
+  }
+
+  pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+       "Cannot write start time in server dir %s", server_dir.c_str());
+}
+
+// On Linux we use a combination of PID and start time to identify the server
+// process. That is supposed to be unique unless one can start more processes
+// than there are PIDs available within a single jiffy.
+//
+// Sends SIGKILL to the process group `pid`, but only when the start time that
+// GetStartTime() currently reports for `pid` equals the one stored in
+// <output_base>/server/server.starttime. In every other case the function
+// returns without signalling anything. install_base is not used here.
+//
+// NOTE(review): the check and the kill are not atomic, so the PID could in
+// principle be recycled in between; the window is small but not zero.
+void KillServerProcess(
+    int pid, const string& output_base, const string& install_base) {
+  string start_time;
+  if (!GetStartTime(ToString(pid), &start_time)) {
+    // Cannot read PID file from /proc . Process died in the meantime?
+    return;
+  }
+
+  string recorded_start_time;
+  if (!ReadFile(blaze_util::JoinPath(output_base, "server/server.starttime"),
+                &recorded_start_time)) {
+    // start time file got deleted, but PID file didn't. This is strange. Let's
+    // not kill a random process. Note that this makes Blaze unable to kill
+    // hung servers that do not write a server.starttime file.
+    return;
+  }
+
+  if (recorded_start_time != start_time) {
+    // This is a different process.
+    fprintf(stderr, "PID %d got reused. Not killing the process.\n", pid);
+    return;
+  }
+
+  if (killpg(pid, SIGKILL) == -1 && errno != ESRCH) {
+    // ESRCH only means the process went away between the check above and the
+    // kill, which is fine. Any other failure (e.g. EPERM) leaves the stray
+    // server running, so say so instead of failing silently.
+    fprintf(stderr, "Could not kill server process group %d: %s\n", pid,
+            strerror(errno));
+  }
+}
+
} // namespace blaze