aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Lukacs Berki <lberki@google.com>2016-04-28 13:18:54 +0000
committerGravatar Lukacs Berki <lberki@google.com>2016-04-28 13:55:34 +0000
commit415d39a2a3b5cfb6207a4d6ee27844493007d2f4 (patch)
tree1a437bbeca2c20450ba34e51c6f9e203bc6df1b0
parente33cf0fb5306ad52d0066128a85bcd3b2e3f2ac8 (diff)
Acquire the server lock even if the client uses gRPC.
This is so that only one server instance is started up if two clients are started in a workspace that doesn't have a running server yet. More work towards #930. This may break Windows in case flock() doesn't work there as expected. In anticipation of this, locking is moved to blaze_util_platform.h / blaze_util.cc . -- MOS_MIGRATED_REVID=121013078
-rw-r--r--src/main/cpp/blaze.cc134
-rw-r--r--src/main/cpp/blaze_util.cc83
-rw-r--r--src/main/cpp/blaze_util_platform.h15
3 files changed, 132 insertions, 100 deletions
diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc
index 63afc7bd60..4fdc75f157 100644
--- a/src/main/cpp/blaze.cc
+++ b/src/main/cpp/blaze.cc
@@ -103,9 +103,14 @@ static string BuildServerRequest();
// This object has two states: connected and disconnected. Most of the methods
// can only be called in one of these states.
class BlazeServer {
+ protected:
+ BlazeLock blaze_lock;
+
public:
virtual ~BlazeServer() {}
+ uint64_t AcquireLock();
+
// Connect to the server. Returns if the connection was successful. Only
// call this when this object is in disconnected state. If it returns true,
// this object will be in connected state.
@@ -130,6 +135,29 @@ class BlazeServer {
virtual void Cancel() = 0;
};
+////////////////////////////////////////////////////////////////////////
+// Global Variables
+static GlobalVariables *globals;
+static BlazeServer *blaze_server;
+
+static void InitGlobals() {
+ globals = new GlobalVariables;
+ globals->server_pid = -1;
+ globals->sigint_count = 0;
+ globals->received_signal = 0;
+ globals->startup_time = 0;
+ globals->extract_data_time = 0;
+ globals->command_wait_time = 0;
+ globals->restart_reason = NO_RESTART;
+}
+
+
+uint64_t BlazeServer::AcquireLock() {
+ return blaze::AcquireLock(
+ globals->options.output_base, globals->options.batch,
+ globals->options.block_for_lock, &blaze_lock);
+}
+
class AfUnixBlazeServer : public BlazeServer {
public:
AfUnixBlazeServer();
@@ -180,22 +208,6 @@ class GrpcBlazeServer : public BlazeServer {
////////////////////////////////////////////////////////////////////////
-// Global Variables
-static GlobalVariables *globals;
-static BlazeServer *blaze_server;
-
-static void InitGlobals() {
- globals = new GlobalVariables;
- globals->server_pid = -1;
- globals->sigint_count = 0;
- globals->received_signal = 0;
- globals->startup_time = 0;
- globals->extract_data_time = 0;
- globals->command_wait_time = 0;
- globals->restart_reason = NO_RESTART;
-}
-
-////////////////////////////////////////////////////////////////////////
// Logic
@@ -1631,85 +1643,6 @@ static void CheckEnvironment() {
setenv("LC_CTYPE", "en_US.ISO-8859-1", 1);
}
-// Create the lockfile and take an exclusive lock on a region within it. This
-// lock is inherited with the file descriptor across execve(), but not fork().
-// So in the batch case, the JVM holds the lock until exit; otherwise, this
-// program holds it until exit.
-void AcquireLock() {
- int lockfd = open(globals->lockfile.c_str(), O_CREAT|O_RDWR, 0644);
-
- if (lockfd < 0) {
- pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "cannot open lockfile '%s' for writing", globals->lockfile.c_str());
- }
-
- // Keep server from inheriting a useless fd if we are not in batch mode
- if (!globals->options.batch) {
- if (fcntl(lockfd, F_SETFD, FD_CLOEXEC) == -1) {
- pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "fcntl(F_SETFD) failed for lockfile");
- }
- }
-
- struct flock lock;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- // This doesn't really matter now, but allows us to subdivide the lock
- // later if that becomes meaningful. (Ranges beyond EOF can be locked.)
- lock.l_len = 4096;
-
- // Try to take the lock, without blocking.
- if (fcntl(lockfd, F_SETLK, &lock) == -1) {
- if (errno != EACCES && errno != EAGAIN) {
- pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "unexpected result from F_SETLK");
- }
-
- // We didn't get the lock. Find out who has it.
- struct flock probe = lock;
- probe.l_pid = 0;
- if (fcntl(lockfd, F_GETLK, &probe) == -1) {
- pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "unexpected result from F_GETLK");
- }
- if (!globals->options.block_for_lock) {
- die(blaze_exit_code::BAD_ARGV,
- "Another %s command is running (pid=%d). Exiting immediately.",
- globals->options.GetProductName().c_str(), probe.l_pid);
- }
- fprintf(stderr, "Another %s command is running (pid = %d). "
- "Waiting for it to complete...",
- globals->options.GetProductName().c_str(), probe.l_pid);
- fflush(stderr);
-
- // Take a clock sample for that start of the waiting time
- uint64_t st = MonotonicClock();
- // Try to take the lock again (blocking).
- int r;
- do {
- r = fcntl(lockfd, F_SETLKW, &lock);
- } while (r == -1 && errno == EINTR);
- fprintf(stderr, "\n");
- if (r == -1) {
- pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
- "couldn't acquire file lock");
- }
- // Take another clock sample, calculate elapsed
- uint64_t et = MonotonicClock();
- globals->command_wait_time = (et - st) / 1000000LL;
- }
-
- // Identify ourselves in the lockfile.
- ftruncate(lockfd, 0);
- const char *tty = ttyname(STDIN_FILENO); // NOLINT (single-threaded)
- string msg = "owner=" + globals->options.GetProductName() + " launcher\npid="
- + ToString(getpid()) + "\ntty=" + (tty ? tty : "") + "\n";
- // Don't bother checking for error, since it's unlikely and unimportant.
- // The contents are currently meant only for debugging.
- write(lockfd, msg.data(), msg.size());
-}
-
static void SetupStreams() {
// Line-buffer stderr, since we always flush at the end of a server
// message. This saves lots of single-char calls to write(2).
@@ -1838,11 +1771,7 @@ int main(int argc, const char *argv[]) {
? static_cast<BlazeServer *>(new GrpcBlazeServer())
: static_cast<BlazeServer *>(new AfUnixBlazeServer());
- if (globals->options.command_port < 0 || globals->options.batch) {
- // The gRPC server can handle concurrent commands just fine. However, we
- // need to be careful not to start two parallel instances in batch mode.
- AcquireLock();
- }
+ globals->command_wait_time = blaze_server->AcquireLock();
WarnFilesystemType(globals->options.output_base);
EnsureFiniteStackLimit();
@@ -2013,6 +1942,11 @@ unsigned int GrpcBlazeServer::Communicate() {
std::unique_ptr<grpc::ClientReader<command_server::RunResponse>> reader(
client_->Run(&context, request));
+ // Release the server lock because the gRPC handles concurrent clients just
+ // fine. Note that this may result in two "waiting for other client" messages
+ // (one during server startup and one emitted by the server)
+ blaze::ReleaseLock(&blaze_lock);
+
{
std::unique_lock<std::recursive_mutex> lock(cancel_thread_mutex_);
cancel_thread_action_ = NOTHING;
diff --git a/src/main/cpp/blaze_util.cc b/src/main/cpp/blaze_util.cc
index 999209153d..e40f8085b5 100644
--- a/src/main/cpp/blaze_util.cc
+++ b/src/main/cpp/blaze_util.cc
@@ -343,4 +343,87 @@ bool CheckJavaVersionIsAtLeast(const string &jvm_version,
return true;
}
+uint64_t AcquireLock(const string& output_base, bool batch_mode, bool block,
+ BlazeLock* blaze_lock) {
+ string lockfile = output_base + "/lock";
+ int lockfd = open(lockfile.c_str(), O_CREAT|O_RDWR, 0644);
+
+ if (lockfd < 0) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "cannot open lockfile '%s' for writing", lockfile.c_str());
+ }
+
+ // Keep server from inheriting a useless fd if we are not in batch mode
+ if (!batch_mode) {
+ if (fcntl(lockfd, F_SETFD, FD_CLOEXEC) == -1) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "fcntl(F_SETFD) failed for lockfile");
+ }
+ }
+
+ struct flock lock;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ // This doesn't really matter now, but allows us to subdivide the lock
+ // later if that becomes meaningful. (Ranges beyond EOF can be locked.)
+ lock.l_len = 4096;
+
+ uint64_t wait_time = 0;
+ // Try to take the lock, without blocking.
+ if (fcntl(lockfd, F_SETLK, &lock) == -1) {
+ if (errno != EACCES && errno != EAGAIN) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "unexpected result from F_SETLK");
+ }
+
+ // We didn't get the lock. Find out who has it.
+ struct flock probe = lock;
+ probe.l_pid = 0;
+ if (fcntl(lockfd, F_GETLK, &probe) == -1) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "unexpected result from F_GETLK");
+ }
+ if (!block) {
+ die(blaze_exit_code::BAD_ARGV,
+ "Another command is running (pid=%d). Exiting immediately.",
+ probe.l_pid);
+ }
+ fprintf(stderr, "Another command is running (pid = %d). "
+ "Waiting for it to complete...", probe.l_pid);
+ fflush(stderr);
+
+ // Take a clock sample for that start of the waiting time
+ uint64_t st = MonotonicClock();
+ // Try to take the lock again (blocking).
+ int r;
+ do {
+ r = fcntl(lockfd, F_SETLKW, &lock);
+ } while (r == -1 && errno == EINTR);
+ fprintf(stderr, "\n");
+ if (r == -1) {
+ pdie(blaze_exit_code::LOCAL_ENVIRONMENTAL_ERROR,
+ "couldn't acquire file lock");
+ }
+ // Take another clock sample, calculate elapsed
+ uint64_t et = MonotonicClock();
+ wait_time = (et - st) / 1000000LL;
+ }
+
+ // Identify ourselves in the lockfile.
+ ftruncate(lockfd, 0);
+ const char *tty = ttyname(STDIN_FILENO); // NOLINT (single-threaded)
+ string msg = "owner=launcher\npid="
+ + ToString(getpid()) + "\ntty=" + (tty ? tty : "") + "\n";
+ // Don't bother checking for error, since it's unlikely and unimportant.
+ // The contents are currently meant only for debugging.
+ write(lockfd, msg.data(), msg.size());
+ blaze_lock->lockfd = lockfd;
+ return wait_time;
+}
+
+void ReleaseLock(BlazeLock* blaze_lock) {
+ close(blaze_lock->lockfd);
+}
+
} // namespace blaze
diff --git a/src/main/cpp/blaze_util_platform.h b/src/main/cpp/blaze_util_platform.h
index a2a865c26b..4afcd9dfbc 100644
--- a/src/main/cpp/blaze_util_platform.h
+++ b/src/main/cpp/blaze_util_platform.h
@@ -96,6 +96,21 @@ bool ReadDirectorySymlink(const string &symlink, string *result);
// (Windows-style) and "/c/foo/bar" (msys2 style). Returns if the paths are
// equal.
bool CompareAbsolutePaths(const string& a, const string& b);
+
+struct BlazeLock {
+ int lockfd;
+};
+
+// Acquires a lock on the output base. Exits if the lock cannot be acquired.
+// Sets ``lock`` to a value that can subsequently be passed to ReleaseLock().
+// Returns the number of milliseconds spent with waiting for the lock.
+uint64_t AcquireLock(const string& output_base, bool batch_mode,
+ bool block, BlazeLock* blaze_lock);
+
+// Releases the lock on the output base. In case of an error, continues as
+// usual.
+void ReleaseLock(BlazeLock* blaze_lock);
+
} // namespace blaze
#endif // BAZEL_SRC_MAIN_CPP_BLAZE_UTIL_PLATFORM_H_