aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/server
diff options
context:
space:
mode:
authorGravatar Dmitry Lomov <dslomov@google.com>2017-02-16 17:00:59 +0000
committerGravatar Dmitry Lomov <dslomov@google.com>2017-02-16 17:03:12 +0000
commitbef19d10b1262e610569b4f604635eb0bea07e01 (patch)
tree7026cd938aebeca54ee6e9965922b155866fad8c /src/main/java/com/google/devtools/build/lib/server
parent4b73e972d909bcd533f2f9940f95a00b9b73bdde (diff)
*** Reason for rollback *** Bisected by dmarting as a root cause of TF breakage: http://ci.bazel.io/job/TensorFlow/672/BAZEL_VERSION=HEAD,PLATFORM_NAME=linux-x86_64/console *** Original change description *** Reinstate idleness checks where the server self-terminates when it's idle and there is either too much memory pressure or the workspace directory is gone. Arguably, it should kill itself when the workspace directory is gone regardless of whether it's idle or not, but let's first get us back to a known good state, then we can think about improvements. -- PiperOrigin-RevId: 147726386 MOS_MIGRATED_REVID=147726386
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/server')
-rw-r--r--src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java106
-rw-r--r--src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java28
2 files changed, 32 insertions, 102 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java
index d6bcbb253c..e1f888d521 100644
--- a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java
+++ b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java
@@ -108,26 +108,17 @@ public class GrpcServerImpl implements RPCServer {
private static final long NANOSECONDS_IN_MS = TimeUnit.MILLISECONDS.toNanos(1);
- private static final long NANOS_PER_IDLE_CHECK =
- TimeUnit.NANOSECONDS.convert(5, TimeUnit.SECONDS);
-
-
private class RunningCommand implements AutoCloseable {
private final Thread thread;
private final String id;
- private RunningCommand() throws InterruptedException {
+ private RunningCommand() {
thread = Thread.currentThread();
id = UUID.randomUUID().toString();
synchronized (runningCommands) {
if (runningCommands.isEmpty()) {
busy();
}
-
- if (shuttingDown) {
- throw new InterruptedException();
- }
-
runningCommands.put(id, this);
runningCommands.notify();
}
@@ -453,56 +444,21 @@ public class GrpcServerImpl implements RPCServer {
}
}
- // The synchronized block is here so that if the "PID file deleted" timer or the idle shutdown
- // mechanism kicks in during a regular shutdown, they don't race.
- @VisibleForTesting // productionVisibility = Visibility.PRIVATE
- void signalShutdown() {
- synchronized (runningCommands) {
- shuttingDown = true;
- server.shutdown();
- }
- }
-
/**
- * A thread that shuts the server down under the following conditions:
- * <ul>
- * <li>The PID file changes (in this case, *very* quickly)</li>
- * <li>The workspace directory is deleted</li>
- * <li>There is too much memory pressure on the host</li>
- * </ul>
+ * A thread that watches if the PID file changes and shuts down the server immediately if so.
*/
- private class ShutdownWatcherThread extends Thread {
- private long lastIdleCheckNanos;
+ private class PidFileWatcherThread extends Thread {
+ private boolean shuttingDown = false;
- private ShutdownWatcherThread() {
- super("grpc-server-shutdown-watcher");
+ private PidFileWatcherThread() {
+ super("pid-file-watcher");
setDaemon(true);
}
- private void doIdleChecksMaybe() {
- synchronized (runningCommands) {
- if (!runningCommands.isEmpty()) {
- lastIdleCheckNanos = -1;
- return;
- }
-
- long currentNanos = BlazeClock.nanoTime();
- if (lastIdleCheckNanos == -1) {
- lastIdleCheckNanos = currentNanos;
- return;
- }
-
- if (currentNanos - lastIdleCheckNanos < NANOS_PER_IDLE_CHECK) {
- return;
- }
-
- if (!idleServerTasks.continueProcessing()) {
- signalShutdown();
- server.shutdown();
- }
-
- lastIdleCheckNanos = currentNanos;
- }
+ // The synchronized block is here so that if the "PID file deleted" timer kicks in during a
+ // regular shutdown, they don't race.
+ private synchronized void signalShutdown() {
+ shuttingDown = true;
}
@Override
@@ -518,12 +474,8 @@ public class GrpcServerImpl implements RPCServer {
// Handled by virtue of ok not being set to true
}
- if (ok) {
- doIdleChecksMaybe();
- }
-
if (!ok) {
- synchronized (ShutdownWatcherThread.this) {
+ synchronized (PidFileWatcherThread.this) {
if (shuttingDown) {
log.warning("PID file deleted or overwritten but shutdown is already in progress");
break;
@@ -559,16 +511,14 @@ public class GrpcServerImpl implements RPCServer {
private final String responseCookie;
private final AtomicLong interruptCounter = new AtomicLong(0);
private final int maxIdleSeconds;
- private final ShutdownWatcherThread shutdownWatcherThread;
+ private final PidFileWatcherThread pidFileWatcherThread;
private final Path pidFile;
private final String pidInFile;
private Server server;
private IdleServerTasks idleServerTasks;
private final int port;
- private InetSocketAddress address;
- private boolean serving;
- private boolean shuttingDown = false;
+ boolean serving;
public GrpcServerImpl(CommandExecutor commandExecutor, Clock clock, int port,
Path workspace, Path serverDirectory, int maxIdleSeconds) throws IOException {
@@ -599,22 +549,12 @@ public class GrpcServerImpl implements RPCServer {
requestCookie = generateCookie(random, 16);
responseCookie = generateCookie(random, 16);
- shutdownWatcherThread = new ShutdownWatcherThread();
- shutdownWatcherThread.start();
+ pidFileWatcherThread = new PidFileWatcherThread();
+ pidFileWatcherThread.start();
idleServerTasks = new IdleServerTasks(workspace);
idleServerTasks.idle();
}
- @VisibleForTesting // productionVisibility = Visibility.PRIVATE
- String getRequestCookie() {
- return requestCookie;
- }
-
- @VisibleForTesting // productionVisibility = Visibility.PRIVATE
- InetSocketAddress getAddress() {
- return address;
- }
-
private void idle() {
Preconditions.checkState(idleServerTasks == null);
idleServerTasks = new IdleServerTasks(workspace);
@@ -702,7 +642,7 @@ public class GrpcServerImpl implements RPCServer {
}
}
- signalShutdown();
+ server.shutdown();
}
/**
@@ -728,7 +668,7 @@ public class GrpcServerImpl implements RPCServer {
@Override
public void prepareForAbruptShutdown() {
disableShutdownHooks();
- signalShutdown();
+ pidFileWatcherThread.signalShutdown();
}
@Override
@@ -783,7 +723,7 @@ public class GrpcServerImpl implements RPCServer {
timeoutThread.start();
}
serving = true;
- this.address = new InetSocketAddress(address.getAddress(), server.getPort());
+
writeServerFile(
PORT_FILE, InetAddresses.toUriString(address.getAddress()) + ":" + server.getPort());
writeServerFile(REQUEST_COOKIE_FILE, requestCookie);
@@ -843,8 +783,7 @@ public class GrpcServerImpl implements RPCServer {
log.severe(err.toString());
}
- @VisibleForTesting // productionVisibility = Visibility.PRIVATE
- void executeCommand(
+ private void executeCommand(
RunRequest request, StreamObserver<RunResponse> observer, GrpcSink sink) {
sink.setCommandThread(Thread.currentThread());
@@ -939,7 +878,8 @@ public class GrpcServerImpl implements RPCServer {
}
if (commandExecutor.shutdown()) {
- signalShutdown();
+ pidFileWatcherThread.signalShutdown();
+ server.shutdown();
}
}
@@ -972,8 +912,6 @@ public class GrpcServerImpl implements RPCServer {
streamObserver.onNext(response.build());
streamObserver.onCompleted();
- } catch (InterruptedException e) {
- // Ignore, we are shutting down anyway
}
}
@@ -1021,8 +959,6 @@ public class GrpcServerImpl implements RPCServer {
log.info("Client cancelled RPC of cancellation request for "
+ request.getCommandId());
}
- } catch (InterruptedException e) {
- // Ignore, we are shutting down anyway
}
}
};
diff --git a/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java b/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java
index efcbc3f6eb..f37e97ae9f 100644
--- a/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java
+++ b/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java
@@ -16,7 +16,6 @@ package com.google.devtools.build.lib.server;
import com.google.devtools.build.lib.profiler.AutoProfiler;
import com.google.devtools.build.lib.unix.ProcMeminfoParser;
-import com.google.devtools.build.lib.util.BlazeClock;
import com.google.devtools.build.lib.util.LoggingUtil;
import com.google.devtools.build.lib.util.Preconditions;
import com.google.devtools.build.lib.vfs.FileStatus;
@@ -36,12 +35,11 @@ import javax.annotation.Nullable;
*/
class IdleServerTasks {
- private long idleStart;
private final Path workspaceDir;
private final ScheduledThreadPoolExecutor executor;
- private static final Logger log = Logger.getLogger(IdleServerTasks.class.getName());
+ private static final Logger LOG = Logger.getLogger(IdleServerTasks.class.getName());
- private static final long FIVE_MIN_NANOS = 1000L * 1000 * 1000 * 60 * 5;
+ private static final long FIVE_MIN_MILLIS = 1000 * 60 * 5;
/**
* Must be called from the main thread.
@@ -58,7 +56,6 @@ class IdleServerTasks {
public void idle() {
Preconditions.checkState(!executor.isShutdown());
- idleStart = BlazeClock.nanoTime();
// Do a GC cycle while the server is idle.
@SuppressWarnings("unused")
Future<?> possiblyIgnoredError =
@@ -66,7 +63,7 @@ class IdleServerTasks {
new Runnable() {
@Override
public void run() {
- try (AutoProfiler p = AutoProfiler.logged("Idle GC", log)) {
+ try (AutoProfiler p = AutoProfiler.logged("Idle GC", LOG)) {
System.gc();
}
}
@@ -108,8 +105,8 @@ class IdleServerTasks {
* Return true iff the server should continue processing requests.
* Called from the main thread, so it should return quickly.
*/
- public boolean continueProcessing() {
- if (!memoryHeuristic()) {
+ public boolean continueProcessing(long idleMillis) {
+ if (!memoryHeuristic(idleMillis)) {
return false;
}
if (workspaceDir == null) {
@@ -127,10 +124,8 @@ class IdleServerTasks {
return stat != null && stat.isDirectory();
}
- private boolean memoryHeuristic() {
- Preconditions.checkState(!executor.isShutdown());
- long idleNanos = BlazeClock.nanoTime() - idleStart;
- if (idleNanos < FIVE_MIN_NANOS) {
+ private boolean memoryHeuristic(long idleMillis) {
+ if (idleMillis < FIVE_MIN_MILLIS) {
// Don't check memory health until after five minutes.
return true;
}
@@ -139,12 +134,11 @@ class IdleServerTasks {
try {
memInfo = new ProcMeminfoParser();
} catch (IOException e) {
- log.info("Could not process /proc/meminfo: " + e);
+ LOG.info("Could not process /proc/meminfo: " + e);
return true;
}
- long totalPhysical;
- long totalFree;
+ long totalPhysical, totalFree;
try {
totalPhysical = memInfo.getTotalKb();
totalFree = memInfo.getFreeRamKb(); // See method javadoc.
@@ -159,8 +153,8 @@ class IdleServerTasks {
// If the system as a whole is low on memory, let this server die.
if (fractionFree < .1) {
- log.info("Terminating due to memory constraints");
- log.info(String.format("Total physical:%d\nTotal free: %d\n",
+ LOG.info("Terminating due to memory constraints");
+ LOG.info(String.format("Total physical:%d\nTotal free: %d\n",
totalPhysical, totalFree));
return false;
}