From 275ae45b1228bdd0f912c4fbd634b29ba4180383 Mon Sep 17 00:00:00 2001 From: pcloudy Date: Wed, 15 Nov 2017 07:11:08 -0800 Subject: Automated rollback of commit 4869c4e17d5b1410070a1570f3244148d8f97b5d. *** Reason for rollback *** Causing Bazel server to crash when running bazel clean --expunge https://github.com/bazelbuild/bazel/issues/3956 *** Original change description *** Delayed rollforward of commit 8fb311b4dced234b2f799c16c7d08148619f4087. This was rolled back due to Tensorflow breakage but the patch I exported to gerrit (https://bazel-review.googlesource.com/c/bazel/+/18590) passed Tensorflow (https://ci.bazel.io/job/bazel/job/presubmit/52/Downstream_projects/). Confirmed with jcater@ that the "newly failing" projects in the Global Tests are known issues. I think we can check this in now. Additionally I had attempted to reproduce any tensorflow issues with this by building and testing Tensor... *** ROLLBACK_OF=172361085 RELNOTES:None PiperOrigin-RevId: 175821671 --- .../devtools/build/lib/server/GrpcServerImpl.java | 117 ++++++--------------- .../devtools/build/lib/server/IdleServerTasks.java | 19 ++-- 2 files changed, 37 insertions(+), 99 deletions(-) (limited to 'src/main/java/com/google/devtools/build/lib/server') diff --git a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java index 35c2002d87..c412131ead 100644 --- a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java +++ b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java @@ -116,25 +116,17 @@ public class GrpcServerImpl implements RPCServer { private static final long NANOSECONDS_IN_MS = TimeUnit.MILLISECONDS.toNanos(1); - private static final long NANOS_PER_IDLE_CHECK = - TimeUnit.NANOSECONDS.convert(5, TimeUnit.SECONDS); - private class RunningCommand implements AutoCloseable { private final Thread thread; private final String id; - private RunningCommand() throws InterruptedException { + private RunningCommand() { thread = Thread.currentThread(); id = UUID.randomUUID().toString(); synchronized (runningCommands) { if (runningCommands.isEmpty()) { busy(); } - - if (shuttingDown) { - throw new InterruptedException(); - } - runningCommands.put(id, this); runningCommands.notify(); } @@ -451,57 +443,21 @@ public class GrpcServerImpl implements RPCServer { } } - // The synchronized block is here so that if the "PID file deleted" timer or the idle shutdown - // mechanism kicks in during a regular shutdown, they don't race. - @VisibleForTesting // productionVisibility = Visibility.PRIVATE - void signalShutdown() { - synchronized (runningCommands) { - shuttingDown = true; - server.shutdown(); - } - } - /** - * A thread that shuts the server down under the following conditions: - * - * + * A thread that watches if the PID file changes and shuts down the server immediately if so. */ - private class ShutdownWatcherThread extends Thread { - private long lastIdleCheckNanos; + private class PidFileWatcherThread extends Thread { + private boolean shuttingDown = false; - private ShutdownWatcherThread() { - super("grpc-server-shutdown-watcher"); + private PidFileWatcherThread() { + super("pid-file-watcher"); setDaemon(true); } - private void doIdleChecksMaybe() { - synchronized (runningCommands) { - if (!runningCommands.isEmpty()) { - lastIdleCheckNanos = -1; - return; - } - - long currentNanos = BlazeClock.nanoTime(); - if (lastIdleCheckNanos == -1) { - lastIdleCheckNanos = currentNanos; - return; - } - - if (currentNanos - lastIdleCheckNanos < NANOS_PER_IDLE_CHECK) { - return; - } - - if (!idleServerTasks.continueProcessing()) { - signalShutdown(); - server.shutdown(); - } - - lastIdleCheckNanos = currentNanos; - } + // The synchronized block is here so that if the "PID file deleted" timer kicks in during a + // regular shutdown, they don't race. + private synchronized void signalShutdown() { + shuttingDown = true; } @Override @@ -517,12 +473,8 @@ public class GrpcServerImpl implements RPCServer { // Handled by virtue of ok not being set to true } - if (ok) { - doIdleChecksMaybe(); - } - if (!ok) { - synchronized (ShutdownWatcherThread.this) { + synchronized (PidFileWatcherThread.this) { if (shuttingDown) { logger.warning("PID file deleted or overwritten but shutdown is already in progress"); break; @@ -558,7 +510,7 @@ public class GrpcServerImpl implements RPCServer { private final String responseCookie; private final AtomicLong interruptCounter = new AtomicLong(0); private final int maxIdleSeconds; - private final ShutdownWatcherThread shutdownWatcherThread; + private final PidFileWatcherThread pidFileWatcherThread; private final Path pidFile; private final String pidInFile; private final List filesToDeleteAtExit = new ArrayList<>(); @@ -566,13 +518,16 @@ public class GrpcServerImpl implements RPCServer { private Server server; private IdleServerTasks idleServerTasks; - private InetSocketAddress address; - private boolean serving; - private boolean shuttingDown = false; + boolean serving; public GrpcServerImpl(CommandExecutor commandExecutor, Clock clock, int port, Path workspace, Path serverDirectory, int maxIdleSeconds) throws IOException { - Runtime.getRuntime().addShutdownHook(new Thread(() -> shutdownHook())); + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { + shutdownHook(); + } + }); // server.pid was written in the C++ launcher after fork() but before exec() . // The client only accesses the pid file after connecting to the socket @@ -601,22 +556,12 @@ public class GrpcServerImpl implements RPCServer { requestCookie = generateCookie(random, 16); responseCookie = generateCookie(random, 16); - shutdownWatcherThread = new ShutdownWatcherThread(); - shutdownWatcherThread.start(); + pidFileWatcherThread = new PidFileWatcherThread(); + pidFileWatcherThread.start(); idleServerTasks = new IdleServerTasks(workspace); idleServerTasks.idle(); } - @VisibleForTesting // productionVisibility = Visibility.PRIVATE - String getRequestCookie() { - return requestCookie; - } - - @VisibleForTesting // productionVisibility = Visibility.PRIVATE - InetSocketAddress getAddress() { - return address; - } - private void idle() { Preconditions.checkState(idleServerTasks == null); idleServerTasks = new IdleServerTasks(workspace); @@ -702,7 +647,7 @@ public class GrpcServerImpl implements RPCServer { } logger.info("About to shutdown due to idleness"); - signalShutdown(); + server.shutdown(); } /** @@ -728,7 +673,7 @@ public class GrpcServerImpl implements RPCServer { @Override public void prepareForAbruptShutdown() { disableShutdownHooks(); - signalShutdown(); + pidFileWatcherThread.signalShutdown(); } @Override @@ -775,7 +720,7 @@ public class GrpcServerImpl implements RPCServer { timeoutThread.start(); } serving = true; - this.address = new InetSocketAddress(address.getAddress(), server.getPort()); + writeServerFile( PORT_FILE, InetAddresses.toUriString(address.getAddress()) + ":" + server.getPort()); writeServerFile(REQUEST_COOKIE_FILE, requestCookie); @@ -817,7 +762,9 @@ public class GrpcServerImpl implements RPCServer { } } - /** Schedule the specified file for (attempted) deletion at JVM exit. */ + /** + * Schedule the specified file for (attempted) deletion at JVM exit. + */ protected void deleteAtExit(final Path path) { synchronized (filesToDeleteAtExit) { filesToDeleteAtExit.add(path); @@ -837,8 +784,8 @@ public class GrpcServerImpl implements RPCServer { logger.severe(err.toString()); } - @VisibleForTesting // productionVisibility = Visibility.PRIVATE - void executeCommand(RunRequest request, StreamObserver observer, GrpcSink sink) { + private void executeCommand( + RunRequest request, StreamObserver observer, GrpcSink sink) { sink.setCommandThread(Thread.currentThread()); if (!request.getCookie().equals(requestCookie) || request.getClientDescription().isEmpty()) { @@ -934,7 +881,7 @@ public class GrpcServerImpl implements RPCServer { boolean shutdown = commandExecutor.shutdown(); if (shutdown) { - signalShutdown(); + server.shutdown(); } RunResponse response = RunResponse.newBuilder() @@ -981,8 +928,6 @@ public class GrpcServerImpl implements RPCServer { streamObserver.onNext(response.build()); streamObserver.onCompleted(); - } catch (InterruptedException e) { - // Ignore, we are shutting down anyway } } @@ -1025,8 +970,6 @@ public class GrpcServerImpl implements RPCServer { logger.info( "Client cancelled RPC of cancellation request for " + request.getCommandId()); } - } catch (InterruptedException e) { - // Ignore, we are shutting down anyway } } }; diff --git a/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java b/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java index 6562ca6734..64d4502e61 100644 --- a/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java +++ b/src/main/java/com/google/devtools/build/lib/server/IdleServerTasks.java @@ -15,7 +15,6 @@ package com.google.devtools.build.lib.server; import com.google.common.base.Preconditions; -import com.google.devtools.build.lib.clock.BlazeClock; import com.google.devtools.build.lib.profiler.AutoProfiler; import com.google.devtools.build.lib.unix.ProcMeminfoParser; import com.google.devtools.build.lib.util.LoggingUtil; @@ -36,12 +35,11 @@ import javax.annotation.Nullable; */ class IdleServerTasks { - private long idleStart; private final Path workspaceDir; private final ScheduledThreadPoolExecutor executor; private static final Logger logger = Logger.getLogger(IdleServerTasks.class.getName()); - private static final long FIVE_MIN_NANOS = 1000L * 1000 * 1000 * 60 * 5; + private static final long FIVE_MIN_MILLIS = 1000 * 60 * 5; /** * Must be called from the main thread. @@ -58,7 +56,6 @@ class IdleServerTasks { public void idle() { Preconditions.checkState(!executor.isShutdown()); - idleStart = BlazeClock.nanoTime(); // Do a GC cycle while the server is idle. @SuppressWarnings("unused") Future possiblyIgnoredError = @@ -102,11 +99,11 @@ class IdleServerTasks { } /** - * Return true iff the server should continue processing requests. Called from the main thread, so - * it should return quickly. + * Return true iff the server should continue processing requests. + * Called from the main thread, so it should return quickly. */ - public boolean continueProcessing() { - if (!memoryHeuristic()) { + public boolean continueProcessing(long idleMillis) { + if (!memoryHeuristic(idleMillis)) { return false; } if (workspaceDir == null) { @@ -124,10 +121,8 @@ class IdleServerTasks { return stat != null && stat.isDirectory(); } - private boolean memoryHeuristic() { - Preconditions.checkState(!executor.isShutdown()); - long idleNanos = BlazeClock.nanoTime() - idleStart; - if (idleNanos < FIVE_MIN_NANOS) { + private boolean memoryHeuristic(long idleMillis) { + if (idleMillis < FIVE_MIN_MILLIS) { // Don't check memory health until after five minutes. return true; } -- cgit v1.2.3