diff options
author | 2018-07-10 14:13:50 -0700 | |
---|---|---|
committer | 2018-07-10 14:15:14 -0700 | |
commit | f600b693642b3ee2475c6222a0eebb25371c6ca6 (patch) | |
tree | ab5020d726bc4fc4e032bad281727f251e53ad1c /src/main/java/com | |
parent | bd4c5f92d582bdb4f4faa26a920c2e484e726ca4 (diff) |
Retry ensureInputsPresent/execute/download
This observably removes any ill effect of CAS transience.
Closes #5229.
PiperOrigin-RevId: 204010317
Diffstat (limited to 'src/main/java/com')
4 files changed, 109 insertions, 28 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java index e86b41ec6f..145ae9bd8e 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java @@ -40,6 +40,7 @@ final class RemoteActionContextProvider extends ActionContextProvider { private final CommandEnvironment env; @Nullable private final AbstractRemoteActionCache cache; @Nullable private final GrpcRemoteExecutor executor; + private final RemoteRetrier retrier; private final DigestUtil digestUtil; private final Path logDir; @@ -47,11 +48,13 @@ final class RemoteActionContextProvider extends ActionContextProvider { CommandEnvironment env, @Nullable AbstractRemoteActionCache cache, @Nullable GrpcRemoteExecutor executor, + RemoteRetrier retrier, DigestUtil digestUtil, Path logDir) { this.env = env; this.executor = executor; this.cache = cache; + this.retrier = retrier; this.digestUtil = digestUtil; this.logDir = logDir; } @@ -88,6 +91,7 @@ final class RemoteActionContextProvider extends ActionContextProvider { commandId, cache, executor, + retrier, digestUtil, logDir); return ImmutableList.of(new RemoteSpawnStrategy(env.getExecRoot(), spawnRunner)); diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 58a62480d9..a12ee300a4 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -28,6 +28,7 @@ import com.google.devtools.build.lib.buildeventstream.PathConverter; import com.google.devtools.build.lib.buildtool.BuildRequest; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.exec.ExecutorBuilder; +import com.google.devtools.build.lib.remote.Retrier.RetryException; import com.google.devtools.build.lib.remote.logging.LoggingInterceptor; import com.google.devtools.build.lib.remote.util.DigestUtil; import com.google.devtools.build.lib.runtime.BlazeModule; @@ -43,11 +44,18 @@ import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.common.options.OptionsBase; import com.google.devtools.common.options.OptionsProvider; import com.google.devtools.remoteexecution.v1test.Digest; +import com.google.protobuf.Any; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.rpc.PreconditionFailure; +import com.google.rpc.PreconditionFailure.Violation; import io.grpc.Channel; import io.grpc.ClientInterceptors; +import io.grpc.Status.Code; +import io.grpc.protobuf.StatusProto; import java.io.IOException; import java.util.Map; import java.util.concurrent.Executors; +import java.util.function.Predicate; import java.util.logging.Logger; /** RemoteModule provides distributed cache and remote execution for Bazel. */ @@ -114,6 +122,40 @@ public final class RemoteModule extends BlazeModule { "remote"); } + private static final String VIOLATION_TYPE_MISSING = "MISSING"; + + private static final Predicate<? super Exception> RETRIABLE_EXEC_ERRORS = + e -> { + if (e instanceof CacheNotFoundException || e.getCause() instanceof CacheNotFoundException) { + return true; + } + if (!(e instanceof RetryException) + || !RemoteRetrierUtils.causedByStatus((RetryException) e, Code.FAILED_PRECONDITION)) { + return false; + } + com.google.rpc.Status status = StatusProto.fromThrowable(e); + if (status == null || status.getDetailsCount() == 0) { + return false; + } + for (Any details : status.getDetailsList()) { + PreconditionFailure f; + try { + f = details.unpack(PreconditionFailure.class); + } catch (InvalidProtocolBufferException protoEx) { + return false; + } + if (f.getViolationsCount() == 0) { + return false; // Generally shouldn't happen + } + for (Violation v : f.getViolationsList()) { + if (!v.getType().equals(VIOLATION_TYPE_MISSING)) { + return false; + } + } + } + return true; // if *all* > 0 violations have type MISSING + }; + @Override public void beforeCommand(CommandEnvironment env) throws AbruptExitException { env.getEventBus().register(this); @@ -167,6 +209,7 @@ public final class RemoteModule extends BlazeModule { logger = new LoggingInterceptor(rpcLogFile, env.getRuntime().getClock()); } + final RemoteRetrier executeRetrier; final AbstractRemoteActionCache cache; if (enableBlobStoreCache) { Retrier retrier = @@ -175,6 +218,7 @@ public final class RemoteModule extends BlazeModule { (e) -> false, retryScheduler, Retrier.ALLOW_ALL_CALLS); + executeRetrier = null; cache = new SimpleBlobStoreActionCache( remoteOptions, @@ -197,6 +241,7 @@ public final class RemoteModule extends BlazeModule { RemoteRetrier.RETRIABLE_GRPC_ERRORS, retryScheduler, Retrier.ALLOW_ALL_CALLS); + executeRetrier = createExecuteRetrier(remoteOptions, retryScheduler); cache = new GrpcRemoteCache( ch, @@ -205,6 +250,7 @@ public final class RemoteModule extends BlazeModule { retrier, digestUtil); } else { + executeRetrier = null; cache = null; } @@ -230,7 +276,7 @@ public final class RemoteModule extends BlazeModule { executor = null; } actionContextProvider = - new RemoteActionContextProvider(env, cache, executor, digestUtil, logDir); + new RemoteActionContextProvider(env, cache, executor, executeRetrier, digestUtil, logDir); } catch (IOException e) { env.getReporter().handle(Event.error(e.getMessage())); env.getBlazeModuleEnvironment() @@ -272,4 +318,15 @@ public final class RemoteModule extends BlazeModule { return SimpleBlobStoreFactory.isRemoteCacheOptions(options) || GrpcRemoteCache.isRemoteCacheOptions(options); } + + public static RemoteRetrier createExecuteRetrier( + RemoteOptions options, ListeningScheduledExecutorService retryService) { + return new RemoteRetrier( + options.experimentalRemoteRetry + ? () -> new Retrier.ZeroBackoff(options.experimentalRemoteRetryMaxAttempts) + : () -> Retrier.RETRIES_DISABLED, + RemoteModule.RETRIABLE_EXEC_ERRORS, + retryService, + Retrier.ALLOW_ALL_CALLS); + } } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index 1198c2f5d8..95511d0599 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -88,6 +88,7 @@ class RemoteSpawnRunner implements SpawnRunner { @Nullable private final Reporter cmdlineReporter; @Nullable private final AbstractRemoteActionCache remoteCache; @Nullable private final GrpcRemoteExecutor remoteExecutor; + @Nullable private final RemoteRetrier retrier; private final String buildRequestId; private final String commandId; private final DigestUtil digestUtil; @@ -107,6 +108,7 @@ class RemoteSpawnRunner implements SpawnRunner { String commandId, @Nullable AbstractRemoteActionCache remoteCache, @Nullable GrpcRemoteExecutor remoteExecutor, + @Nullable RemoteRetrier retrier, DigestUtil digestUtil, Path logDir) { this.execRoot = execRoot; @@ -119,6 +121,7 @@ class RemoteSpawnRunner implements SpawnRunner { this.cmdlineReporter = cmdlineReporter; this.buildRequestId = buildRequestId; this.commandId = commandId; + this.retrier = retrier; this.digestUtil = digestUtil; this.logDir = logDir; } @@ -194,34 +197,26 @@ class RemoteSpawnRunner implements SpawnRunner { return execLocally(spawn, context, inputMap, uploadLocalResults, remoteCache, actionKey); } + ExecuteRequest request = + ExecuteRequest.newBuilder() + .setInstanceName(remoteOptions.remoteInstanceName) + .setAction(action) + .setSkipCacheLookup(!acceptCachedResult) + .build(); try { - // Upload the command and all the inputs into the remote cache. - remoteCache.ensureInputsPresent(repository, execRoot, inputRoot, command); - } catch (IOException e) { - return execLocallyOrFail(spawn, context, inputMap, actionKey, uploadLocalResults, e); - } - - final ActionResult result; - boolean remoteCacheHit = false; - try { - ExecuteRequest.Builder request = - ExecuteRequest.newBuilder() - .setInstanceName(remoteOptions.remoteInstanceName) - .setAction(action) - .setSkipCacheLookup(!acceptCachedResult); - ExecuteResponse reply = remoteExecutor.executeRemotely(request.build()); - maybeDownloadServerLogs(reply, actionKey); - result = reply.getResult(); - remoteCacheHit = reply.getCachedResult(); - } catch (IOException e) { - return execLocallyOrFail(spawn, context, inputMap, actionKey, uploadLocalResults, e); - } - - try { - return downloadRemoteResults(result, context.getFileOutErr()) - .setRunnerName(remoteCacheHit ? "remote cache hit" : getName()) - .setCacheHit(remoteCacheHit) - .build(); + return retrier.execute( + () -> { + // Upload the command and all the inputs into the remote cache. + remoteCache.ensureInputsPresent(repository, execRoot, inputRoot, command); + + ExecuteResponse reply = remoteExecutor.executeRemotely(request); + maybeDownloadServerLogs(reply, actionKey); + + return downloadRemoteResults(reply.getResult(), context.getFileOutErr()) + .setRunnerName(reply.getCachedResult() ? "remote cache hit" : getName()) + .setCacheHit(reply.getCachedResult()) + .build(); + }); } catch (IOException e) { return execLocallyOrFail(spawn, context, inputMap, actionKey, uploadLocalResults, e); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/Retrier.java b/src/main/java/com/google/devtools/build/lib/remote/Retrier.java index a329c0475e..9d7228d051 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/Retrier.java +++ b/src/main/java/com/google/devtools/build/lib/remote/Retrier.java @@ -185,6 +185,31 @@ public class Retrier { } }; + /** No backoff. */ + public static class ZeroBackoff implements Backoff { + + private final int maxRetries; + private int retries; + + public ZeroBackoff(int maxRetries) { + this.maxRetries = maxRetries; + } + + @Override + public long nextDelayMillis() { + if (retries >= maxRetries) { + return -1; + } + retries++; + return 0; + } + + @Override + public int getRetryAttempts() { + return retries; + } + } + private final Supplier<Backoff> backoffSupplier; private final Predicate<? super Exception> shouldRetry; private final CircuitBreaker circuitBreaker; |