aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com
diff options
context:
space:
mode:
authorGravatar George Gensure <ggensure@uber.com>2018-07-10 14:13:50 -0700
committerGravatar Copybara-Service <copybara-piper@google.com>2018-07-10 14:15:14 -0700
commitf600b693642b3ee2475c6222a0eebb25371c6ca6 (patch)
treeab5020d726bc4fc4e032bad281727f251e53ad1c /src/main/java/com
parentbd4c5f92d582bdb4f4faa26a920c2e484e726ca4 (diff)
Retry ensureInputsPresent/execute/download
This observably removes any ill effect of CAS transience. Closes #5229. PiperOrigin-RevId: 204010317
Diffstat (limited to 'src/main/java/com')
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java4
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java59
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java49
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/Retrier.java25
4 files changed, 109 insertions, 28 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java
index e86b41ec6f..145ae9bd8e 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteActionContextProvider.java
@@ -40,6 +40,7 @@ final class RemoteActionContextProvider extends ActionContextProvider {
private final CommandEnvironment env;
@Nullable private final AbstractRemoteActionCache cache;
@Nullable private final GrpcRemoteExecutor executor;
+ private final RemoteRetrier retrier;
private final DigestUtil digestUtil;
private final Path logDir;
@@ -47,11 +48,13 @@ final class RemoteActionContextProvider extends ActionContextProvider {
CommandEnvironment env,
@Nullable AbstractRemoteActionCache cache,
@Nullable GrpcRemoteExecutor executor,
+ RemoteRetrier retrier,
DigestUtil digestUtil,
Path logDir) {
this.env = env;
this.executor = executor;
this.cache = cache;
+ this.retrier = retrier;
this.digestUtil = digestUtil;
this.logDir = logDir;
}
@@ -88,6 +91,7 @@ final class RemoteActionContextProvider extends ActionContextProvider {
commandId,
cache,
executor,
+ retrier,
digestUtil,
logDir);
return ImmutableList.of(new RemoteSpawnStrategy(env.getExecRoot(), spawnRunner));
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java
index 58a62480d9..a12ee300a4 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java
@@ -28,6 +28,7 @@ import com.google.devtools.build.lib.buildeventstream.PathConverter;
import com.google.devtools.build.lib.buildtool.BuildRequest;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.exec.ExecutorBuilder;
+import com.google.devtools.build.lib.remote.Retrier.RetryException;
import com.google.devtools.build.lib.remote.logging.LoggingInterceptor;
import com.google.devtools.build.lib.remote.util.DigestUtil;
import com.google.devtools.build.lib.runtime.BlazeModule;
@@ -43,11 +44,18 @@ import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.common.options.OptionsBase;
import com.google.devtools.common.options.OptionsProvider;
import com.google.devtools.remoteexecution.v1test.Digest;
+import com.google.protobuf.Any;
+import com.google.protobuf.InvalidProtocolBufferException;
+import com.google.rpc.PreconditionFailure;
+import com.google.rpc.PreconditionFailure.Violation;
import io.grpc.Channel;
import io.grpc.ClientInterceptors;
+import io.grpc.Status.Code;
+import io.grpc.protobuf.StatusProto;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.Executors;
+import java.util.function.Predicate;
import java.util.logging.Logger;
/** RemoteModule provides distributed cache and remote execution for Bazel. */
@@ -114,6 +122,40 @@ public final class RemoteModule extends BlazeModule {
"remote");
}
+ private static final String VIOLATION_TYPE_MISSING = "MISSING";
+
+ private static final Predicate<? super Exception> RETRIABLE_EXEC_ERRORS =
+ e -> {
+ if (e instanceof CacheNotFoundException || e.getCause() instanceof CacheNotFoundException) {
+ return true;
+ }
+ if (!(e instanceof RetryException)
+ || !RemoteRetrierUtils.causedByStatus((RetryException) e, Code.FAILED_PRECONDITION)) {
+ return false;
+ }
+ com.google.rpc.Status status = StatusProto.fromThrowable(e);
+ if (status == null || status.getDetailsCount() == 0) {
+ return false;
+ }
+ for (Any details : status.getDetailsList()) {
+ PreconditionFailure f;
+ try {
+ f = details.unpack(PreconditionFailure.class);
+ } catch (InvalidProtocolBufferException protoEx) {
+ return false;
+ }
+ if (f.getViolationsCount() == 0) {
+ return false; // Generally shouldn't happen
+ }
+ for (Violation v : f.getViolationsList()) {
+ if (!v.getType().equals(VIOLATION_TYPE_MISSING)) {
+ return false;
+ }
+ }
+ }
+ return true; // if *all* > 0 violations have type MISSING
+ };
+
@Override
public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
env.getEventBus().register(this);
@@ -167,6 +209,7 @@ public final class RemoteModule extends BlazeModule {
logger = new LoggingInterceptor(rpcLogFile, env.getRuntime().getClock());
}
+ final RemoteRetrier executeRetrier;
final AbstractRemoteActionCache cache;
if (enableBlobStoreCache) {
Retrier retrier =
@@ -175,6 +218,7 @@ public final class RemoteModule extends BlazeModule {
(e) -> false,
retryScheduler,
Retrier.ALLOW_ALL_CALLS);
+ executeRetrier = null;
cache =
new SimpleBlobStoreActionCache(
remoteOptions,
@@ -197,6 +241,7 @@ public final class RemoteModule extends BlazeModule {
RemoteRetrier.RETRIABLE_GRPC_ERRORS,
retryScheduler,
Retrier.ALLOW_ALL_CALLS);
+ executeRetrier = createExecuteRetrier(remoteOptions, retryScheduler);
cache =
new GrpcRemoteCache(
ch,
@@ -205,6 +250,7 @@ public final class RemoteModule extends BlazeModule {
retrier,
digestUtil);
} else {
+ executeRetrier = null;
cache = null;
}
@@ -230,7 +276,7 @@ public final class RemoteModule extends BlazeModule {
executor = null;
}
actionContextProvider =
- new RemoteActionContextProvider(env, cache, executor, digestUtil, logDir);
+ new RemoteActionContextProvider(env, cache, executor, executeRetrier, digestUtil, logDir);
} catch (IOException e) {
env.getReporter().handle(Event.error(e.getMessage()));
env.getBlazeModuleEnvironment()
@@ -272,4 +318,15 @@ public final class RemoteModule extends BlazeModule {
return SimpleBlobStoreFactory.isRemoteCacheOptions(options)
|| GrpcRemoteCache.isRemoteCacheOptions(options);
}
+
+ public static RemoteRetrier createExecuteRetrier(
+ RemoteOptions options, ListeningScheduledExecutorService retryService) {
+ return new RemoteRetrier(
+ options.experimentalRemoteRetry
+ ? () -> new Retrier.ZeroBackoff(options.experimentalRemoteRetryMaxAttempts)
+ : () -> Retrier.RETRIES_DISABLED,
+ RemoteModule.RETRIABLE_EXEC_ERRORS,
+ retryService,
+ Retrier.ALLOW_ALL_CALLS);
+ }
}
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
index 1198c2f5d8..95511d0599 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
@@ -88,6 +88,7 @@ class RemoteSpawnRunner implements SpawnRunner {
@Nullable private final Reporter cmdlineReporter;
@Nullable private final AbstractRemoteActionCache remoteCache;
@Nullable private final GrpcRemoteExecutor remoteExecutor;
+ @Nullable private final RemoteRetrier retrier;
private final String buildRequestId;
private final String commandId;
private final DigestUtil digestUtil;
@@ -107,6 +108,7 @@ class RemoteSpawnRunner implements SpawnRunner {
String commandId,
@Nullable AbstractRemoteActionCache remoteCache,
@Nullable GrpcRemoteExecutor remoteExecutor,
+ @Nullable RemoteRetrier retrier,
DigestUtil digestUtil,
Path logDir) {
this.execRoot = execRoot;
@@ -119,6 +121,7 @@ class RemoteSpawnRunner implements SpawnRunner {
this.cmdlineReporter = cmdlineReporter;
this.buildRequestId = buildRequestId;
this.commandId = commandId;
+ this.retrier = retrier;
this.digestUtil = digestUtil;
this.logDir = logDir;
}
@@ -194,34 +197,26 @@ class RemoteSpawnRunner implements SpawnRunner {
return execLocally(spawn, context, inputMap, uploadLocalResults, remoteCache, actionKey);
}
+ ExecuteRequest request =
+ ExecuteRequest.newBuilder()
+ .setInstanceName(remoteOptions.remoteInstanceName)
+ .setAction(action)
+ .setSkipCacheLookup(!acceptCachedResult)
+ .build();
try {
- // Upload the command and all the inputs into the remote cache.
- remoteCache.ensureInputsPresent(repository, execRoot, inputRoot, command);
- } catch (IOException e) {
- return execLocallyOrFail(spawn, context, inputMap, actionKey, uploadLocalResults, e);
- }
-
- final ActionResult result;
- boolean remoteCacheHit = false;
- try {
- ExecuteRequest.Builder request =
- ExecuteRequest.newBuilder()
- .setInstanceName(remoteOptions.remoteInstanceName)
- .setAction(action)
- .setSkipCacheLookup(!acceptCachedResult);
- ExecuteResponse reply = remoteExecutor.executeRemotely(request.build());
- maybeDownloadServerLogs(reply, actionKey);
- result = reply.getResult();
- remoteCacheHit = reply.getCachedResult();
- } catch (IOException e) {
- return execLocallyOrFail(spawn, context, inputMap, actionKey, uploadLocalResults, e);
- }
-
- try {
- return downloadRemoteResults(result, context.getFileOutErr())
- .setRunnerName(remoteCacheHit ? "remote cache hit" : getName())
- .setCacheHit(remoteCacheHit)
- .build();
+ return retrier.execute(
+ () -> {
+ // Upload the command and all the inputs into the remote cache.
+ remoteCache.ensureInputsPresent(repository, execRoot, inputRoot, command);
+
+ ExecuteResponse reply = remoteExecutor.executeRemotely(request);
+ maybeDownloadServerLogs(reply, actionKey);
+
+ return downloadRemoteResults(reply.getResult(), context.getFileOutErr())
+ .setRunnerName(reply.getCachedResult() ? "remote cache hit" : getName())
+ .setCacheHit(reply.getCachedResult())
+ .build();
+ });
} catch (IOException e) {
return execLocallyOrFail(spawn, context, inputMap, actionKey, uploadLocalResults, e);
}
diff --git a/src/main/java/com/google/devtools/build/lib/remote/Retrier.java b/src/main/java/com/google/devtools/build/lib/remote/Retrier.java
index a329c0475e..9d7228d051 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/Retrier.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/Retrier.java
@@ -185,6 +185,31 @@ public class Retrier {
}
};
+ /** No backoff. */
+ public static class ZeroBackoff implements Backoff {
+
+ private final int maxRetries;
+ private int retries;
+
+ public ZeroBackoff(int maxRetries) {
+ this.maxRetries = maxRetries;
+ }
+
+ @Override
+ public long nextDelayMillis() {
+ if (retries >= maxRetries) {
+ return -1;
+ }
+ retries++;
+ return 0;
+ }
+
+ @Override
+ public int getRetryAttempts() {
+ return retries;
+ }
+ }
+
private final Supplier<Backoff> backoffSupplier;
private final Predicate<? super Exception> shouldRetry;
private final CircuitBreaker circuitBreaker;