aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/buildeventservice
diff options
context:
space:
mode:
authorGravatar buchgr <buchgr@google.com>2017-09-26 10:41:37 +0200
committerGravatar Vladimir Moskva <vladmos@google.com>2017-09-26 12:31:53 +0200
commit926fd297ee08fa18d3206c25f74e70e0156d7aac (patch)
tree30826855154102e8666b872934b98f47d56ac9f2 /src/main/java/com/google/devtools/build/lib/buildeventservice
parent618f9816de7366dc01bca7f39a4622dbbd53fb75 (diff)
Improve BES retries.
- The upload of build events is retried infinitely often as long as between two failures progress was made, that is ACKs were received. - Set the initial backoff after a failure to 0, and every further backoff to 1s. This aligns relatively well with the backoff times gRPC connections are retried [1]. PiperOrigin-RevId: 170022707
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/buildeventservice')
-rw-r--r--src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceTransport.java80
1 files changed, 46 insertions, 34 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceTransport.java b/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceTransport.java
index f453add769..5df6580499 100644
--- a/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceTransport.java
+++ b/src/main/java/com/google/devtools/build/lib/buildeventservice/BuildEventServiceTransport.java
@@ -48,6 +48,8 @@ import com.google.devtools.build.lib.events.EventKind;
import com.google.devtools.build.lib.runtime.BlazeModule.ModuleEnvironment;
import com.google.devtools.build.lib.util.AbruptExitException;
import com.google.devtools.build.lib.util.ExitCode;
+import com.google.devtools.build.lib.util.JavaSleeper;
+import com.google.devtools.build.lib.util.Sleeper;
import com.google.devtools.build.v1.BuildStatus.Result;
import com.google.devtools.build.v1.PublishBuildToolEventStreamRequest;
import com.google.devtools.build.v1.PublishBuildToolEventStreamResponse;
@@ -65,7 +67,6 @@ import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
-import java.util.concurrent.locks.LockSupport;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.Nullable;
@@ -90,8 +91,8 @@ public class BuildEventServiceTransport implements BuildEventTransport {
private final BuildEventServiceProtoUtil besProtoUtil;
private final ModuleEnvironment moduleEnvironment;
private final EventHandler commandLineReporter;
-
private final PathConverter pathConverter;
+ private final Sleeper sleeper;
/** Contains all pendingAck events that might be retried in case of failures. */
private ConcurrentLinkedDeque<PublishBuildToolEventStreamRequest> pendingAck;
/** Contains all events should be sent ordered by sequence number. */
@@ -110,6 +111,11 @@ public class BuildEventServiceTransport implements BuildEventTransport {
private volatile Exception lastKnownError;
/** Returns true if we already reported a warning or error to UI. */
private volatile boolean errorsReported;
+ /**
+ * Returns the number of ACKs received since the last time {@link #publishEventStream()} was
+ * retried due to a failure.
+ */
+ private volatile int acksReceivedSinceLastRetry;
public BuildEventServiceTransport(
BuildEventServiceClient besClient,
@@ -124,29 +130,29 @@ public class BuildEventServiceTransport implements BuildEventTransport {
PathConverter pathConverter,
EventHandler commandLineReporter,
@Nullable String projectId) {
- this(
- besClient,
- uploadTimeout,
- bestEffortUpload,
- publishLifecycleEvents,
- moduleEnvironment,
- new BuildEventServiceProtoUtil(buildRequestId, invocationId, projectId, command, clock),
- pathConverter,
- commandLineReporter);
+ this(besClient, uploadTimeout, bestEffortUpload, publishLifecycleEvents, buildRequestId,
+ invocationId, command, moduleEnvironment, clock, pathConverter, commandLineReporter,
+ projectId, new JavaSleeper());
}
@VisibleForTesting
- BuildEventServiceTransport(
+ public BuildEventServiceTransport(
BuildEventServiceClient besClient,
Duration uploadTimeout,
boolean bestEffortUpload,
boolean publishLifecycleEvents,
+ String buildRequestId,
+ String invocationId,
+ String command,
ModuleEnvironment moduleEnvironment,
- BuildEventServiceProtoUtil besProtoUtil,
+ Clock clock,
PathConverter pathConverter,
- EventHandler commandLineReporter) {
+ EventHandler commandLineReporter,
+ @Nullable String projectId,
+ Sleeper sleeper) {
this.besClient = besClient;
- this.besProtoUtil = besProtoUtil;
+ this.besProtoUtil =
+ new BuildEventServiceProtoUtil(buildRequestId, invocationId, projectId, command, clock);
this.publishLifecycleEvents = publishLifecycleEvents;
this.moduleEnvironment = moduleEnvironment;
this.commandLineReporter = commandLineReporter;
@@ -162,6 +168,7 @@ public class BuildEventServiceTransport implements BuildEventTransport {
this.invocationResult = UNKNOWN_STATUS;
this.uploadTimeout = uploadTimeout;
this.bestEffortUpload = bestEffortUpload;
+ this.sleeper = sleeper;
}
@Override
@@ -449,7 +456,7 @@ public class BuildEventServiceTransport implements BuildEventTransport {
}
/** Method responsible for a single Streaming RPC. */
- private static void publishEventStream(
+ private void publishEventStream(
final ConcurrentLinkedDeque<PublishBuildToolEventStreamRequest> pendingAck,
final BlockingDeque<PublishBuildToolEventStreamRequest> pendingSend,
final BuildEventServiceClient besClient)
@@ -502,7 +509,8 @@ public class BuildEventServiceTransport implements BuildEventTransport {
&& event.getOrderedBuildEvent().getEvent().getEventCase() == COMPONENT_STREAM_FINISHED;
}
- private static Function<PublishBuildToolEventStreamResponse, Void> ackCallback(
+ @SuppressWarnings("NonAtomicVolatileUpdate")
+ private Function<PublishBuildToolEventStreamResponse, Void> ackCallback(
final Deque<PublishBuildToolEventStreamRequest> pendingAck,
final BuildEventServiceClient besClient) {
return ack -> {
@@ -522,43 +530,47 @@ public class BuildEventServiceTransport implements BuildEventTransport {
logger.log(Level.INFO, "Last ACK received.");
besClient.closeStream();
}
+ acksReceivedSinceLastRetry++;
return null;
};
}
- private void retryOnException(Callable<?> c) throws Exception {
- retryOnException(c, 3, 100);
- }
-
/**
* Executes a {@link Callable} retrying on exception thrown.
*/
// TODO(eduardocolaco): Implement transient/persistent failures
- private void retryOnException(Callable<?> c, final int maxRetries, final long initalDelayMillis)
- throws Exception {
+ private void retryOnException(Callable<?> c) throws Exception {
+ final int maxRetries = 5;
+ final long initialDelayMillis = 0;
+ final long delayMillis = 1000;
+
int tries = 0;
while (tries <= maxRetries) {
try {
+ acksReceivedSinceLastRetry = 0;
c.call();
lastKnownError = null;
return;
} catch (InterruptedException e) {
throw e;
} catch (Exception e) {
+ if (acksReceivedSinceLastRetry > 0) {
+ logger.fine(String.format("ACKs received since last retry %d.",
+ acksReceivedSinceLastRetry));
+ tries = 0;
+ }
tries++;
lastKnownError = e;
- /*
- * Exponential backoff:
- * Retry 1: initalDelayMillis * 2^0
- * Retry 2: initalDelayMillis * 2^1
- * Retry 3: initalDelayMillis * 2^2
- * ...
- */
- long sleepMillis = initalDelayMillis << (tries - 1);
- String message = String.format("Retrying RPC to BES. Attempt %s. Backoff %s ms.",
- tries, sleepMillis);
+ long sleepMillis;
+ if (tries == 1) {
+ sleepMillis = initialDelayMillis;
+ } else {
+ // This roughly matches the gRPC connection backoff.
+ sleepMillis = (long) (delayMillis * Math.pow(1.6, tries));
+ }
+ String message = String.format("Retrying RPC to BES. Backoff %s ms.", sleepMillis);
logger.log(Level.INFO, message, lastKnownError);
- LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(sleepMillis));
+ sleeper.sleepMillis(sleepMillis);
}
}
Preconditions.checkNotNull(lastKnownError);