aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java61
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutor.java16
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/RemoteRetrierUtils.java5
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java36
-rw-r--r--src/main/java/com/google/devtools/build/lib/remote/TimeoutException.java22
-rw-r--r--src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java31
-rwxr-xr-xsrc/test/shell/bazel/remote_execution_test.sh10
-rw-r--r--src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java11
-rw-r--r--src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/WatcherServer.java91
9 files changed, 188 insertions, 95 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java b/src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java
new file mode 100644
index 0000000000..63839a7ae8
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/remote/ExecutionStatusException.java
@@ -0,0 +1,61 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.remote;
+
+import com.google.devtools.remoteexecution.v1test.ExecuteResponse;
+import com.google.rpc.Status;
+import io.grpc.Status.Code;
+import io.grpc.StatusRuntimeException;
+import javax.annotation.Nullable;
+
+/**
+ * Exception to signal that a remote execution has failed with a certain status received from the
+ * server, and other details, such as the action result and the server logs. The exception may be
+ * retriable or not, depending on the status/details.
+ */
+public class ExecutionStatusException extends StatusRuntimeException {
+ private final Status status;
+ private final ExecuteResponse response;
+
+ public ExecutionStatusException(Status status, @Nullable ExecuteResponse response) {
+ super(convertStatus(status, response));
+ this.status = status;
+ this.response = response;
+ }
+
+ private static io.grpc.Status convertStatus(Status status, @Nullable ExecuteResponse response) {
+ io.grpc.Status result =
+ io.grpc.Status.fromCodeValue(
+ // Hack: convert to non-retriable exception on timeouts.
+ isExecutionTimeout(status, response)
+ ? Code.FAILED_PRECONDITION.value()
+ : status.getCode());
+ return result.withDescription(status.getMessage());
+ }
+
+ private static boolean isExecutionTimeout(Status status, @Nullable ExecuteResponse response) {
+ return response != null
+ && response.getStatus().equals(status)
+ && status.getCode() == Code.DEADLINE_EXCEEDED.value();
+ }
+
+ public boolean isExecutionTimeout() {
+ return isExecutionTimeout(status, response);
+ }
+
+ @Nullable
+ public ExecuteResponse getResponse() {
+ return response;
+ }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutor.java b/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutor.java
index 3b91174a06..fb8cf0d9c4 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutor.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutor.java
@@ -73,29 +73,23 @@ class GrpcRemoteExecutor {
.withCallCredentials(callCredentials);
}
- private void handleStatus(Status statusProto) throws IOException {
- StatusRuntimeException e = StatusProto.toStatusRuntimeException(statusProto);
- if (e.getStatus().getCode() == Code.OK) {
+ private void handleStatus(Status statusProto, @Nullable ExecuteResponse resp) throws IOException {
+ if (statusProto.getCode() == Code.OK.value()) {
return;
}
- if (e.getStatus().getCode() == Code.DEADLINE_EXCEEDED) {
- // This was caused by the command itself exceeding the timeout,
- // therefore it is not retriable.
- throw new TimeoutException();
- }
- throw e;
+ throw new ExecutionStatusException(statusProto, resp);
}
private @Nullable ExecuteResponse getOperationResponse(Operation op) throws IOException {
if (op.getResultCase() == Operation.ResultCase.ERROR) {
- handleStatus(op.getError());
+ handleStatus(op.getError(), null);
}
if (op.getDone()) {
Preconditions.checkState(op.getResultCase() != Operation.ResultCase.RESULT_NOT_SET);
try {
ExecuteResponse resp = op.getResponse().unpack(ExecuteResponse.class);
if (resp.hasStatus()) {
- handleStatus(resp.getStatus());
+ handleStatus(resp.getStatus(), resp);
}
return resp;
} catch (InvalidProtocolBufferException e) {
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrierUtils.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrierUtils.java
index f518b43682..70c73fbcdf 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrierUtils.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteRetrierUtils.java
@@ -30,4 +30,9 @@ public final class RemoteRetrierUtils {
}
return false;
}
+
+ public static boolean causedByExecTimeout(RetryException e) {
+ return (e.getCause() instanceof ExecutionStatusException
+ && ((ExecutionStatusException) e.getCause()).isExecutionTimeout());
+ }
}
diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
index 1b7a0809d6..289fe8b4a9 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
@@ -55,8 +55,6 @@ import com.google.protobuf.TextFormat.ParseException;
import io.grpc.Context;
import io.grpc.Status.Code;
import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
@@ -239,27 +237,33 @@ class RemoteSpawnRunner implements SpawnRunner {
if (Thread.currentThread().isInterrupted()) {
throw new InterruptedException();
}
- if (options.remoteLocalFallback && !(cause instanceof TimeoutException)) {
+ if (options.remoteLocalFallback
+ && !(cause instanceof RetryException
+ && RemoteRetrierUtils.causedByExecTimeout((RetryException) cause))) {
return execLocally(spawn, policy, inputMap, uploadLocalResults, remoteCache, actionKey);
}
return handleError(cause, policy.getFileOutErr());
}
- private SpawnResult handleError(IOException exception, FileOutErr outErr) throws IOException,
- ExecException {
+ private SpawnResult handleError(IOException exception, FileOutErr outErr)
+ throws ExecException, InterruptedException, IOException {
final Throwable cause = exception.getCause();
- if (exception instanceof TimeoutException || cause instanceof TimeoutException) {
- // TODO(buchgr): provide stdout/stderr from the action that timed out.
- // Remove the unsuported message once remote execution tests no longer check for it.
- try (OutputStream out = outErr.getOutputStream()) {
- String msg = "Log output for timeouts is not yet supported in remote execution.\n";
- out.write(msg.getBytes(StandardCharsets.UTF_8));
+ if (cause instanceof ExecutionStatusException) {
+ ExecutionStatusException e = (ExecutionStatusException) cause;
+ if (e.getResponse() != null) {
+ ExecuteResponse resp = e.getResponse();
+ if (resp.hasResult()) {
+ // We try to download all (partial) results even on server error, for debuggability.
+ remoteCache.download(resp.getResult(), execRoot, outErr);
+ }
+ }
+ if (e.isExecutionTimeout()) {
+ return new SpawnResult.Builder()
+ .setRunnerName(getName())
+ .setStatus(Status.TIMEOUT)
+ .setExitCode(POSIX_TIMEOUT_EXIT_CODE)
+ .build();
}
- return new SpawnResult.Builder()
- .setRunnerName(getName())
- .setStatus(Status.TIMEOUT)
- .setExitCode(POSIX_TIMEOUT_EXIT_CODE)
- .build();
}
final Status status;
if (exception instanceof RetryException
diff --git a/src/main/java/com/google/devtools/build/lib/remote/TimeoutException.java b/src/main/java/com/google/devtools/build/lib/remote/TimeoutException.java
deleted file mode 100644
index f1ba33a8dc..0000000000
--- a/src/main/java/com/google/devtools/build/lib/remote/TimeoutException.java
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2017 The Bazel Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-package com.google.devtools.build.lib.remote;
-
-import java.io.IOException;
-
-/**
- * Exception to signal that a remote execution has timed out.
- */
-class TimeoutException extends IOException {
-}
diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java
index 150e759718..98b56706d7 100644
--- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java
+++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java
@@ -63,6 +63,7 @@ import com.google.devtools.common.options.Options;
import com.google.devtools.remoteexecution.v1test.ActionResult;
import com.google.devtools.remoteexecution.v1test.ExecuteRequest;
import com.google.devtools.remoteexecution.v1test.ExecuteResponse;
+import com.google.rpc.Code;
import java.io.IOException;
import java.time.Duration;
import java.util.Collection;
@@ -536,7 +537,18 @@ public class RemoteSpawnRunnerTest {
ActionResult cachedResult = ActionResult.newBuilder().setExitCode(0).build();
when(cache.getCachedActionResult(any(ActionKey.class))).thenReturn(null);
- when(executor.executeRemotely(any(ExecuteRequest.class))).thenThrow(new TimeoutException());
+ ExecuteResponse resp =
+ ExecuteResponse.newBuilder()
+ .setResult(cachedResult)
+ .setStatus(
+ com.google.rpc.Status.newBuilder()
+ .setCode(Code.DEADLINE_EXCEEDED.getNumber())
+ .build())
+ .build();
+ when(executor.executeRemotely(any(ExecuteRequest.class)))
+ .thenThrow(
+ new Retrier.RetryException(
+ "", 1, new ExecutionStatusException(resp.getStatus(), resp)));
Spawn spawn = newSimpleSpawn();
@@ -546,7 +558,7 @@ public class RemoteSpawnRunnerTest {
assertThat(res.status()).isEqualTo(Status.TIMEOUT);
verify(executor).executeRemotely(any(ExecuteRequest.class));
- verify(cache, never()).download(eq(cachedResult), eq(execRoot), any(FileOutErr.class));
+ verify(cache).download(eq(cachedResult), eq(execRoot), any(FileOutErr.class));
}
@Test
@@ -572,7 +584,18 @@ public class RemoteSpawnRunnerTest {
ActionResult cachedResult = ActionResult.newBuilder().setExitCode(0).build();
when(cache.getCachedActionResult(any(ActionKey.class))).thenReturn(null);
- when(executor.executeRemotely(any(ExecuteRequest.class))).thenThrow(new TimeoutException());
+ ExecuteResponse resp =
+ ExecuteResponse.newBuilder()
+ .setResult(cachedResult)
+ .setStatus(
+ com.google.rpc.Status.newBuilder()
+ .setCode(Code.DEADLINE_EXCEEDED.getNumber())
+ .build())
+ .build();
+ when(executor.executeRemotely(any(ExecuteRequest.class)))
+ .thenThrow(
+ new Retrier.RetryException(
+ "", 1, new ExecutionStatusException(resp.getStatus(), resp)));
Spawn spawn = newSimpleSpawn();
@@ -582,7 +605,7 @@ public class RemoteSpawnRunnerTest {
assertThat(res.status()).isEqualTo(Status.TIMEOUT);
verify(executor).executeRemotely(any(ExecuteRequest.class));
- verify(cache, never()).download(eq(cachedResult), eq(execRoot), any(FileOutErr.class));
+ verify(cache).download(eq(cachedResult), eq(execRoot), any(FileOutErr.class));
verify(localRunner, never()).exec(eq(spawn), eq(policy));
}
diff --git a/src/test/shell/bazel/remote_execution_test.sh b/src/test/shell/bazel/remote_execution_test.sh
index dff25048a1..2b800b3d54 100755
--- a/src/test/shell/bazel/remote_execution_test.sh
+++ b/src/test/shell/bazel/remote_execution_test.sh
@@ -418,7 +418,11 @@ EOF
cat > a/sleep.sh <<'EOF'
#!/bin/sh
-sleep 2
+for i in {1..3}
+do
+ echo "Sleeping $i..."
+ sleep 1
+done
EOF
chmod +x a/sleep.sh
bazel test \
@@ -429,6 +433,10 @@ EOF
//a:sleep >& $TEST_log \
&& fail "Test failure (timeout) expected" || true
expect_log "TIMEOUT"
+ expect_log "Sleeping 1..."
+ # The current implementation of the remote worker does not terminate actions
+ # when they time out, therefore we cannot verify that:
+ # expect_not_log "Sleeping 3..."
}
function test_passed_env_user() {
diff --git a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java
index 6202d3deda..098b4adf4b 100644
--- a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java
+++ b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/ExecutionServer.java
@@ -25,6 +25,7 @@ import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.devtools.build.lib.remote.CacheNotFoundException;
+import com.google.devtools.build.lib.remote.ExecutionStatusException;
import com.google.devtools.build.lib.remote.SimpleBlobStoreActionCache;
import com.google.devtools.build.lib.remote.util.DigestUtil;
import com.google.devtools.build.lib.remote.util.DigestUtil.ActionKey;
@@ -40,6 +41,7 @@ import com.google.devtools.remoteexecution.v1test.Action;
import com.google.devtools.remoteexecution.v1test.ActionResult;
import com.google.devtools.remoteexecution.v1test.Command.EnvironmentVariable;
import com.google.devtools.remoteexecution.v1test.ExecuteRequest;
+import com.google.devtools.remoteexecution.v1test.ExecuteResponse;
import com.google.devtools.remoteexecution.v1test.ExecutionGrpc.ExecutionImplBase;
import com.google.devtools.remoteexecution.v1test.Platform;
import com.google.devtools.remoteexecution.v1test.RequestMetadata;
@@ -49,7 +51,6 @@ import com.google.rpc.Code;
import com.google.rpc.Status;
import io.grpc.Context;
import io.grpc.StatusException;
-import io.grpc.protobuf.StatusProto;
import io.grpc.stub.StreamObserver;
import java.io.ByteArrayOutputStream;
import java.io.File;
@@ -251,17 +252,19 @@ final class ExecutionServer extends ExecutionImplBase {
(cmdResult != null && cmdResult.getTerminationStatus().timedOut())
|| wasTimeout(timeoutMillis, System.currentTimeMillis() - startTime);
final int exitCode;
+ ExecuteResponse.Builder resp = ExecuteResponse.newBuilder();
if (wasTimeout) {
final String errMessage =
String.format(
"Command:\n%s\nexceeded deadline of %f seconds.",
Arrays.toString(command.getArgumentsList().toArray()), timeoutMillis / 1000.0);
logger.warning(errMessage);
- throw StatusProto.toStatusException(
+ resp.setStatus(
Status.newBuilder()
.setCode(Code.DEADLINE_EXCEEDED.getNumber())
.setMessage(errMessage)
.build());
+ exitCode = LOCAL_EXEC_ERROR;
} else if (cmdResult == null) {
exitCode = LOCAL_EXEC_ERROR;
} else {
@@ -278,6 +281,10 @@ final class ExecutionServer extends ExecutionImplBase {
ActionKey actionKey = digestUtil.computeActionKey(action);
cache.setCachedActionResult(actionKey, finalResult);
}
+ resp.setResult(finalResult);
+ if (wasTimeout) {
+ throw new ExecutionStatusException(resp.getStatus(), resp.build());
+ }
return finalResult;
}
diff --git a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/WatcherServer.java b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/WatcherServer.java
index 26797dc30c..7d3db46f56 100644
--- a/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/WatcherServer.java
+++ b/src/tools/remote/src/main/java/com/google/devtools/build/remote/worker/WatcherServer.java
@@ -17,6 +17,7 @@ package com.google.devtools.build.remote.worker;
import com.google.common.base.Throwables;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.MoreExecutors;
+import com.google.devtools.build.lib.remote.ExecutionStatusException;
import com.google.devtools.remoteexecution.v1test.ActionResult;
import com.google.devtools.remoteexecution.v1test.ExecuteResponse;
import com.google.longrunning.Operation;
@@ -58,45 +59,57 @@ final class WatcherServer extends WatcherImplBase {
return;
}
- future.addListener(() -> {
- try {
- try {
- ActionResult result = future.get();
- responseObserver.onNext(
- packExists(
- Operation.newBuilder()
- .setName(opName)
- .setDone(true)
- .setResponse(
- Any.pack(ExecuteResponse.newBuilder().setResult(result).build()))));
- responseObserver.onCompleted();
- } catch (ExecutionException e) {
- Throwables.throwIfUnchecked(e.getCause());
- throw (Exception) e.getCause();
- }
- } catch (Exception e) {
- logger.log(Level.SEVERE, "Work failed: " + opName, e);
- responseObserver.onNext(
- ChangeBatch.newBuilder()
- .addChanges(
- Change.newBuilder()
- .setState(Change.State.EXISTS)
- .setData(
- Any.pack(
- Operation.newBuilder()
- .setName(opName)
- .setError(StatusUtils.internalErrorStatus(e))
- .build()))
- .build())
- .build());
- responseObserver.onCompleted();
- if (e instanceof InterruptedException) {
- Thread.currentThread().interrupt();
- }
- } finally {
- operationsCache.remove(opName);
- }
- }, MoreExecutors.directExecutor());
+ future.addListener(
+ () -> {
+ try {
+ try {
+ ActionResult result = future.get();
+ responseObserver.onNext(
+ packExists(
+ Operation.newBuilder()
+ .setName(opName)
+ .setDone(true)
+ .setResponse(
+ Any.pack(ExecuteResponse.newBuilder().setResult(result).build()))));
+ responseObserver.onCompleted();
+ } catch (ExecutionException e) {
+ Throwables.throwIfUnchecked(e.getCause());
+ throw (Exception) e.getCause();
+ }
+ } catch (Exception e) {
+ ExecuteResponse resp;
+ if (e instanceof ExecutionStatusException) {
+ resp = ((ExecutionStatusException) e).getResponse();
+ } else {
+ logger.log(Level.SEVERE, "Work failed: " + opName, e);
+ resp =
+ ExecuteResponse.newBuilder()
+ .setStatus(StatusUtils.internalErrorStatus(e))
+ .build();
+ }
+ responseObserver.onNext(
+ ChangeBatch.newBuilder()
+ .addChanges(
+ Change.newBuilder()
+ .setState(Change.State.EXISTS)
+ .setData(
+ Any.pack(
+ Operation.newBuilder()
+ .setName(opName)
+ .setDone(true)
+ .setResponse(Any.pack(resp))
+ .build()))
+ .build())
+ .build());
+ responseObserver.onCompleted();
+ if (e instanceof InterruptedException) {
+ Thread.currentThread().interrupt();
+ }
+ } finally {
+ operationsCache.remove(opName);
+ }
+ },
+ MoreExecutors.directExecutor());
}
/** Constructs a ChangeBatch with an exists state change that contains the given operation. */