diff options
6 files changed, 207 insertions, 46 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/exec/SpawnResult.java b/src/main/java/com/google/devtools/build/lib/exec/SpawnResult.java index 3951e72d05..e6af42fedb 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/SpawnResult.java +++ b/src/main/java/com/google/devtools/build/lib/exec/SpawnResult.java @@ -15,65 +15,187 @@ package com.google.devtools.build.lib.exec; import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; +import javax.annotation.Nullable; /** * The result of a spawn execution. + * + * <p>DO NOT IMPLEMENT THIS INTERFACE! Use {@link SpawnResult.Builder} to create instances instead. + * This is a temporary situation as long as we still have separate internal and external + * implementations - the plan is to merge the two into a single immutable, final class. */ +// TODO(ulfjack): Change this from an interface to an immutable, final class. public interface SpawnResult { + /** The status of the attempted Spawn execution. */ + public enum Status { + /** + * Subprocess executed successfully, but may have returned a non-zero exit code. See + * {@link #exitCode} for the actual exit code. + */ + SUCCESS, + + /** Subprocess execution timed out. */ + TIMEOUT, + + /** + * Subprocess did not execute for an unknown reason - only use this if none of the more specific + * status codes apply. + */ + EXECUTION_FAILED, + + /** The attempted subprocess was disallowed by a user setting. */ + LOCAL_ACTION_NOT_ALLOWED, + + /** The Spawn referred to an non-existent absolute or relative path. */ + COMMAND_NOT_FOUND, + + /** + * One of the Spawn inputs was a directory. For backwards compatibility, some + * {@link SpawnRunner} implementations may attempt to run the subprocess anyway. Note that this + * leads to incremental correctness issues, as Bazel does not track dependencies on directories. + */ + DIRECTORY_AS_INPUT_DISALLOWED, + + /** + * Too many input files - remote execution systems may refuse to execute subprocesses with an + * excessive number of input files. + */ + TOO_MANY_INPUT_FILES, + + /** + * Total size of inputs is too large - remote execution systems may refuse to execute + * subprocesses if the total size of all inputs exceeds a limit. + */ + INPUTS_TOO_LARGE, + + /** + * One of the input files to the Spawn was modified during the build - some {@link SpawnRunner} + * implementations cache checksums and may detect such modifications on a best effort basis. + */ + FILE_MODIFIED_DURING_BUILD, + + /** + * The {@link SpawnRunner} was unable to establish a required network connection. + */ + CONNECTION_FAILED, + + /** + * The remote execution system is overloaded and had to refuse execution for this Spawn. + */ + REMOTE_EXECUTOR_OVERLOADED, + + /** + * The remote execution system did not allow the request due to missing authorization or + * authentication. + */ + NOT_AUTHORIZED, + + /** + * The Spawn was malformed. + */ + INVALID_ARGUMENT; + } + /** - * Returns whether the spawn was actually run, regardless of the exit code. Returns false if there - * were network errors, missing local files, errors setting up sandboxing, etc. + * Returns whether the spawn was actually run, regardless of the exit code. I.e., returns if + * status == SUCCESS || status == TIMEOUT. Returns false if there were errors that prevented the + * spawn from being run, such as network errors, missing local files, errors setting up + * sandboxing, etc. */ boolean setupSuccess(); + /** The status of the attempted Spawn execution. */ + Status status(); + /** - * The exit code of the subprocess. + * The exit code of the subprocess if the subprocess was executed. Check {@link #status} for + * {@link Status#SUCCESS} before calling this method. */ int exitCode(); /** + * The host name of the executor or {@code null}. This information is intended for debugging + * purposes, especially for remote execution systems. Remote caches usually do not store the + * original host name, so this is generally {@code null} for cache hits. + */ + @Nullable String getExecutorHostName(); + + long getWallTimeMillis(); + + /** * Basic implementation of {@link SpawnResult}. */ @Immutable @ThreadSafe public static final class SimpleSpawnResult implements SpawnResult { - private final boolean setupSuccess; private final int exitCode; + private final Status status; + private final String executorHostName; + private final long wallTimeMillis; SimpleSpawnResult(Builder builder) { - this.setupSuccess = builder.setupSuccess; this.exitCode = builder.exitCode; + this.status = builder.status; + this.executorHostName = builder.executorHostName; + this.wallTimeMillis = builder.wallTimeMillis; } @Override public boolean setupSuccess() { - return setupSuccess; + return status == Status.SUCCESS || status == Status.TIMEOUT; } @Override public int exitCode() { return exitCode; } + + @Override + public Status status() { + return status; + } + + @Override + public String getExecutorHostName() { + return executorHostName; + } + + @Override + public long getWallTimeMillis() { + return wallTimeMillis; + } } /** * Builder class for {@link SpawnResult}. */ public static final class Builder { - private boolean setupSuccess; private int exitCode; + private Status status; + private String executorHostName; + private long wallTimeMillis; public SpawnResult build() { return new SimpleSpawnResult(this); } - public Builder setSetupSuccess(boolean setupSuccess) { - this.setupSuccess = setupSuccess; + public Builder setExitCode(int exitCode) { + this.exitCode = exitCode; return this; } - public Builder setExitCode(int exitCode) { - this.exitCode = exitCode; + public Builder setStatus(Status status) { + this.status = status; + return this; + } + + public Builder setExecutorHostname(String executorHostName) { + this.executorHostName = executorHostName; + return this; + } + + public Builder setWallTimeMillis(long wallTimeMillis) { + this.wallTimeMillis = wallTimeMillis; return this; } } -} +}
\ No newline at end of file diff --git a/src/main/java/com/google/devtools/build/lib/exec/SpawnRunner.java b/src/main/java/com/google/devtools/build/lib/exec/SpawnRunner.java index 1f508277cf..da25744ff4 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/SpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/exec/SpawnRunner.java @@ -42,6 +42,7 @@ import java.util.SortedMap; * * <h2>Process</h2> * <ul> + * <li>Implementations MUST be thread-safe. * <li>Implementations MUST ensure that all child processes (including transitive) exit in all * cases, including successful completion, interruption, and timeout * <li>Implementations MUST return the exit code as observed from the subprocess if the subprocess @@ -78,9 +79,44 @@ import java.util.SortedMap; */ public interface SpawnRunner { /** + * Used to report progress on the current spawn. This is mainly used to report the current state + * of the subprocess to the user, but may also be used to trigger parallel execution. For example, + * a dynamic scheduler may use the signal that there was a cache miss to start parallel execution + * of the same Spawn - also see the {@link SpawnRunner} documentation section on "optimistic + * concurrency". + * + * <p>{@link SpawnRunner} implementations should post a progress status before any potentially + * long-running operation. + */ + public enum ProgressStatus { + /** Spawn is waiting for local or remote resources to become available. */ + SCHEDULING, + + /** The {@link SpawnRunner} is looking for a cache hit. */ + CHECKING_CACHE, + + /** + * Resources are acquired, and there was probably no cache hit. This MUST be posted before + * attempting to execute the subprocess. + * + * <p>Caching {@link SpawnRunner} implementations should only post this after a failed cache + * lookup, but may post this if cache lookup and execution happen within the same step, e.g. as + * part of a single RPC call with no mechanism to report cache misses. + */ + EXECUTING, + + /** Downloading outputs from a remote machine. */ + DOWNLOADING; + } + + /** * A helper class to provide additional tools and methods to {@link SpawnRunner} implementations. * * <p>This interface may change without notice. + * + * <p>Implementations must be at least thread-compatible, i.e., they must be safe as long as + * each instance is only used within a single thread. Different instances of the same class may + * be used by different threads, so they MUST not call any shared non-thread-safe objects. */ public interface SpawnExecutionPolicy { /** @@ -90,9 +126,7 @@ public interface SpawnRunner { // TODO(ulfjack): Use an execution info value instead. boolean shouldPrefetchInputsForLocalExecution(Spawn spawn); - /** - * The input file cache for this specific spawn. - */ + /** The input file cache for this specific spawn. */ ActionInputFileCache getActionInputFileCache(); /** @@ -102,17 +136,16 @@ public interface SpawnRunner { */ void lockOutputFiles() throws InterruptedException; - /** - * Returns the timeout that should be applied for the given {@link Spawn} instance. - */ + /** Returns the timeout that should be applied for the given {@link Spawn} instance. */ long getTimeoutMillis(); - /** - * The files to which to write stdout and stderr. - */ + /** The files to which to write stdout and stderr. */ FileOutErr getFileOutErr(); SortedMap<PathFragment, ActionInput> getInputMapping() throws IOException; + + /** Reports a progress update to the Spawn strategy. */ + void report(ProgressStatus state); } /** @@ -130,4 +163,4 @@ public interface SpawnRunner { Spawn spawn, SpawnExecutionPolicy policy) throws InterruptedException, IOException, ExecException; -} +}
\ No newline at end of file diff --git a/src/main/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunner.java index a4daf8e7eb..3a10aa490d 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunner.java @@ -21,7 +21,9 @@ import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ExecException; import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.actions.UserExecException; +import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.exec.SpawnResult; +import com.google.devtools.build.lib.exec.SpawnResult.Status; import com.google.devtools.build.lib.exec.SpawnRunner; import com.google.devtools.build.lib.remote.ContentDigests.ActionKey; import com.google.devtools.build.lib.remote.RemoteProtocol.Action; @@ -47,6 +49,7 @@ import java.util.TreeSet; * A {@link SpawnRunner} implementation that adds a remote cache on top of an underlying local * {@link SpawnRunner} implementation. */ +@ThreadSafe // If RemoteActionCache and SpawnRunner implementations are thread-safe. final class CachedLocalSpawnRunner implements SpawnRunner { private final Path execRoot; private final RemoteOptions options; @@ -113,7 +116,7 @@ final class CachedLocalSpawnRunner implements SpawnRunner { actionCache.downloadAllResults(result, execRoot); passRemoteOutErr(result, policy.getFileOutErr()); return new SpawnResult.Builder() - .setSetupSuccess(true) + .setStatus(Status.SUCCESS) .setExitCode(result.getReturnCode()) .build(); } catch (CacheNotFoundException e) { diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java index 51ff22fc67..7484dafae9 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java @@ -19,12 +19,12 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.eventbus.EventBus; import com.google.devtools.build.lib.actions.ActionExecutionMetadata; import com.google.devtools.build.lib.actions.ActionInput; -import com.google.devtools.build.lib.actions.ActionStatusMessage; import com.google.devtools.build.lib.actions.Spawn; +import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.exec.SpawnResult; +import com.google.devtools.build.lib.exec.SpawnResult.Status; import com.google.devtools.build.lib.exec.SpawnRunner; import com.google.devtools.build.lib.remote.ContentDigests.ActionKey; import com.google.devtools.build.lib.remote.RemoteProtocol.Action; @@ -53,8 +53,8 @@ import java.util.TreeSet; /** * A client for the remote execution service. */ +@ThreadSafe final class RemoteSpawnRunner implements SpawnRunner { - private final EventBus eventBus; private final Path execRoot; private final RemoteOptions options; // TODO(olaola): This will be set on a per-action basis instead. @@ -64,11 +64,9 @@ final class RemoteSpawnRunner implements SpawnRunner { RemoteSpawnRunner( Path execRoot, - EventBus eventBus, RemoteOptions options, GrpcRemoteExecutor executor) { this.execRoot = execRoot; - this.eventBus = eventBus; this.options = options; if (options.experimentalRemotePlatformOverride != null) { Platform.Builder platformBuilder = Platform.newBuilder(); @@ -86,9 +84,8 @@ final class RemoteSpawnRunner implements SpawnRunner { RemoteSpawnRunner( Path execRoot, - EventBus eventBus, RemoteOptions options) { - this(execRoot, eventBus, options, connect(options)); + this(execRoot, options, connect(options)); } private static GrpcRemoteExecutor connect(RemoteOptions options) { @@ -108,7 +105,7 @@ final class RemoteSpawnRunner implements SpawnRunner { SpawnExecutionPolicy policy) throws InterruptedException, IOException { ActionExecutionMetadata owner = spawn.getResourceOwner(); if (owner.getOwner() != null) { - eventBus.post(ActionStatusMessage.runningStrategy(owner, "remote")); + policy.report(ProgressStatus.EXECUTING); } try { @@ -147,7 +144,8 @@ final class RemoteSpawnRunner implements SpawnRunner { if (!status.getSucceeded() && (status.getError() != ExecutionStatus.ErrorCode.EXEC_FAILED)) { return new SpawnResult.Builder() - .setSetupSuccess(false) + // TODO(ulfjack): Improve the translation of the error status. + .setStatus(Status.EXECUTION_FAILED) .setExitCode(-1) .build(); } @@ -159,12 +157,10 @@ final class RemoteSpawnRunner implements SpawnRunner { passRemoteOutErr(executor, result, policy.getFileOutErr()); executor.downloadAllResults(result, execRoot); return new SpawnResult.Builder() - .setSetupSuccess(true) + .setStatus(Status.SUCCESS) .setExitCode(result.getReturnCode()) .build(); - } catch (StatusRuntimeException e) { - throw new IOException(e); - } catch (CacheNotFoundException e) { + } catch (StatusRuntimeException | CacheNotFoundException e) { throw new IOException(e); } } diff --git a/src/test/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunnerTest.java index 74b7bc9ba0..d27e7c06b3 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/CachedLocalSpawnRunnerTest.java @@ -30,7 +30,9 @@ import com.google.devtools.build.lib.actions.SimpleSpawn; import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.exec.SpawnInputExpander; import com.google.devtools.build.lib.exec.SpawnResult; +import com.google.devtools.build.lib.exec.SpawnResult.Status; import com.google.devtools.build.lib.exec.SpawnRunner; +import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus; import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionPolicy; import com.google.devtools.build.lib.remote.ContentDigests.ActionKey; import com.google.devtools.build.lib.remote.RemoteProtocol.ActionResult; @@ -101,6 +103,11 @@ public class CachedLocalSpawnRunnerTest { return new SpawnInputExpander(/*strict*/false) .getInputMapping(simpleSpawn, SIMPLE_ARTIFACT_EXPANDER, fakeFileCache, "workspace"); } + + @Override + public void report(ProgressStatus state) { + // TODO(ulfjack): Test that the right calls are made. + } }; @Before @@ -213,7 +220,7 @@ public class CachedLocalSpawnRunnerTest { when(cache.getCachedActionResult(any(ActionKey.class))).thenReturn(null); SpawnResult delegateResult = new SpawnResult.Builder() .setExitCode(0) - .setSetupSuccess(true) + .setStatus(Status.SUCCESS) .build(); when(delegate.exec(any(Spawn.class), any(SpawnExecutionPolicy.class))) .thenReturn(delegateResult); diff --git a/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java b/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java index 7f9620f0d4..5957ef385f 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/GrpcRemoteExecutionClientTest.java @@ -20,7 +20,6 @@ import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.eventbus.EventBus; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ActionInputFileCache; import com.google.devtools.build.lib.actions.ActionInputHelper; @@ -31,6 +30,7 @@ import com.google.devtools.build.lib.actions.SimpleSpawn; import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.exec.SpawnInputExpander; import com.google.devtools.build.lib.exec.SpawnResult; +import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus; import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionPolicy; import com.google.devtools.build.lib.remote.RemoteProtocol.ActionResult; import com.google.devtools.build.lib.remote.RemoteProtocol.ContentDigest; @@ -70,7 +70,6 @@ public class GrpcRemoteExecutionClientTest { private FileSystem fs; private Path execRoot; - private EventBus eventBus; private SimpleSpawn simpleSpawn; private FakeActionInputFileCache fakeFileCache; @@ -108,6 +107,11 @@ public class GrpcRemoteExecutionClientTest { return new SpawnInputExpander(/*strict*/false) .getInputMapping(simpleSpawn, SIMPLE_ARTIFACT_EXPANDER, fakeFileCache, "workspace"); } + + @Override + public void report(ProgressStatus state) { + // TODO(ulfjack): Test that the right calls are made. + } }; @Before @@ -115,7 +119,6 @@ public class GrpcRemoteExecutionClientTest { fs = new InMemoryFileSystem(); execRoot = fs.getPath("/exec/root"); FileSystemUtils.createDirectoryAndParents(execRoot); - eventBus = new EventBus(); fakeFileCache = new FakeActionInputFileCache(execRoot); simpleSpawn = new SimpleSpawn( new FakeOwner("Mnemonic", "Progress Message"), @@ -149,8 +152,7 @@ public class GrpcRemoteExecutionClientTest { RemoteOptions options = Options.getDefaults(RemoteOptions.class); GrpcRemoteExecutor executor = new GrpcRemoteExecutor(options, casIface, cacheIface, executionIface); - RemoteSpawnRunner client = - new RemoteSpawnRunner(execRoot, eventBus, options, executor); + RemoteSpawnRunner client = new RemoteSpawnRunner(execRoot, options, executor); scratch(simpleSpawn.getInputFiles().get(0), "xyz"); @@ -176,8 +178,7 @@ public class GrpcRemoteExecutionClientTest { RemoteOptions options = Options.getDefaults(RemoteOptions.class); GrpcRemoteExecutor executor = new GrpcRemoteExecutor(options, casIface, cacheIface, executionIface); - RemoteSpawnRunner client = - new RemoteSpawnRunner(execRoot, eventBus, options, executor); + RemoteSpawnRunner client = new RemoteSpawnRunner(execRoot, options, executor); scratch(simpleSpawn.getInputFiles().get(0), "xyz"); byte[] cacheStdOut = "stdout".getBytes(StandardCharsets.UTF_8); @@ -210,8 +211,7 @@ public class GrpcRemoteExecutionClientTest { RemoteOptions options = Options.getDefaults(RemoteOptions.class); GrpcRemoteExecutor executor = new GrpcRemoteExecutor(options, casIface, cacheIface, executionIface); - RemoteSpawnRunner client = - new RemoteSpawnRunner(execRoot, eventBus, options, executor); + RemoteSpawnRunner client = new RemoteSpawnRunner(execRoot, options, executor); scratch(simpleSpawn.getInputFiles().get(0), "xyz"); byte[] cacheStdOut = "stdout".getBytes(StandardCharsets.UTF_8); |