src/main/java/com/google/devtools/build/lib/exec/SpawnRunner.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218

// Copyright 2017 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.exec;

import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
import com.google.devtools.build.lib.actions.ExecException;
import com.google.devtools.build.lib.actions.MetadataProvider;
import com.google.devtools.build.lib.actions.Spawn;
import com.google.devtools.build.lib.actions.SpawnResult;
import com.google.devtools.build.lib.util.io.FileOutErr;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.IOException;
import java.time.Duration;
import java.util.SortedMap;

/**
 * A runner for spawns. Implementations can execute spawns on the local machine as a subprocess with
 * or without sandboxing, on a remote machine, or only consult a remote cache.
 *
 * <h2>Environment Variables</h2>
 *
 * <ul>
 *   <li>Implementations MUST set the specified environment variables.
 *   <li>Implementations MAY add TMPDIR as an additional env variable, if it is not set already.
 *   <li>If an implementation sets TMPDIR, it MUST be set to an absolute path.
 *   <li>Implementations MUST NOT add any other environment variables.
 * </ul>
 *
 * <h2>Command line</h2>
 *
 * <ul>
 *   <li>Implementations MUST use the specified command line unmodified by default.
 *   <li>Implementations MAY modify the specified command line if explicitly requested by the user.
 * </ul>
 *
 * <h2>Process</h2>
 *
 * <ul>
 *   <li>Implementations MUST be thread-safe.
 *   <li>Implementations MUST ensure that all child processes (including transitive) exit in all
 *       cases, including successful completion, interruption, and timeout
 *   <li>Implementations MUST return the exit code as observed from the subprocess if the subprocess
 *       exits naturally; they MUST NOT throw an exception for non-zero exit codes
 *   <li>Implementations MUST be interruptible; they MUST throw {@link InterruptedException} from
 *       {@link #exec} when interrupted
 *   <li>Implementations MUST apply the specified timeout to the execution of the subprocess
 *       <ul>
 *         <li>If no timeout is specified, the implementation MAY apply an implementation-specific
 *             timeout
 *         <li>If the specified timeout is larger than an implementation-dependent maximum, then the
 *             implementation MUST throw {@link IllegalArgumentException}; it MUST NOT silently
 *             change the timeout to a smaller value
 *         <li>If the timeout is exceeded, the implementation MUST throw TimeoutException, with the
 *             timeout that was applied to the subprocess (TODO)
 *       </ul>
 * </ul>
 *
 * <h2>Optimistic Concurrency</h2>
 *
 * Bazel may choose to execute a spawn using multiple {@link SpawnRunner} implementations
 * simultaneously in order to minimize total latency. This is especially useful for builds with few
 * actions where remotely executing the actions incurs high round trip times.
 *
 * <ul>
 *   <li>All implementations MUST call {@link SpawnExecutionContext#lockOutputFiles} before writing
 *       to any of the output files, but may write to stdout and stderr without calling it. Instead,
 *       all callers must provide temporary locations for stdout and stderr if they ever call
 *       multiple {@link SpawnRunner} implementations concurrently. Spawn runners that use the local
 *       machine MUST either call it before starting the subprocess, or ensure that subprocesses
 *       write to temporary locations (for example by running in a mount namespace) and then copy or
 *       move the outputs into place.
 *   <li>Implementations SHOULD delay calling {@link SpawnExecutionContext#lockOutputFiles} until
 *       just before writing.
 * </ul>
 */
public interface SpawnRunner {
  /**
   * Progress states that a {@link SpawnRunner} reports for the current spawn. This is mainly used
   * to show the current state of the subprocess to the user, but may also be used to trigger
   * parallel execution. For example, a dynamic scheduler may use the signal that there was a cache
   * miss to start parallel execution of the same Spawn - also see the {@link SpawnRunner}
   * documentation section on "optimistic concurrency".
   *
   * <p>{@link SpawnRunner} implementations should post a progress status before any potentially
   * long-running operation.
   */
  enum ProgressStatus {
    /** Spawn is waiting for local or remote resources to become available. */
    SCHEDULING,

    /** The {@link SpawnRunner} is looking for a cache hit. */
    CHECKING_CACHE,

    /**
     * Resources are acquired, and there was probably no cache hit. This MUST be posted before
     * attempting to execute the subprocess.
     *
     * <p>Caching {@link SpawnRunner} implementations should only post this after a failed cache
     * lookup, but may post this if cache lookup and execution happen within the same step, e.g. as
     * part of a single RPC call with no mechanism to report cache misses.
     */
    EXECUTING,

    /** Downloading outputs from a remote machine. */
    DOWNLOADING
  }

  /**
   * A context that binds a {@link Spawn} to a {@link SpawnRunner}.
   *
   * <p>This interface may change without notice.
   *
   * <p>Implementations must be at least thread-compatible, i.e., they must be safe as long as each
   * instance is only used within a single thread. Different instances of the same class may be used
   * by different threads, so they MUST NOT call any shared non-thread-safe objects.
   */
  interface SpawnExecutionContext {
    /**
     * Returns a unique id for this spawn, to be used for logging. Note that a single spawn may be
     * passed to multiple {@link SpawnRunner} implementations, so any log entries should also
     * contain the identity of the spawn runner implementation.
     */
    int getId();

    /**
     * Prefetches the Spawn's input files to the local machine. There are cases where Bazel runs on
     * a network file system, and prefetching the files in parallel is a significant performance
     * win. This should only be called by local strategies when local execution is imminent.
     *
     * <p>This method takes no arguments: callers do not pass in the inputs to prefetch. Note that
     * {@link #getInputMapping} may contain {@code null} values and may be slow to compute, so
     * implementations presumably need to skip {@code null} entries and should avoid recomputing the
     * mapping unnecessarily (TODO: confirm against the concrete implementations).
     *
     * @throws IOException if an I/O error occurs while prefetching
     */
    void prefetchInputs() throws IOException;

    /**
     * The input file metadata cache for this specific spawn, which can be used to efficiently
     * obtain file digests and sizes.
     */
    MetadataProvider getMetadataProvider();

    /** An artifact expander. */
    // TODO(ulfjack): This is only used for the sandbox runners to compute a set of empty
    // directories. We shouldn't have this and the getInputMapping method; maybe there's a way to
    // unify the two? Alternatively, maybe the input mapping should (optionally?) contain
    // directories? Or maybe we need a separate method to return the set of directories?
    ArtifactExpander getArtifactExpander();

    /** The {@link ArtifactPathResolver} to use when directly writing output files. */
    default ArtifactPathResolver getPathResolver() {
      return ArtifactPathResolver.IDENTITY;
    }

    /**
     * All implementations must call this method before writing to the provided stdout / stderr or
     * to any of the output file locations. This method is used to coordinate - implementations
     * must throw an {@link InterruptedException} for all but one caller.
     *
     * @throws InterruptedException for every caller except the one that is allowed to write
     */
    void lockOutputFiles() throws InterruptedException;

    /**
     * Returns whether this spawn may be executing concurrently under multiple spawn runners. If so,
     * {@link #lockOutputFiles} may raise {@link InterruptedException}.
     */
    boolean speculating();

    /** Returns the timeout that should be applied for the given {@link Spawn} instance. */
    Duration getTimeout();

    /** The files to which to write stdout and stderr. */
    FileOutErr getFileOutErr();

    /**
     * Returns the inputs of this spawn as a sorted map from {@link PathFragment} to {@link
     * ActionInput}.
     *
     * <p>The map may contain {@code null} values, so callers must be prepared to handle them. The
     * mapping may be slow to compute and implementations are not currently required to memoize it,
     * so callers should avoid calling this method more often than necessary.
     *
     * @throws IOException if an I/O error occurs while computing the mapping
     */
    SortedMap<PathFragment, ActionInput> getInputMapping() throws IOException;

    /**
     * Reports a progress update to the Spawn strategy.
     *
     * @param state the progress state being reported
     * @param name a name accompanying the update; presumably the name of the reporting {@link
     *     SpawnRunner} (see {@link SpawnRunner#getName}) - TODO confirm against callers
     */
    void report(ProgressStatus state, String name);
  }

  /**
   * Run the given spawn.
   *
   * @param spawn the spawn to run
   * @param context the spawn execution context
   * @return the result from running the spawn
   * @throws InterruptedException if the calling thread was interrupted, or if the runner could not
   *     lock the output files (see {@link SpawnExecutionContext#lockOutputFiles()})
   * @throws IOException if something went wrong reading or writing to the local file system
   * @throws ExecException if the request is malformed
   */
  SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
      throws InterruptedException, IOException, ExecException;

  /** Returns the name of this SpawnRunner. */
  String getName();
}