From 8b76fe5a1aaf96f849ea5543ca3858014d25f5a4 Mon Sep 17 00:00:00 2001 From: Philipp Wollermann Date: Mon, 14 Sep 2015 11:50:01 +0000 Subject: workers: A multitude of bug fixes and improved logging. I know this should have been split up, but I was a bit on fire today and did it all in one go ^^; Fixes spurious "Stream closed: Stream closed" errors, by noticing dead workers and retrying with a fresh one. (Configurable with the --worker_max_retries flag.) Fixes an "IllegalArgumentException" when a non-worker compatible Spawn is given to the strategy. We fall back to StandaloneSpawnStrategy now. Redirect the stderr of worker processes to separate log files in a common sub-directory and print a message that tells you about the location on worker start-up for easier debugging. The log can be found in /worker-logs/*.log. Adds the mnemonic of the Spawn to log messages and the log filename. Adds verbose messages on worker start-up and shutdown. (Enable it with --worker_verbose!) Shuts down the worker pool after a build finished by default, until we sort out one last remaining correctness issue. This also conserves resources, though makes incremental builds a bit slower. Want the maximum performance anyway? Try --experimental_workers_keep_running. Adds stack traces to errors that are caused by buggy workers to aid development. Fixes weird dupli..tripli..quadruple error messages ("Compiling failed: Stream closed: Stream closed: Stream closed: Stream closed."). -- MOS_MIGRATED_REVID=102983853 --- .../com/google/devtools/build/lib/worker/BUILD | 4 + .../google/devtools/build/lib/worker/Worker.java | 45 ++++++- .../lib/worker/WorkerActionContextConsumer.java | 41 ++++++ .../lib/worker/WorkerActionContextProvider.java | 16 ++- .../devtools/build/lib/worker/WorkerFactory.java | 40 +++++- .../devtools/build/lib/worker/WorkerKey.java | 15 ++- .../devtools/build/lib/worker/WorkerModule.java | 105 ++++++++++++--- .../devtools/build/lib/worker/WorkerOptions.java | 53 ++++++-- .../devtools/build/lib/worker/WorkerPool.java | 29 ++++- .../build/lib/worker/WorkerSpawnStrategy.java | 145 ++++++++++++++------- 10 files changed, 403 insertions(+), 90 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextConsumer.java (limited to 'src/main/java') diff --git a/src/main/java/com/google/devtools/build/lib/worker/BUILD b/src/main/java/com/google/devtools/build/lib/worker/BUILD index aee455cc65..4d6cb1a4e5 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/BUILD +++ b/src/main/java/com/google/devtools/build/lib/worker/BUILD @@ -9,11 +9,15 @@ java_library( srcs = glob(["*.java"]), deps = [ "//src/main/java:actions", + "//src/main/java:analysis-exec-rules-skyframe", "//src/main/java:buildtool-runtime", "//src/main/java:common", "//src/main/java:concurrent", + "//src/main/java:events", "//src/main/java:options", + "//src/main/java:packages", "//src/main/java:vfs", + "//src/main/java/com/google/devtools/build/lib/standalone", "//src/main/protobuf:proto_worker_protocol", "//third_party:apache_commons_pool2", "//third_party:guava", diff --git a/src/main/java/com/google/devtools/build/lib/worker/Worker.java b/src/main/java/com/google/devtools/build/lib/worker/Worker.java index 409394da20..d76b9fe8f1 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/Worker.java +++ b/src/main/java/com/google/devtools/build/lib/worker/Worker.java @@ -14,11 +14,15 @@ package com.google.devtools.build.lib.worker; import com.google.common.base.Preconditions; +import com.google.devtools.build.lib.events.Event; +import com.google.devtools.build.lib.events.Reporter; +import com.google.devtools.build.lib.vfs.Path; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.lang.ProcessBuilder.Redirect; +import java.util.concurrent.atomic.AtomicInteger; /** * Interface to a worker process running as a child process. @@ -32,19 +36,29 @@ import java.lang.ProcessBuilder.Redirect; * class. */ final class Worker { + private static final AtomicInteger pidCounter = new AtomicInteger(); + private final int workerId; private final Process process; private final Thread shutdownHook; - private Worker(Process process, Thread shutdownHook) { + private Worker(Process process, Thread shutdownHook, int pid) { this.process = process; this.shutdownHook = shutdownHook; + this.workerId = pid; } - static Worker create(WorkerKey key) throws IOException { + static Worker create(WorkerKey key, Path logDir, Reporter reporter, boolean verbose) + throws IOException { Preconditions.checkNotNull(key); - ProcessBuilder processBuilder = new ProcessBuilder(key.getArgs().toArray(new String[0])) - .directory(key.getWorkDir().getPathFile()) - .redirectError(Redirect.INHERIT); + Preconditions.checkNotNull(logDir); + + int workerId = pidCounter.getAndIncrement(); + Path logFile = logDir.getRelative("worker-" + workerId + "-" + key.getMnemonic() + ".log"); + + ProcessBuilder processBuilder = + new ProcessBuilder(key.getArgs().toArray(new String[0])) + .directory(key.getWorkDir().getPathFile()) + .redirectError(Redirect.appendTo(logFile.getPathFile())); processBuilder.environment().putAll(key.getEnv()); final Process process = processBuilder.start(); @@ -57,7 +71,18 @@ final class Worker { }; Runtime.getRuntime().addShutdownHook(shutdownHook); - return new Worker(process, shutdownHook); + if (verbose) { + reporter.handle( + Event.info( + "Created new " + + key.getMnemonic() + + " worker (id " + + workerId + + "), logging to " + + logFile)); + } + + return new Worker(process, shutdownHook, workerId); } void destroy() { @@ -65,6 +90,14 @@ final class Worker { process.destroy(); } + /** + * Returns a unique id for this worker. This is used to distinguish different worker processes in + * logs and messages. + */ + int getWorkerId() { + return this.workerId; + } + boolean isAlive() { // This is horrible, but Process.isAlive() is only available from Java 8 on and this is the // best we can do prior to that. diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextConsumer.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextConsumer.java new file mode 100644 index 0000000000..4269ed5417 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextConsumer.java @@ -0,0 +1,41 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.worker; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableMap.Builder; +import com.google.devtools.build.lib.actions.ActionContextConsumer; +import com.google.devtools.build.lib.actions.Executor.ActionContext; +import com.google.devtools.build.lib.actions.SpawnActionContext; + +import java.util.Map; + +/** + * {@link ActionContextConsumer} that requests the action contexts necessary for worker process + * execution. + */ +public class WorkerActionContextConsumer implements ActionContextConsumer { + + @Override + public Map getSpawnActionContexts() { + return ImmutableMap.of(); + } + + @Override + public Map, String> getActionContexts() { + Builder, String> contexts = ImmutableMap.builder(); + contexts.put(SpawnActionContext.class, "worker"); + return contexts.build(); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java index 0db3a46595..66d0208d08 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerActionContextProvider.java @@ -18,6 +18,8 @@ import com.google.common.eventbus.EventBus; import com.google.devtools.build.lib.actions.ActionContextProvider; import com.google.devtools.build.lib.actions.Executor.ActionContext; import com.google.devtools.build.lib.buildtool.BuildRequest; +import com.google.devtools.build.lib.exec.ExecutionOptions; +import com.google.devtools.build.lib.runtime.BlazeRuntime; /** * Factory for the Worker-based execution strategy. @@ -26,9 +28,19 @@ final class WorkerActionContextProvider extends ActionContextProvider { private final ImmutableList strategies; public WorkerActionContextProvider( - BuildRequest buildRequest, WorkerPool workers, EventBus eventBus) { + BlazeRuntime runtime, BuildRequest buildRequest, WorkerPool workers, EventBus eventBus) { + boolean verboseFailures = buildRequest.getOptions(ExecutionOptions.class).verboseFailures; + int maxRetries = buildRequest.getOptions(WorkerOptions.class).workerMaxRetries; + this.strategies = - ImmutableList.of(new WorkerSpawnStrategy(buildRequest, workers, eventBus)); + ImmutableList.of( + new WorkerSpawnStrategy( + runtime.getDirectories(), + buildRequest, + eventBus, + workers, + verboseFailures, + maxRetries)); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java index 8248e1c062..208b88c05d 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java @@ -13,6 +13,10 @@ // limitations under the License. package com.google.devtools.build.lib.worker; +import com.google.devtools.build.lib.events.Event; +import com.google.devtools.build.lib.events.Reporter; +import com.google.devtools.build.lib.vfs.Path; + import org.apache.commons.pool2.BaseKeyedPooledObjectFactory; import org.apache.commons.pool2.PooledObject; import org.apache.commons.pool2.impl.DefaultPooledObject; @@ -21,9 +25,25 @@ import org.apache.commons.pool2.impl.DefaultPooledObject; * Factory used by the pool to create / destroy / validate worker processes. */ final class WorkerFactory extends BaseKeyedPooledObjectFactory { + private Path logDir; + private Reporter reporter; + private boolean verbose; + + public void setLogDirectory(Path logDir) { + this.logDir = logDir; + } + + public void setReporter(Reporter reporter) { + this.reporter = reporter; + } + + public void setVerbose(boolean verbose) { + this.verbose = verbose; + } + @Override public Worker create(WorkerKey key) throws Exception { - return Worker.create(key); + return Worker.create(key, logDir, reporter, verbose); } /** @@ -39,6 +59,15 @@ final class WorkerFactory extends BaseKeyedPooledObjectFactory p) throws Exception { + if (verbose) { + reporter.handle( + Event.info( + "Destroying " + + key.getMnemonic() + + " worker (id " + + p.getObject().getWorkerId() + + ").")); + } p.getObject().destroy(); } @@ -47,6 +76,15 @@ final class WorkerFactory extends BaseKeyedPooledObjectFactory p) { + if (verbose) { + reporter.handle( + Event.info( + "Validating " + + key.getMnemonic() + + " worker (id " + + p.getObject().getWorkerId() + + ").")); + } return p.getObject().isAlive(); } } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java index e0ce5cf0bf..b9247ce686 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerKey.java @@ -30,11 +30,13 @@ final class WorkerKey { private final ImmutableList args; private final ImmutableMap env; private final Path workDir; + private final String mnemonic; - WorkerKey(List args, Map env, Path workDir) { + WorkerKey(List args, Map env, Path workDir, String mnemonic) { this.args = ImmutableList.copyOf(Preconditions.checkNotNull(args)); this.env = ImmutableMap.copyOf(Preconditions.checkNotNull(env)); this.workDir = Preconditions.checkNotNull(workDir); + this.mnemonic = Preconditions.checkNotNull(mnemonic); } public ImmutableList getArgs() { @@ -49,6 +51,10 @@ final class WorkerKey { return workDir; } + public String getMnemonic() { + return mnemonic; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -66,7 +72,11 @@ final class WorkerKey { if (!env.equals(workerKey.env)) { return false; } - return workDir.equals(workerKey.workDir); + if (!workDir.equals(workerKey.workDir)) { + return false; + } + return mnemonic.equals(workerKey.mnemonic); + } @Override @@ -74,6 +84,7 @@ final class WorkerKey { int result = args.hashCode(); result = 31 * result + env.hashCode(); result = 31 * result + workDir.hashCode(); + result = 31 * result + mnemonic.hashCode(); return result; } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java index b7de57fee1..07943ec315 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java @@ -13,38 +13,34 @@ // limitations under the License. package com.google.devtools.build.lib.worker; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.eventbus.Subscribe; +import com.google.devtools.build.lib.actions.ActionContextConsumer; import com.google.devtools.build.lib.actions.ActionContextProvider; import com.google.devtools.build.lib.buildtool.BuildRequest; +import com.google.devtools.build.lib.buildtool.buildevent.BuildCompleteEvent; +import com.google.devtools.build.lib.buildtool.buildevent.BuildInterruptedEvent; import com.google.devtools.build.lib.buildtool.buildevent.BuildStartingEvent; +import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.runtime.BlazeModule; import com.google.devtools.build.lib.runtime.BlazeRuntime; import com.google.devtools.build.lib.runtime.Command; +import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.common.options.OptionsBase; import org.apache.commons.pool2.impl.GenericKeyedObjectPoolConfig; +import java.io.IOException; + /** * A module that adds the WorkerActionContextProvider to the available action context providers. */ public class WorkerModule extends BlazeModule { - private final WorkerPool workers; - - private BuildRequest buildRequest; private BlazeRuntime blazeRuntime; - - public WorkerModule() { - GenericKeyedObjectPoolConfig config = new GenericKeyedObjectPoolConfig(); - config.setTimeBetweenEvictionRunsMillis(10 * 1000); - workers = new WorkerPool(new WorkerFactory(), config); - } - - @Override - public Iterable getActionContextProviders() { - return ImmutableList.of( - new WorkerActionContextProvider(buildRequest, workers, blazeRuntime.getEventBus())); - } + private BuildRequest buildRequest; + private WorkerPool workers; + private boolean verbose; @Override public Iterable> getCommandOptions(Command command) { @@ -55,17 +51,90 @@ public class WorkerModule extends BlazeModule { @Override public void beforeCommand(BlazeRuntime blazeRuntime, Command command) { - this.blazeRuntime = blazeRuntime; + this.blazeRuntime = Preconditions.checkNotNull(blazeRuntime); blazeRuntime.getEventBus().register(this); + + if (workers == null) { + Path logDir = blazeRuntime.getOutputBase().getRelative("worker-logs"); + try { + logDir.createDirectory(); + } catch (IOException e) { + blazeRuntime + .getReporter() + .handle(Event.error("Could not create directory for worker logs: " + logDir)); + } + + GenericKeyedObjectPoolConfig config = new GenericKeyedObjectPoolConfig(); + config.setTimeBetweenEvictionRunsMillis(10 * 1000); + + workers = new WorkerPool(new WorkerFactory(), config); + workers.setReporter(blazeRuntime.getReporter()); + workers.setLogDirectory(logDir); + } } @Subscribe public void buildStarting(BuildStartingEvent event) { - buildRequest = event.getRequest(); + Preconditions.checkNotNull(workers); + + this.buildRequest = event.getRequest(); + + WorkerOptions options = buildRequest.getOptions(WorkerOptions.class); + workers.setMaxTotalPerKey(options.workerMaxInstances); + workers.setMaxIdlePerKey(options.workerMaxInstances); + workers.setMinIdlePerKey(options.workerMaxInstances); + workers.setVerbose(options.workerVerbose); + this.verbose = options.workerVerbose; + } + + @Override + public Iterable getActionContextProviders() { + Preconditions.checkNotNull(blazeRuntime); + Preconditions.checkNotNull(buildRequest); + Preconditions.checkNotNull(workers); + + return ImmutableList.of( + new WorkerActionContextProvider( + blazeRuntime, buildRequest, workers, blazeRuntime.getEventBus())); + } + + @Override + public Iterable getActionContextConsumers() { + return ImmutableList.of(new WorkerActionContextConsumer()); + } + + @Subscribe + public void buildComplete(BuildCompleteEvent event) { + if (workers != null && buildRequest.getOptions(WorkerOptions.class).workerQuitAfterBuild) { + if (verbose) { + blazeRuntime + .getReporter() + .handle(Event.info("Build completed, shutting down worker pool...")); + } + workers.close(); + workers = null; + } + } + + // Kill workers on Ctrl-C to quickly end the interrupted build. + // TODO(philwo) - make sure that this actually *kills* the workers and not just politely waits + // for them to finish. + @Subscribe + public void buildInterrupted(BuildInterruptedEvent event) { + if (workers != null) { + if (verbose) { + blazeRuntime + .getReporter() + .handle(Event.info("Build interrupted, shutting down worker pool...")); + } + workers.close(); + workers = null; + } } @Override public void afterCommand() { - buildRequest = null; + this.blazeRuntime = null; + this.buildRequest = null; } } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java index b4471c0400..2947a661e6 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerOptions.java @@ -23,20 +23,25 @@ import com.google.devtools.common.options.OptionsBase; public class WorkerOptions extends OptionsBase { public static final WorkerOptions DEFAULTS = Options.getDefaults(WorkerOptions.class); - @Option(name = "worker_max_instances", - defaultValue = "4", - category = "strategy", - help = "How many instances of a worker process (like the persistent Java compiler) may be " - + "launched if you use the 'worker' strategy.") - public int workerMaxInstances; - - @Option(name = "experimental_persistent_javac", - defaultValue = "null", - category = "undocumented", - help = "Enable the experimental persistent Java compiler.", - expansion = {"--strategy=Javac=worker", "--strategy=JavaIjar=local"}) + @Option( + name = "experimental_persistent_javac", + defaultValue = "null", + category = "strategy", + help = "Enable the experimental persistent Java compiler.", + expansion = {"--strategy=Javac=worker", "--strategy=JavaIjar=local"} + ) public Void experimentalPersistentJavac; + @Option( + name = "worker_max_instances", + defaultValue = "4", + category = "strategy", + help = + "How many instances of a worker process (like the persistent Java compiler) may be " + + "launched if you use the 'worker' strategy." + ) + public int workerMaxInstances; + @Option( name = "worker_max_changed_files", defaultValue = "0", @@ -46,4 +51,28 @@ public class WorkerOptions extends OptionsBase { + "workers." ) public int workerMaxChangedFiles; + + @Option( + name = "worker_max_retries", + defaultValue = "3", + category = "strategy", + help = "If a worker fails during work, retry times before giving up." + ) + public int workerMaxRetries; + + @Option( + name = "worker_quit_after_build", + defaultValue = "false", + category = "strategy", + help = "If enabled, all workers quit after a build is done." + ) + public boolean workerQuitAfterBuild; + + @Option( + name = "worker_verbose", + defaultValue = "true", + category = "strategy", + help = "If enabled, prints verbose messages when workers are started, shutdown, ..." + ) + public boolean workerVerbose; } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java index b834d54b15..4df8ddc6f4 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerPool.java @@ -13,10 +13,15 @@ // limitations under the License. package com.google.devtools.build.lib.worker; -import org.apache.commons.pool2.KeyedPooledObjectFactory; +import com.google.devtools.build.lib.events.Reporter; +import com.google.devtools.build.lib.vfs.Path; + import org.apache.commons.pool2.impl.GenericKeyedObjectPool; import org.apache.commons.pool2.impl.GenericKeyedObjectPoolConfig; +import java.util.HashSet; +import java.util.Set; + import javax.annotation.concurrent.ThreadSafe; /** @@ -27,12 +32,28 @@ import javax.annotation.concurrent.ThreadSafe; */ @ThreadSafe final class WorkerPool extends GenericKeyedObjectPool { - public WorkerPool(KeyedPooledObjectFactory factory) { + final WorkerFactory workerFactory; + final Set workers = new HashSet<>(); + + public WorkerPool(WorkerFactory factory) { super(factory); + this.workerFactory = factory; } - public WorkerPool(KeyedPooledObjectFactory factory, - GenericKeyedObjectPoolConfig config) { + public WorkerPool(WorkerFactory factory, GenericKeyedObjectPoolConfig config) { super(factory, config); + this.workerFactory = factory; + } + + public void setLogDirectory(Path logDir) { + this.workerFactory.setLogDirectory(logDir); + } + + public void setReporter(Reporter reporter) { + this.workerFactory.setReporter(reporter); + } + + public void setVerbose(boolean verbose) { + this.workerFactory.setVerbose(verbose); } } diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java index bfac250546..77b38c458d 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnStrategy.java @@ -23,10 +23,17 @@ import com.google.devtools.build.lib.actions.ActionExecutionContext; import com.google.devtools.build.lib.actions.ChangedFilesMessage; import com.google.devtools.build.lib.actions.ExecException; import com.google.devtools.build.lib.actions.ExecutionStrategy; +import com.google.devtools.build.lib.actions.Executor; import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.actions.SpawnActionContext; import com.google.devtools.build.lib.actions.UserExecException; +import com.google.devtools.build.lib.analysis.BlazeDirectories; import com.google.devtools.build.lib.concurrent.ThreadSafety; +import com.google.devtools.build.lib.events.Event; +import com.google.devtools.build.lib.events.EventHandler; +import com.google.devtools.build.lib.standalone.StandaloneSpawnStrategy; +import com.google.devtools.build.lib.syntax.Label; +import com.google.devtools.build.lib.util.CommandFailureUtils; import com.google.devtools.build.lib.util.io.FileOutErr; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.worker.WorkerProtocol.WorkRequest; @@ -43,31 +50,44 @@ import java.util.concurrent.atomic.AtomicBoolean; final class WorkerSpawnStrategy implements SpawnActionContext { private final WorkerPool workers; private final IncrementalHeuristic incrementalHeuristic; + private final StandaloneSpawnStrategy standaloneStrategy; + private final boolean verboseFailures; + private final int maxRetries; public WorkerSpawnStrategy( - OptionsClassProvider optionsProvider, WorkerPool workers, EventBus eventBus) { + BlazeDirectories blazeDirs, + OptionsClassProvider optionsProvider, + EventBus eventBus, + WorkerPool workers, + boolean verboseFailures, + int maxRetries) { Preconditions.checkNotNull(optionsProvider); WorkerOptions options = optionsProvider.getOptions(WorkerOptions.class); - workers.setMaxTotalPerKey(options.workerMaxInstances); - workers.setMaxIdlePerKey(options.workerMaxInstances); - workers.setMinIdlePerKey(options.workerMaxInstances); - this.workers = workers; this.incrementalHeuristic = new IncrementalHeuristic(options.workerMaxChangedFiles); eventBus.register(incrementalHeuristic); + this.workers = Preconditions.checkNotNull(workers); + this.standaloneStrategy = new StandaloneSpawnStrategy(blazeDirs.getExecRoot(), verboseFailures); + this.verboseFailures = verboseFailures; + this.maxRetries = maxRetries; } @Override public void exec(Spawn spawn, ActionExecutionContext actionExecutionContext) throws ExecException, InterruptedException { - if (!incrementalHeuristic.shouldUseWorkers()) { - SpawnActionContext context = actionExecutionContext.getExecutor().getSpawnActionContext(""); - if (context != this) { - context.exec(spawn, actionExecutionContext); - return; - } + Executor executor = actionExecutionContext.getExecutor(); + if (executor.reportsSubcommands()) { + executor.reportSubcommand( + Label.print(spawn.getOwner().getLabel()) + + " [" + + spawn.getResourceOwner().prettyPrint() + + "]", + spawn.asShellCommand(executor.getExecRoot())); } - String paramFile = Iterables.getLast(spawn.getArguments()); + if (!incrementalHeuristic.shouldUseWorkers()) { + standaloneStrategy.exec(spawn, actionExecutionContext); + return; + } // We assume that the spawn to be executed always gets a single argument, which is a flagfile // prefixed with @ and that it will start in persistent mode when we don't pass it one. @@ -75,10 +95,11 @@ final class WorkerSpawnStrategy implements SpawnActionContext { // persistent mode and then pass it the flagfile via a WorkRequest to make it actually do the // work. if (!Iterables.getLast(spawn.getArguments()).startsWith("@")) { - throw new IllegalStateException( - "Must have parameter file as last arg, got args: " + spawn.getArguments()); + standaloneStrategy.exec(spawn, actionExecutionContext); + return; } + String paramFile = Iterables.getLast(spawn.getArguments()); FileOutErr outErr = actionExecutionContext.getFileOutErr(); ImmutableList args = ImmutableList.builder() @@ -87,42 +108,76 @@ final class WorkerSpawnStrategy implements SpawnActionContext { .build(); ImmutableMap env = spawn.getEnvironment(); Path workDir = actionExecutionContext.getExecutor().getExecRoot(); - WorkerKey key = new WorkerKey(args, env, workDir); + WorkerKey key = new WorkerKey(args, env, workDir, spawn.getMnemonic()); try { - Worker worker = workers.borrowObject(key); - try { - WorkRequest.newBuilder() - .addArguments(paramFile) - .build() - .writeDelimitedTo(worker.getOutputStream()); - worker.getOutputStream().flush(); - - WorkResponse response = WorkResponse.parseDelimitedFrom(worker.getInputStream()); - - if (response == null) { - throw new UserExecException( - "Worker process did not return a correct WorkResponse. This is probably caused by a " - + "bug in the worker, writing unexpected other data to stdout."); - } - - String trimmedOutput = response.getOutput().trim(); - if (!trimmedOutput.isEmpty()) { - outErr.getErrorStream().write(trimmedOutput.getBytes()); - } - - if (response.getExitCode() != 0) { - throw new UserExecException( - String.format("Worker process failed with exit code: %d.", response.getExitCode())); - } - } finally { - if (worker != null) { - workers.returnObject(key, worker); - } + WorkResponse response = execInWorker(executor.getEventHandler(), paramFile, key, maxRetries); + + outErr.getErrorStream().write(response.getOutputBytes().toByteArray()); + + if (response.getExitCode() != 0) { + throw new UserExecException( + String.format("Worker process failed with exit code: %d.", response.getExitCode())); } } catch (Exception e) { - throw new UserExecException(e.getMessage(), e); + String message = + CommandFailureUtils.describeCommandFailure( + verboseFailures, spawn.getArguments(), env, workDir.getPathString()); + throw new UserExecException(message, e); + } + } + + private WorkResponse execInWorker( + EventHandler eventHandler, String paramFile, WorkerKey key, int retriesLeft) + throws Exception { + Worker worker = null; + WorkResponse response = null; + + try { + worker = workers.borrowObject(key); + WorkRequest.newBuilder() + .addArguments(paramFile) + .build() + .writeDelimitedTo(worker.getOutputStream()); + worker.getOutputStream().flush(); + + response = WorkResponse.parseDelimitedFrom(worker.getInputStream()); + + if (response == null) { + throw new UserExecException( + "Worker process did not return a correct WorkResponse. This is probably caused by a " + + "bug in the worker, writing unexpected other data to stdout."); + } + } catch (InterruptedException e) { + // The user pressed Ctrl-C. Get out here quick. + if (worker != null) { + workers.invalidateObject(key, worker); + worker = null; + } + throw e; + } catch (Exception e) { + // "Something" failed - let's retry with a fresh worker. + if (worker != null) { + workers.invalidateObject(key, worker); + worker = null; + } + if (retriesLeft > 0) { + eventHandler.handle( + Event.warn( + key.getMnemonic() + + " worker failed (" + + e + + "), invalidating and retrying with new worker...")); + return execInWorker(eventHandler, paramFile, key, retriesLeft - 1); + } else { + throw e; + } + } finally { + if (worker != null) { + workers.returnObject(key, worker); + } } + return response; } @Override -- cgit v1.2.3