aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/tools
diff options
context:
space:
mode:
authorGravatar ulfjack <ulfjack@google.com>2017-07-14 17:11:48 +0200
committerGravatar Jakob Buchgraber <buchgr@google.com>2017-07-17 10:10:42 +0200
commite0b6a21750f93739de4e0ff50eb13b90c60cc286 (patch)
tree42bcc076dd62e30028c5cefb1014aa86e81eac30 /src/tools
parenta76c94be7c56b93fc5a2f9ececfba7ac1f61f69c (diff)
Workaround for the Linux-induced race that causes ETXTBSY
Retry execution when we think it's ETXTBSY. Fixed #3356. PiperOrigin-RevId: 161958095
Diffstat (limited to 'src/tools')
-rw-r--r-- src/tools/remote_worker/src/main/java/com/google/devtools/build/remote/ExecutionServer.java | 43
1 files changed, 34 insertions, 9 deletions
diff --git a/src/tools/remote_worker/src/main/java/com/google/devtools/build/remote/ExecutionServer.java b/src/tools/remote_worker/src/main/java/com/google/devtools/build/remote/ExecutionServer.java
index 0d38e4039e..3e58092cdf 100644
--- a/src/tools/remote_worker/src/main/java/com/google/devtools/build/remote/ExecutionServer.java
+++ b/src/tools/remote_worker/src/main/java/com/google/devtools/build/remote/ExecutionServer.java
@@ -200,15 +200,40 @@ final class ExecutionServer extends ExecutionImplBase {
execRoot.getPathString());
long startTime = System.currentTimeMillis();
CommandResult cmdResult = null;
- try {
- cmdResult =
- cmd.execute(Command.NO_INPUT, Command.NO_OBSERVER, stdoutBuffer, stderrBuffer, true);
- } catch (AbnormalTerminationException e) {
- cmdResult = e.getResult();
- } catch (CommandException e) {
- // At the time this comment was written, this must be a ExecFailedException encapsulating
- // an IOException from the underlying Subprocess.Factory.
- cmdResult = null;
+ // Linux does not provide a safe API for a multi-threaded program to fork a subprocess. Consider
+ // the case where two threads both write an executable file and then try to execute it. It can
+ // happen that the first thread writes its executable file, with the file descriptor still
+ // being open when the second thread forks, with the fork inheriting a copy of the file
+ // descriptor. Then the first thread closes the original file descriptor, and proceeds to
+ // execute the file. At that point Linux sees an open file descriptor to the file and returns
+ // ETXTBSY (Text file busy) as an error. This race is inherent in the fork / exec duality, with
+ // fork always inheriting a copy of the file descriptor table; if there was a way to fork
+ // without copying the entire file descriptor table (e.g., only copy specific entries), we could
+ // avoid this race.
+ //
+ // I was able to reproduce this problem reliably by running significantly more threads than
+ // there are CPU cores on my workstation - the more threads the more likely it happens.
+ //
+ // As a workaround, we retry up to three times before we let the exception propagate.
+ int attempt = 0;
+ while (true) {
+ try {
+ cmdResult =
+ cmd.execute(Command.NO_INPUT, Command.NO_OBSERVER, stdoutBuffer, stderrBuffer, true);
+ } catch (AbnormalTerminationException e) {
+ cmdResult = e.getResult();
+ } catch (CommandException e) {
+ // As of this writing, the cause can only be an IOException from the underlying library.
+ IOException cause = (IOException) e.getCause();
+ if ((attempt++ < 3) && cause.getMessage().endsWith("Text file busy")) {
+ // We wait a bit to give the other forks some time to close their open file descriptors.
+ Thread.sleep(10);
+ continue;
+ } else {
+ throw cause;
+ }
+ }
+ break;
}
long timeoutMillis =
action.hasTimeout()