Introduce a new SpawnCache API; add a RemoteSpawnCache implementation

AbstractSpawnStrategy now uses a SpawnCache if one is registered; this allows adding caching to any spawn runner without having to be aware of the implementations. I will delete the old CachedLocalSpawnRunner in a follow-up CL. PiperOrigin-RevId: 165024382
author: ulfjack <ulfjack@google.com> 2017-08-11 23:19:48 +0200
committer: Irina Iancu <elenairina@google.com> 2017-08-14 14:16:00 +0200
commit: 9274cba2540d1d1c7824147f1d999ac785eeed85 (patch)
tree: 8b7a6bf0409162ffbc1999bff8f2aa932ab23fae /src/main/java/com/google/devtools/build/lib/exec
parent: 95f7fba8394644623d121fc6765356d46dbd043b (diff)
2 files changed, 214 insertions, 1 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java
index 9ad2d468f3..45322e471c 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/AbstractSpawnStrategy.java
@@ -14,6 +14,7 @@
 
 package com.google.devtools.build.lib.exec;
 
+import com.google.common.base.Preconditions;
 import com.google.common.base.Predicates;
 import com.google.common.collect.Iterables;
 import com.google.common.eventbus.EventBus;
@@ -28,15 +29,19 @@ import com.google.devtools.build.lib.actions.SandboxedSpawnActionContext;
 import com.google.devtools.build.lib.actions.Spawn;
 import com.google.devtools.build.lib.actions.SpawnActionContext;
 import com.google.devtools.build.lib.actions.Spawns;
+import com.google.devtools.build.lib.exec.SpawnCache.CacheHandle;
 import com.google.devtools.build.lib.exec.SpawnResult.Status;
 import com.google.devtools.build.lib.exec.SpawnRunner.ProgressStatus;
 import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionPolicy;
 import com.google.devtools.build.lib.rules.fileset.FilesetActionContext;
 import com.google.devtools.build.lib.util.CommandFailureUtils;
 import com.google.devtools.build.lib.util.io.FileOutErr;
+import com.google.devtools.build.lib.vfs.Path;
 import com.google.devtools.build.lib.vfs.PathFragment;
 import java.io.IOException;
 import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.SortedMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
@@ -71,9 +76,29 @@ public abstract class AbstractSpawnStrategy implements SandboxedSpawnActionConte
     SpawnExecutionPolicy policy =
         new SpawnExecutionPolicyImpl(
             spawn, actionExecutionContext, writeOutputFiles, timeout);
+    // TODO(ulfjack): Provide a way to disable the cache. We don't want the RemoteSpawnStrategy to
+    // check the cache twice. Right now that can't happen because this is hidden behind an
+    // experimental flag.
+    SpawnCache cache = actionExecutionContext.getContext(SpawnCache.class);
+    // In production, the getContext method guarantees that we never get null back. However, our
+    // integration tests don't set it up correctly, so cache may be null in testing.
+    if (cache == null || !Spawns.mayBeCached(spawn)) {
+      cache = SpawnCache.NO_CACHE;
+    }
     SpawnResult result;
     try {
-      result = spawnRunner.exec(spawn, policy);
+      try (CacheHandle cacheHandle = cache.lookup(spawn, policy)) {
+        if (cacheHandle.hasResult()) {
+          result = Preconditions.checkNotNull(cacheHandle.getResult());
+        } else {
+          // Actual execution.
+          result = spawnRunner.exec(spawn, policy);
+          if (cacheHandle.willStore()) {
+            cacheHandle.store(
+                result, listExistingOutputFiles(spawn, actionExecutionContext.getExecRoot()));
+          }
+        }
+      }
     } catch (IOException e) {
       throw new EnvironmentalExecException("Unexpected IO error.", e);
     }
@@ -91,6 +116,19 @@ public abstract class AbstractSpawnStrategy implements SandboxedSpawnActionConte
     }
   }
 
+  private List<Path> listExistingOutputFiles(Spawn spawn, Path execRoot) {
+    ArrayList<Path> outputFiles = new ArrayList<>();
+    for (ActionInput output : spawn.getOutputFiles()) {
+      Path outputPath = execRoot.getRelative(output.getExecPathString());
+      // TODO(ulfjack): Store the actual list of output files in SpawnResult and use that instead
+      // of statting the files here again.
+      if (outputPath.exists()) {
+        outputFiles.add(outputPath);
+      }
+    }
+    return outputFiles;
+  }
+
   private final class SpawnExecutionPolicyImpl implements SpawnExecutionPolicy {
     private final Spawn spawn;
     private final ActionExecutionContext actionExecutionContext;
diff --git a/src/main/java/com/google/devtools/build/lib/exec/SpawnCache.java b/src/main/java/com/google/devtools/build/lib/exec/SpawnCache.java
new file mode 100644
index 0000000000..20ea4211a6
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/exec/SpawnCache.java
@@ -0,0 +1,175 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.exec;
+
+import com.google.devtools.build.lib.actions.ActionContext;
+import com.google.devtools.build.lib.actions.ExecException;
+import com.google.devtools.build.lib.actions.ExecutionStrategy;
+import com.google.devtools.build.lib.actions.Spawn;
+import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionPolicy;
+import com.google.devtools.build.lib.vfs.Path;
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.NoSuchElementException;
+
+/**
+ * A cache that can look up a {@link SpawnResult} given a {@link Spawn}, and can also upload the
+ * results of an executed spawn to the cache.
+ *
+ * <p>This is an experimental interface to implement caching with sandboxed local execution.
+ */
+public interface SpawnCache extends ActionContext {
+  /** A no-op implementation that has no result, and performs no upload. */
+  public static CacheHandle NO_RESULT_NO_STORE = new CacheHandle() {
+    @Override
+    public boolean hasResult() {
+      return false;
+    }
+
+    @Override
+    public SpawnResult getResult() {
+      throw new NoSuchElementException();
+    }
+
+    @Override
+    public boolean willStore() {
+      return false;
+    }
+
+    @Override
+    public void store(SpawnResult result, Collection<Path> files)
+        throws InterruptedException, IOException {
+      // Do nothing.
+    }
+
+    @Override
+    public void close() {
+    }
+  };
+
+  /**
+   * Helper method to create a {@link CacheHandle} from a successful {@link SpawnResult} instance.
+   */
+  public static CacheHandle success(final SpawnResult result) {
+    return new CacheHandle() {
+      @Override
+      public boolean hasResult() {
+        return true;
+      }
+
+      @Override
+      public SpawnResult getResult() {
+        return result;
+      }
+
+      @Override
+      public boolean willStore() {
+        return false;
+      }
+
+      @Override
+      public void store(SpawnResult result, Collection<Path> files)
+          throws InterruptedException, IOException {
+        throw new IllegalStateException();
+      }
+
+      @Override
+      public void close() {
+      }
+    };
+  }
+
+  /** A no-op spawn cache. */
+  @ExecutionStrategy(
+      name = {"no-cache"},
+      contextType = SpawnCache.class
+  )
+  public static class NoSpawnCache implements SpawnCache {
+    @Override
+    public CacheHandle lookup(Spawn spawn, SpawnExecutionPolicy context) {
+      return SpawnCache.NO_RESULT_NO_STORE;
+    }
+  }
+
+  /** A no-op implementation that has no results and performs no stores. */
+  public static SpawnCache NO_CACHE = new NoSpawnCache();
+
+  /**
+   * This object represents both a successful and an unsuccessful cache lookup. If
+   * {@link #hasResult} returns true, then {@link #getResult} must successfully return a non-null
+   * instance (use the {@link #success} helper method). Otherwise {@link #getResult} should throw a
+   * {@link NoSuchElementException}.
+   *
+   * <p>If {@link #hasResult} returns false, then {@link #store} may upload the result to the cache
+   * after successful execution.
+   *
+   * <p>Note that this interface extends {@link Closeable}, and callers must guarantee that
+   * {@link #close} is called on this entry (e.g., by using try-with-resources) to free up any
+   * acquired resources.
+   */
+  interface CacheHandle extends Closeable {
+    /** Returns whether the cache lookup was successful. */
+    boolean hasResult();
+
+    /**
+     * Returns the cached result.
+     *
+     * @throws NoSuchElementException if there is no result in this cache entry
+     */
+    SpawnResult getResult();
+
+    /**
+     * Returns true if the store call will actually do work. Use this to avoid unnecessary work
+     * before store if it won't do anything.
+     */
+    boolean willStore();
+
+    /**
+     * Called after successful {@link Spawn} execution, which may or may not store the result in the
+     * cache.
+     *
+     * <p>A cache may silently return from a failed store operation. We recommend erring on the side
+     * of raising an exception rather than returning silently, and offering command-line flags to
+     * tweak this default policy as needed.
+     *
+     * <p>If the current thread is interrupted, then this method should return as quickly as
+     * possible with an {@link InterruptedException}.
+     */
+    void store(SpawnResult result, Collection<Path> files)
+        throws InterruptedException, IOException;
+  }
+
+  /**
+   * Perform a spawn lookup. This method is similar to {@link SpawnRunner#exec}, taking the same
+   * parameters and being allowed to throw the same exceptions. The intent for this method is to
+   * compute a cache lookup key for the given spawn, looking it up in an implementation-dependent
+   * cache (can be either on the local or remote machine), and returning a non-null
+   * {@link CacheHandle} instance.
+   *
+   * <p>If the lookup was successful, this method should write the cached outputs to their
+   * corresponding output locations in the output tree, as well as stdout and stderr, after
+   * notifying {@link SpawnExecutionPolicy#lockOutputFiles}.
+   *
+   * <p>If the lookup was unsuccessful, this method can return a {@link CacheHandle} instance that
+   * has no result, but uploads the results of the execution to the cache. The reason for a callback
+   * object is for the cache to store expensive intermediate values (such as the cache key) that are
+   * needed both for the lookup and the subsequent store operation.
+   *
+   * <p>Note that cache stores may be disabled, in which case the returned {@link CacheHandle}
+   * instance's {@link CacheHandle#store} is a no-op.
+   */
+  CacheHandle lookup(Spawn spawn, SpawnExecutionPolicy context)
+      throws ExecException, IOException, InterruptedException;
+}
author	ulfjack <ulfjack@google.com>	2017-08-11 23:19:48 +0200
committer	Irina Iancu <elenairina@google.com>	2017-08-14 14:16:00 +0200
commit	9274cba2540d1d1c7824147f1d999ac785eeed85 (patch)
tree	8b7a6bf0409162ffbc1999bff8f2aa932ab23fae /src/main/java/com/google/devtools/build/lib/exec
parent	95f7fba8394644623d121fc6765356d46dbd043b (diff)