aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib')
-rw-r--r--src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java144
-rw-r--r--src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java1
-rw-r--r--src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java13
-rw-r--r--src/main/java/com/google/devtools/build/lib/runtime/CacheFileDigestsModule.java93
4 files changed, 248 insertions, 3 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
index 508ee8920a..aaf8b1d459 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
@@ -13,14 +13,21 @@
// limitations under the License.
package com.google.devtools.build.lib.actions.cache;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheStats;
import com.google.common.io.BaseEncoding;
+import com.google.common.primitives.Longs;
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.util.BlazeClock;
import com.google.devtools.build.lib.util.Fingerprint;
import com.google.devtools.build.lib.util.LoggingUtil;
+import com.google.devtools.build.lib.util.Preconditions;
import com.google.devtools.build.lib.util.VarInt;
+import com.google.devtools.build.lib.vfs.FileStatus;
import com.google.devtools.build.lib.vfs.Path;
+import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
@@ -31,6 +38,12 @@ import java.util.logging.Level;
/**
* Utility class for getting md5 digests of files.
*
+ * <p>This class implements an optional cache of file digests when the computation of the digests is
+ * costly (i.e. when {@link Path#getFastDigest()} is not available). The cache can be enabled via
+ * the {@link #configureCache(long)} function, but note that enabling this cache might have an
+ * impact on correctness because not all changes to files can be purely detected from their
+ * metadata.
+ *
* <p>Note that this class is responsible for digesting file metadata in an order-independent
* manner. Care must be taken to do this properly. The digest must be a function of the set of
* (path, metadata) tuples. While the order of these pairs must not matter, it would <b>not</b> be
@@ -45,6 +58,76 @@ public class DigestUtils {
private static final Object DIGEST_LOCK = new Object();
private static final AtomicBoolean MULTI_THREADED_DIGEST = new AtomicBoolean(false);
+  /**
+   * Identity of a file for the purposes of the digest cache, used for files that lack a fast
+   * digest.
+   *
+   * <p>A key combines the file's path with several pieces of its status data so that most
+   * modifications to a file yield a different key and therefore miss the cache.
+   */
+  private static class CacheKey {
+    /** Location of the file. */
+    private final PathFragment path;
+
+    /** Identifier the file system assigns to the file (typically the inode number). */
+    private final long nodeId;
+
+    /** Time at which the file was last modified. */
+    private final long modifiedTime;
+
+    /** Length of the file. */
+    private final long size;
+
+    /**
+     * Builds the key for a file from its path and its status data.
+     *
+     * @param path path to the file
+     * @param status file status data supplying the node id, modification time and size
+     * @throws IOException if reading the file status data fails
+     */
+    public CacheKey(Path path, FileStatus status) throws IOException {
+      this.path = path.asFragment();
+      this.nodeId = status.getNodeId();
+      this.modifiedTime = status.getLastModifiedTime();
+      this.size = status.getSize();
+    }
+
+    @Override
+    public boolean equals(Object object) {
+      if (this == object) {
+        return true;
+      }
+      if (!(object instanceof CacheKey)) {
+        return false;
+      }
+      CacheKey that = (CacheKey) object;
+      // The path is compared first, followed by the numeric status fields.
+      if (!path.equals(that.path)) {
+        return false;
+      }
+      return nodeId == that.nodeId && modifiedTime == that.modifiedTime && size == that.size;
+    }
+
+    @Override
+    public int hashCode() {
+      // Classic 17/31 polynomial hash over the four fields, in declaration order.
+      int hash = 31 * 17 + path.hashCode();
+      hash = 31 * hash + Longs.hashCode(nodeId);
+      hash = 31 * hash + Longs.hashCode(modifiedTime);
+      hash = 31 * hash + Longs.hashCode(size);
+      return hash;
+    }
+  }
+
+ /**
+ * Global cache of files to their digests.
+ *
+ * <p>This is null when the cache is disabled.
+ *
+ * <p>Note that we do not use a {@link com.google.common.cache.LoadingCache} because our keys
+ * hold the file's {@link PathFragment} rather than its {@link Path} instance. As a result, the
+ * loading function cannot actually compute the digests of the files so we have to handle this
+ * externally.
+ *
+ * <p>The field is volatile and is replaced wholesale by {@link #configureCache(long)}; code in
+ * this class copies it into a local variable before using it.
+ */
+ private static volatile Cache<CacheKey, byte[]> globalCache = null;
+
/** Private constructor to prevent instantiation of utility class. */
private DigestUtils() {}
@@ -76,6 +159,35 @@ public class DigestUtils {
}
/**
+ * Enables the caching of file digests based on file status data.
+ *
+ * <p>If the cache was already enabled, this causes the cache to be reinitialized thus losing all
+ * contents. If the given size is zero, the cache is disabled altogether.
+ *
+ * @param maximumSize maximumSize of the cache in number of entries
+ */
+  public static void configureCache(long maximumSize) {
+    // A size of zero means "no cache": drop any existing instance. Any other size replaces the
+    // current cache with a brand-new, empty one that records statistics.
+    globalCache =
+        (maximumSize == 0)
+            ? null
+            : CacheBuilder.newBuilder()
+                .maximumSize(maximumSize)
+                .recordStats()
+                .<CacheKey, byte[]>build();
+  }
+
+  /**
+   * Obtains cache statistics.
+   *
+   * <p>The cache must have previously been enabled by a call to {@link #configureCache(long)} with
+   * a non-zero size. A size of zero disables the cache, in which case there are no statistics to
+   * return and the precondition check in this method fails.
+   *
+   * @return an immutable snapshot of the cache statistics
+   */
+  public static CacheStats getCacheStats() {
+    // Read the volatile field once so the null check and the stats() call use the same instance.
+    Cache<CacheKey, byte[]> cache = globalCache;
+    // Size 0 disables the cache, so only a strictly positive size satisfies this precondition.
+    Preconditions.checkNotNull(cache, "configureCache() must have been called with a size > 0");
+    return cache.stats();
+  }
+
+ /**
* Enable or disable multi-threaded digesting even for large files.
*/
public static void setMultiThreadedDigest(boolean multiThreadedDigest) {
@@ -104,19 +216,45 @@ public class DigestUtils {
digest = null;
}
+ // At this point, either we could not get a fast digest or the fast digest we got is corrupt.
+ // Attempt a cache lookup if the cache is enabled and return the cached digest if found.
+ Cache<CacheKey, byte[]> cache = globalCache;
+ CacheKey key = null;
+ if (cache != null && digest == null) {
+ key = new CacheKey(path, path.stat());
+ digest = cache.getIfPresent(key);
+ }
if (digest != null) {
return digest;
- } else if (fileSize > 4096 && !MULTI_THREADED_DIGEST.get()) {
+ }
+
+ // All right, we have neither a fast nor a cached digest. Let's go through the costly process of
+ // computing it from the file contents.
+ if (fileSize > 4096 && !MULTI_THREADED_DIGEST.get()) {
// We'll have to read file content in order to calculate the digest. In that case
// it would be beneficial to serialize those calculations since there is a high
// probability that MD5 will be requested for multiple output files simultaneously.
// Exception is made for small (<=4K) files since they will not likely to introduce
// significant delays (at worst they will result in two extra disk seeks by
// interrupting other reads).
- return getDigestInExclusiveMode(path);
+ digest = getDigestInExclusiveMode(path);
} else {
- return getDigestInternal(path);
+ digest = getDigestInternal(path);
}
+
+ Preconditions.checkNotNull(
+ digest,
+ "We should have gotten a digest for %s at this point but we still don't have one",
+ path);
+ if (cache != null) {
+ Preconditions.checkNotNull(
+ key,
+ "We should have computed a cache key earlier for %s because the cache is enabled and we"
+ + " did not get a fast digest for this file, but we don't have a key here",
+ path);
+ cache.put(key, digest);
+ }
+ return digest;
}
/**
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java b/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java
index c32e3835ef..336715a479 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java
@@ -45,6 +45,7 @@ public final class BazelMain {
com.google.devtools.build.lib.ssd.SsdModule.class,
com.google.devtools.build.lib.worker.WorkerModule.class,
com.google.devtools.build.lib.remote.RemoteModule.class,
+ com.google.devtools.build.lib.runtime.CacheFileDigestsModule.class,
com.google.devtools.build.lib.standalone.StandaloneModule.class,
com.google.devtools.build.lib.sandbox.SandboxModule.class,
com.google.devtools.build.lib.runtime.BuildSummaryStatsModule.class,
diff --git a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java
index b8b2ac2509..7766b6c7a0 100644
--- a/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/exec/ExecutionOptions.java
@@ -205,4 +205,17 @@ public class ExecutionOptions extends OptionsBase {
help = "Print the contents of the SpawnActionContext and ContextProviders maps."
)
public boolean debugPrintActionContexts;
+
+ @Option(
+ name = "cache_computed_file_digests",
+ defaultValue = "50000",
+ category = "undocumented",
+ help =
+ "If greater than 0, configures Blaze to cache file digests in memory based on their "
+ + "metadata instead of recomputing the digests from disk every time they are needed. "
+ + "Setting this to 0 ensures correctness because not all file changes can be noted "
+ + "from file metadata. When not 0, the number indicates the size of the cache as the "
+ + "number of file digests to be cached."
+ )
+ public long cacheSizeForComputedFileDigests; // 0 disables the digest cache.
}
diff --git a/src/main/java/com/google/devtools/build/lib/runtime/CacheFileDigestsModule.java b/src/main/java/com/google/devtools/build/lib/runtime/CacheFileDigestsModule.java
new file mode 100644
index 0000000000..1c94bd8a17
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/runtime/CacheFileDigestsModule.java
@@ -0,0 +1,93 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.runtime;
+
+import com.google.common.cache.CacheStats;
+import com.google.devtools.build.lib.actions.cache.DigestUtils;
+import com.google.devtools.build.lib.buildtool.BuildRequest;
+import com.google.devtools.build.lib.exec.ExecutionOptions;
+import com.google.devtools.build.lib.exec.ExecutorBuilder;
+import com.google.devtools.build.lib.util.Preconditions;
+import java.util.logging.Logger;
+
+/**
+ * Module that turns on the file digest cache of {@link DigestUtils} and logs its statistics around
+ * each command.
+ */
+public class CacheFileDigestsModule extends BlazeModule {
+
+  private static final Logger log = Logger.getLogger(CacheFileDigestsModule.class.getName());
+
+  /** Snapshot taken when a command starts; the baseline for the per-command delta. */
+  private CacheStats stats;
+
+  /**
+   * Cache size applied by the most recent reconfiguration, or null if no command has configured
+   * the cache yet. A different requested size triggers a reinitialization of the cache.
+   */
+  private Long lastKnownCacheSize;
+
+  public CacheFileDigestsModule() {}
+
+  /**
+   * Writes one log line describing the given cache statistics.
+   *
+   * @param message text placed at the start of the line
+   * @param stats the cache statistics to report
+   */
+  private static void logStats(String message, CacheStats stats) {
+    StringBuilder line = new StringBuilder();
+    line.append(message);
+    line.append(": hit count=").append(stats.hitCount());
+    line.append(", miss count=").append(stats.missCount());
+    line.append(", hit rate=").append(stats.hitRate());
+    line.append(", eviction count=").append(stats.evictionCount());
+    log.info(line.toString());
+  }
+
+  @Override
+  public void executorInit(CommandEnvironment env, BuildRequest request, ExecutorBuilder builder) {
+    super.executorInit(env, request, builder);
+
+    ExecutionOptions executionOptions = request.getOptions(ExecutionOptions.class);
+    long requestedSize = executionOptions.cacheSizeForComputedFileDigests;
+
+    // Only rebuild the cache when the requested size differs from the one last applied, so that
+    // cached digests survive across commands that keep the same setting.
+    boolean sizeChanged =
+        lastKnownCacheSize == null || lastKnownCacheSize.longValue() != requestedSize;
+    if (sizeChanged) {
+      log.info("Reconfiguring cache with size=" + requestedSize);
+      DigestUtils.configureCache(requestedSize);
+      lastKnownCacheSize = requestedSize;
+    }
+
+    if (requestedSize != 0) {
+      stats = DigestUtils.getCacheStats();
+      logStats("Accumulated cache stats before command", stats);
+    } else {
+      stats = null;
+      log.info("Disabled cache");
+    }
+  }
+
+  @Override
+  public void afterCommand() {
+    super.afterCommand();
+
+    if (stats == null) {
+      // Either the cache is disabled or this command never initialized the executor.
+      return;
+    }
+
+    CacheStats current = DigestUtils.getCacheStats();
+    Preconditions.checkNotNull(current, "The cache is enabled so we must get some stats back");
+    logStats("Accumulated cache stats after command", current);
+    logStats("Cache stats for finished command", current.minus(stats));
+    stats = null; // Stay quiet until the next command that uses the executor.
+  }
+}