From 8896d2e211c57453359566c4520615c1c1f75b66 Mon Sep 17 00:00:00 2001 From: ulfjack Date: Fri, 19 Jan 2018 02:55:21 -0800 Subject: Prevent broken cache entries on concurrent file changes Local execution has an inherent race condition: if a user modifies a file while an action is executed, then it is impossible for Bazel to tell which version of the file was actually read during action execution. The file may have been modified before or after the tool has read it, or, in the worst case, the tool may have read both the original and the modified version. In addition, the file may be changed back to the original state before Bazel can check the file, so computing the digest before / after may not be sufficient. This is a concern for both local and remote caches, although the cost of poisoning a shared remote cache is significantly higher, and is what has triggered this work. Fixes #3360. We solve this by keeping a reference to the FileContentsProxy, and using that to check for modificaitons before storing the cache entry. We output a warning if this check fails. This change does not increase memory consumption; Java objects are always allocated in multiples of 8 bytes, we use compressed oops, and the FileArtifactValue currently has 12 bytes worth of fields (excl. object overhead), so adding another pointer is effectively free. As a possible performance optimization on purely local builds, we could also consider not computing digests at all, and only use the FileContentsProxy for caching. PiperOrigin-RevId: 182510358 --- .../build/lib/exec/SingleBuildFileCache.java | 24 ++--- .../devtools/build/lib/remote/RemoteOptions.java | 12 +++ .../build/lib/remote/RemoteSpawnCache.java | 23 +++++ .../build/lib/skyframe/ArtifactFunction.java | 2 +- .../build/lib/skyframe/FileArtifactValue.java | 104 ++++++++++++++++----- 5 files changed, 129 insertions(+), 36 deletions(-) (limited to 'src/main/java/com/google/devtools') diff --git a/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java b/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java index c393332fe3..76479d0377 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java +++ b/src/main/java/com/google/devtools/build/lib/exec/SingleBuildFileCache.java @@ -68,24 +68,20 @@ public class SingleBuildFileCache implements ActionInputFileCache { ? ((Artifact) input).getPath() : execRoot.getRelative(input.getExecPath()); try { - byte[] digest = path.getDigest(); + FileArtifactValue metadata = FileArtifactValue.create(path); + if (metadata.getType().isDirectory()) { + throw new DigestOfDirectoryException( + "Input is a directory: " + input.getExecPathString()); + } BaseEncoding hex = BaseEncoding.base16().lowerCase(); ByteString hexDigest = - ByteString.copyFrom(hex.encode(digest).getBytes(US_ASCII)); + ByteString.copyFrom(hex.encode(metadata.getDigest()).getBytes(US_ASCII)); // Inject reverse mapping. Doing this unconditionally in getDigest() showed up // as a hotspot in CPU profiling. digestToPath.put(hexDigest, input); - return new ActionInputMetadata(digest, path.getFileSize()); + return new ActionInputMetadata(metadata); } catch (IOException e) { - if (path.isDirectory()) { - // TODO(bazel-team): This is rather presumptuous- it could have been another - // type of IOException. - return new ActionInputMetadata( - new DigestOfDirectoryException( - "Input is a directory: " + input.getExecPathString())); - } else { - return new ActionInputMetadata(e); - } + return new ActionInputMetadata(e); } } }); @@ -119,8 +115,8 @@ public class SingleBuildFileCache implements ActionInputFileCache { private final IOException exceptionOnAccess; /** Constructor for a successful lookup. */ - ActionInputMetadata(byte[] digest, long size) { - this.metadata = FileArtifactValue.createNormalFile(digest, size); + ActionInputMetadata(Metadata metadata) { + this.metadata = metadata; this.exceptionOnAccess = null; } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteOptions.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteOptions.java index b9b1f55d79..21ab0a5d0b 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteOptions.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteOptions.java @@ -211,4 +211,16 @@ public final class RemoteOptions extends OptionsBase { help = "A file path to a local disk cache." ) public PathFragment experimentalLocalDiskCachePath; + + @Option( + name = "experimental_guard_against_concurrent_changes", + defaultValue = "true", + category = "remote", + documentationCategory = OptionDocumentationCategory.UNCATEGORIZED, + effectTags = {OptionEffectTag.UNKNOWN}, + help = "Turn this off to disable checking the ctime of input files of an action before " + + "uploading it to a remote cache. There may be cases where the Linux kernel delays " + + "writing of files, which could cause false positives." + ) + public boolean experimentalGuardAgainstConcurrentChanges; } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java index e8af197907..04977ed157 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnCache.java @@ -20,6 +20,7 @@ import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.actions.SpawnResult; import com.google.devtools.build.lib.actions.SpawnResult.Status; import com.google.devtools.build.lib.actions.Spawns; +import com.google.devtools.build.lib.actions.cache.Metadata; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.events.Reporter; @@ -27,6 +28,7 @@ import com.google.devtools.build.lib.exec.SpawnCache; import com.google.devtools.build.lib.exec.SpawnRunner.SpawnExecutionPolicy; import com.google.devtools.build.lib.remote.DigestUtil.ActionKey; import com.google.devtools.build.lib.remote.TreeNodeRepository.TreeNode; +import com.google.devtools.build.lib.skyframe.FileArtifactValue; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.remoteexecution.v1test.Action; @@ -157,6 +159,14 @@ final class RemoteSpawnCache implements SpawnCache { @Override public void store(SpawnResult result, Collection files) throws InterruptedException, IOException { + if (options.experimentalGuardAgainstConcurrentChanges) { + try { + checkForConcurrentModifications(); + } catch (IOException e) { + report(Event.warn(e.getMessage())); + return; + } + } boolean uploadAction = Spawns.mayBeCached(spawn) && Status.SUCCESS.equals(result.status()) @@ -179,6 +189,19 @@ final class RemoteSpawnCache implements SpawnCache { @Override public void close() {} + + private void checkForConcurrentModifications() throws IOException { + for (ActionInput input : inputMap.values()) { + Metadata metadata = policy.getActionInputFileCache().getMetadata(input); + if (metadata instanceof FileArtifactValue) { + FileArtifactValue artifactValue = (FileArtifactValue) metadata; + Path path = execRoot.getRelative(input.getExecPath()); + if (artifactValue.wasModifiedSinceDigest(path)) { + throw new IOException(path + " was modified during execution"); + } + } + } + } }; } else { return SpawnCache.NO_RESULT_NO_STORE; diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/ArtifactFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/ArtifactFunction.java index 627184df44..2fe2b69ef2 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/ArtifactFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/ArtifactFunction.java @@ -268,7 +268,7 @@ class ArtifactFunction implements SkyFunction { // Directories are special-cased because their mtimes are used, so should have been constructed // during execution of the action (in ActionMetadataHandler#maybeStoreAdditionalData). Preconditions.checkState(data.isFile(), "Unexpected not file %s (%s)", artifact, data); - return FileArtifactValue.createNormalFile(data.getDigest(), data.getSize()); + return FileArtifactValue.createNormalFile(data); } private static AggregatingArtifactValue createAggregatingValue( diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java b/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java index 06b2aefcc9..aaba2245dc 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/FileArtifactValue.java @@ -24,6 +24,7 @@ import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.vfs.FileStatus; import com.google.devtools.build.lib.vfs.Path; +import com.google.devtools.build.lib.vfs.Symlinks; import com.google.devtools.build.skyframe.SkyValue; import java.io.IOException; import java.util.Arrays; @@ -67,6 +68,11 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { return 0; } + @Override + public boolean wasModifiedSinceDigest(Path path) throws IOException { + return false; + } + @Override public String toString() { return "singleton marker artifact value (" + hashCode() + ")"; @@ -94,6 +100,11 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { throw new UnsupportedOperationException(); } + @Override + public boolean wasModifiedSinceDigest(Path path) throws IOException { + return false; + } + @Override public String toString() { return "OMITTED_FILE_MARKER"; @@ -139,6 +150,11 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { return 0; } + @Override + public boolean wasModifiedSinceDigest(Path path) throws IOException { + return false; + } + @Override public String toString() { return MoreObjects.toStringHelper(this).add("mtime", mtime).toString(); @@ -147,10 +163,12 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { private static final class RegularFileArtifactValue extends FileArtifactValue { private final byte[] digest; + @Nullable private final FileContentsProxy proxy; private final long size; - private RegularFileArtifactValue(byte[] digest, long size) { + private RegularFileArtifactValue(byte[] digest, @Nullable FileContentsProxy proxy, long size) { this.digest = Preconditions.checkNotNull(digest); + this.proxy = proxy; this.size = size; } @@ -169,6 +187,15 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { return size; } + @Override + public boolean wasModifiedSinceDigest(Path path) throws IOException { + if (proxy == null) { + return false; + } + FileStatus stat = path.statIfFound(Symlinks.FOLLOW); + return stat == null || !stat.isFile() || !proxy.equals(FileContentsProxy.create(stat)); + } + @Override public long getModifiedTime() { throw new UnsupportedOperationException( @@ -181,51 +208,77 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { } } - @VisibleForTesting - public static FileArtifactValue create(Artifact artifact) throws IOException { - return create(artifact.getPath()); - } - static FileArtifactValue create(Artifact artifact, FileValue fileValue) throws IOException { boolean isFile = fileValue.isFile(); - return create(artifact.getPath(), isFile, isFile ? fileValue.getSize() : 0, + FileContentsProxy proxy = getProxyFromFileStateValue(fileValue.realFileStateValue()); + return create(artifact.getPath(), isFile, isFile ? fileValue.getSize() : 0, proxy, isFile ? fileValue.getDigest() : null); } static FileArtifactValue create( Artifact artifact, FileValue fileValue, @Nullable byte[] injectedDigest) throws IOException { boolean isFile = fileValue.isFile(); - return create(artifact.getPath(), isFile, isFile ? fileValue.getSize() : 0, injectedDigest); + FileContentsProxy proxy = getProxyFromFileStateValue(fileValue.realFileStateValue()); + return create(artifact.getPath(), isFile, isFile ? fileValue.getSize() : 0, proxy, + injectedDigest); + } + + @VisibleForTesting + public static FileArtifactValue create(Artifact artifact) throws IOException { + return create(artifact.getPath()); } @VisibleForTesting - static FileArtifactValue create(Path path) throws IOException { - FileStatus stat = path.stat(); - boolean isFile = stat.isFile(); - return create(path, isFile, isFile ? stat.getSize() : 0, null); + public static FileArtifactValue create(Path path) throws IOException { + // Caution: there's a race condition between stating the file and computing the + // digest. We need to stat first, since we're using the stat to detect changes. + // We follow symlinks here to be consistent with getDigest. + FileStatus stat = path.stat(Symlinks.FOLLOW); + return create(path, stat.isFile(), stat.getSize(), FileContentsProxy.create(stat), null); } private static FileArtifactValue create( - Path path, boolean isFile, long size, @Nullable byte[] digest) throws IOException { - if (isFile && digest == null) { - digest = DigestUtils.getDigestOrFail(path, size); - } + Path path, boolean isFile, long size, FileContentsProxy proxy, @Nullable byte[] digest) + throws IOException { if (!isFile) { // In this case, we need to store the mtime because the action cache uses mtime for // directories to determine if this artifact has changed. We want this code path to go away // somehow (maybe by implementing FileSet in Skyframe). return new DirectoryArtifactValue(path.getLastModifiedTime()); } + if (digest == null) { + digest = DigestUtils.getDigestOrFail(path, size); + } Preconditions.checkState(digest != null, path); - return createNormalFile(digest, size); + return new RegularFileArtifactValue(digest, proxy, size); } - public static FileArtifactValue createNormalFile(byte[] digest, long size) { - return new RegularFileArtifactValue(digest, size); + public static FileArtifactValue createForVirtualActionInput(byte[] digest, long size) { + return new RegularFileArtifactValue(digest, /*proxy=*/ null, size); + } + + public static FileArtifactValue createNormalFile( + byte[] digest, @Nullable FileContentsProxy proxy, long size) { + return new RegularFileArtifactValue(digest, proxy, size); } static FileArtifactValue createNormalFile(FileValue fileValue) { - return new RegularFileArtifactValue(fileValue.getDigest(), fileValue.getSize()); + FileContentsProxy proxy = getProxyFromFileStateValue(fileValue.realFileStateValue()); + return new RegularFileArtifactValue(fileValue.getDigest(), proxy, fileValue.getSize()); + } + + private static FileContentsProxy getProxyFromFileStateValue(FileStateValue value) { + if (value instanceof FileStateValue.RegularFileStateValue) { + return ((FileStateValue.RegularFileStateValue) value).getContentsProxy(); + } else if (value instanceof FileStateValue.SpecialFileStateValue) { + return ((FileStateValue.SpecialFileStateValue) value).getContentsProxy(); + } + return null; + } + + @VisibleForTesting + public static FileArtifactValue createNormalFile(byte[] digest, long size) { + return createNormalFile(digest, /*proxy=*/null, size); } public static FileArtifactValue createDirectory(long mtime) { @@ -238,7 +291,7 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { */ static FileArtifactValue createProxy(byte[] digest) { Preconditions.checkNotNull(digest); - return createNormalFile(digest, /*size=*/ 0); + return createNormalFile(digest, /*proxy=*/ null, /*size=*/ 0); } @Override @@ -254,6 +307,15 @@ public abstract class FileArtifactValue implements SkyValue, Metadata { @Override public abstract long getModifiedTime(); + /** + * Provides a best-effort determination whether the file was changed since the digest was + * computed. This method performs file system I/O, so may be expensive. It's primarily intended to + * avoid storing bad cache entries in an action cache. It should return true if there is a chance + * that the file was modified since the digest was computed. Better not upload if we are not sure + * that the cache entry is reliable. + */ + public abstract boolean wasModifiedSinceDigest(Path path) throws IOException; + @Override public boolean equals(Object o) { if (this == o) { -- cgit v1.2.3