// Copyright 2014 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.lib.skyframe; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ActionInputHelper; import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.Artifact.TreeFileArtifact; import com.google.devtools.build.lib.actions.cache.Md5Digest; import com.google.devtools.build.lib.actions.cache.Metadata; import com.google.devtools.build.lib.actions.cache.MetadataHandler; import com.google.devtools.build.lib.util.Preconditions; import com.google.devtools.build.lib.util.io.TimestampGranularityMonitor; import com.google.devtools.build.lib.vfs.FileStatus; import com.google.devtools.build.lib.vfs.FileStatusWithDigest; import com.google.devtools.build.lib.vfs.FileStatusWithDigestAdapter; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.RootedPath; import com.google.devtools.build.lib.vfs.Symlinks; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; import 
java.util.Collection; import java.util.Collections; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nullable; /** * Cache provided by an {@link ActionExecutionFunction}, allowing Blaze to obtain data from the * graph and to inject data (e.g. file digests) back into the graph. The cache can be in one of two * modes. After construction it acts as a cache for input and output metadata for the purpose of * checking for an action cache hit. When {@link #discardOutputMetadata} is called, it switches to * a mode where it calls chmod on output files before statting them. This is done here to ensure * that the chmod always comes before the stat in order to ensure that the stat is up to date. * *
<p>Data for the action's inputs is injected into this cache on construction, using the Skyframe * graph as the source of truth. * *
As well, this cache collects data about the action's output files, which is used in three * ways. First, it is served as requested during action execution, primarily by the {@code * ActionCacheChecker} when determining if the action must be rerun, and then after the action is * run, to gather information about the outputs. Second, it is accessed by {@link ArtifactFunction}s * in order to construct {@link FileArtifactValue}s, and by this class itself to generate {@link * TreeArtifactValue}s. Third, the {@link FilesystemValueChecker} uses it to determine the set of * output files to check for inter-build modifications. Because all these use cases are slightly * different, we must occasionally store two versions of the data for a value. See {@link * #getAdditionalOutputData} for elaboration on the difference between these cases, and see the * javadoc for the various internal maps to see what is stored where. */ @VisibleForTesting public class ActionMetadataHandler implements MetadataHandler { /** * Data for input artifacts. Immutable. * *
<p>This should never be read directly. Use {@link #getInputFileArtifactValue} instead.
*/ private final MapIf the artifact is of type "constant metadata", this returns null so that changes to such * artifacts do not tickle the timestamp granularity monitor, delaying the build for no reason. * * @param artifact the artifact for which to fetch the timestamp granularity monitor * @return the timestamp granularity monitor to use, which may be null */ private TimestampGranularityMonitor getTimestampGranularityMonitor(Artifact artifact) { return artifact.isConstantMetadata() ? null : tsgm; } private static Metadata metadataFromValue(FileArtifactValue value) throws FileNotFoundException { if (value == FileArtifactValue.MISSING_FILE_MARKER || value == FileArtifactValue.OMITTED_FILE_MARKER) { throw new FileNotFoundException(); } // If the file is a directory, we need to return the mtime because the action cache uses mtime // to determine if this artifact has changed. We want this code path to go away somehow // for directories (maybe by implementing FileSet in Skyframe). return value.isFile() ? new Metadata(value.getDigest()) : new Metadata(value.getModifiedTime()); } @Override public Metadata getMetadata(Artifact artifact) throws IOException { Metadata metadata = getRealMetadata(artifact); return artifact.isConstantMetadata() ? Metadata.CONSTANT_METADATA : metadata; } @Nullable private FileArtifactValue getInputFileArtifactValue(Artifact input) { if (outputs.contains(input)) { return null; } if (input.hasParent() && outputs.contains(input.getParent())) { return null; } return Preconditions.checkNotNull(inputArtifactData.get(input), input); } /** * Get the real (viz. on-disk) metadata for an Artifact. * A key assumption is that getRealMetadata() will be called for every Artifact in this * ActionMetadataHandler, to populate additionalOutputData and outputTreeArtifactData. * *
<p>We cache data for constant-metadata artifacts, even though it is technically unnecessary,
* because the data stored in this cache is consumed by various parts of Blaze via the {@link
* ActionExecutionValue} (for now, {@link FilesystemValueChecker} and {@link ArtifactFunction}).
* It is simpler for those parts if every output of the action is present in the cache. However,
* we must not return the actual metadata for a constant-metadata artifact.
*/
private Metadata getRealMetadata(Artifact artifact) throws IOException {
  // Anything that is not one of this action's outputs is answered from the input data.
  FileArtifactValue inputValue = getInputFileArtifactValue(artifact);
  if (inputValue != null) {
    return metadataFromValue(inputValue);
  }

  if (artifact.isSourceArtifact()) {
    // A discovered input we didn't have data for.
    // TODO(bazel-team): Change this to an assertion once Skyframe has native input discovery, so
    // all inputs will already have metadata known.
    return null;
  }

  if (artifact.isMiddlemanArtifact()) {
    // Either the action cache checker already injected a digest for this middleman through
    // #setDigestForVirtualArtifact, or we fall back to the default middleman value.
    FileArtifactValue injectedValue = additionalOutputData.get(artifact);
    if (injectedValue != null) {
      return metadataFromValue(injectedValue);
    }
    FileArtifactValue defaultValue = FileArtifactValue.DEFAULT_MIDDLEMAN;
    // If another thread stored something in the meantime, it must agree with the default.
    FileArtifactValue racingValue = additionalOutputData.putIfAbsent(artifact, defaultValue);
    checkInconsistentData(artifact, racingValue, defaultValue);
    return metadataFromValue(defaultValue);
  }

  if (artifact.isTreeArtifact()) {
    TreeArtifactValue treeValue = getTreeArtifactValue(artifact);
    if (treeValue == null || treeValue == TreeArtifactValue.MISSING_TREE_ARTIFACT) {
      // We use FileNotFoundExceptions to determine if an Artifact was or wasn't found.
      // Calling code depends on this particular exception.
      throw new FileNotFoundException(artifact + " not found");
    }
    return treeValue.getMetadata();
  }

  // From here on the artifact is an ordinary output file.
  FileValue cachedFileValue = outputArtifactData.get(artifact);
  if (cachedFileValue == null) {
    // Nothing recorded yet: build the FileValue now and derive the metadata from it.
    // We do not cache exceptions besides nonexistence here, because it is unlikely that the file
    // will be requested from this cache too many times.
    FileValue freshFileValue = constructFileValue(artifact, null);
    return maybeStoreAdditionalData(artifact, freshFileValue, null);
  }

  // Non-middleman artifacts should only have additionalOutputData if they have
  // outputArtifactData. That is not asserted because of concurrency possibilities, but
  // additionalOutputData is only consulted once we know the artifact might be there.
  FileArtifactValue additionalValue = additionalOutputData.get(artifact);
  if (additionalValue != null) {
    // Additional output data, when present, takes precedence over the usual calculation.
    return metadataFromValue(additionalValue);
  }
  if (!cachedFileValue.exists()) {
    throw new FileNotFoundException(artifact.prettyPrint() + " does not exist");
  }
  return new Metadata(Preconditions.checkNotNull(cachedFileValue.getDigest(), artifact));
}
/**
 * Verifies that freshly computed {@code data} for an {@link Artifact} matches {@code oldData},
 * the value that was already present (presumably computed concurrently). A {@code null}
 * {@code oldData} means there was nothing to compare against, so the check passes.
 *
 * @param artifact the artifact the data belongs to; used only for the error message
 * @param oldData the previously stored value, or {@code null} if there was none
 * @param data the value that was just calculated
 * @throws IOException if {@code oldData} is non-null and not equal to {@code data}
 */
// Not private only because used by SkyframeActionExecutor's metadata handler.
static void checkInconsistentData(Artifact artifact,
    @Nullable Object oldData, Object data) throws IOException {
  if (oldData == null || oldData.equals(data)) {
    // Either nobody stored a value before us, or they computed the same thing.
    return;
  }
  // Another thread checked this file since we looked at the map, and got a different answer
  // than we did. Presumably the user modified the file between reads.
  String message =
      "Data for " + artifact.prettyPrint() + " changed to " + data
          + " after it was calculated as " + oldData;
  throw new IOException(message);
}
/**
 * Derives the {@link Metadata} for {@code artifact} from its {@code FileValue}, recording a
 * separate {@code FileArtifactValue} in {@code additionalOutputData} when the {@code FileValue}
 * alone is not sufficient. See {@link #getAdditionalOutputData} for why additional data is
 * sometimes needed, even for normal (non-middleman) artifacts.
 *
 * @param artifact the output artifact being described
 * @param data the file value computed for {@code artifact}
 * @param injectedDigest a digest supplied by the caller, or {@code null} if none was injected
 * @throws FileNotFoundException if {@code data} reports that the file does not exist
 * @throws IOException if a different value was stored concurrently for the same artifact
 */
@Nullable
private Metadata maybeStoreAdditionalData(Artifact artifact, FileValue data,
    @Nullable byte[] injectedDigest) throws IOException {
  if (!data.exists()) {
    // Nonexistent files should only occur before executing an action.
    throw new FileNotFoundException(artifact.prettyPrint() + " does not exist");
  }

  boolean isFile = data.isFile();
  if (isFile && !artifact.hasParent() && data.getDigest() != null) {
    // No separate FileArtifactValue is needed: the digest in the file value is all that this
    // file's metadata requires.
    return new Metadata(data.getDigest());
  }

  // Unfortunately, the FileValue does not contain enough information for us to calculate the
  // corresponding FileArtifactValue -- either the metadata must use the modified time, which we
  // do not expose in the FileValue, or the FileValue didn't store the digest. So we store the
  // metadata separately.
  byte[] digestToStore;
  if (injectedDigest == null && isFile) {
    // Nothing was injected for a regular file: fall back to the FileValue's own digest.
    digestToStore = data.getDigest();
  } else {
    // Either a digest was injected, or this is not a file and cannot be digested.
    digestToStore = injectedDigest;
  }

  FileArtifactValue newValue =
      FileArtifactValue.create(artifact, isFile, isFile ? data.getSize() : 0, digestToStore);
  // Keep whichever value got there first, but insist that both computations agree.
  FileArtifactValue existingValue = additionalOutputData.putIfAbsent(artifact, newValue);
  checkInconsistentData(artifact, existingValue, newValue);
  return metadataFromValue(newValue);
}
/**
 * Records {@code md5Digest} as the data for a middleman artifact by storing a proxy
 * {@code FileArtifactValue} in {@code additionalOutputData}. Both arguments are validated
 * through {@code Preconditions} before anything is stored.
 */
@Override
public void setDigestForVirtualArtifact(Artifact artifact, Md5Digest md5Digest) {
  // Only middleman artifacts may have their digest set this way.
  Preconditions.checkArgument(artifact.isMiddlemanArtifact(), artifact);
  Preconditions.checkNotNull(md5Digest, artifact);
  FileArtifactValue proxyValue =
      FileArtifactValue.createProxy(md5Digest.getDigestBytesUnsafe());
  // Plain put (not putIfAbsent), matching the original call.
  additionalOutputData.put(artifact, proxyValue);
}
private Set There are three reasons why we might not be able to compute metadata for an artifact from
* the FileValue. First, middleman artifacts have no corresponding FileValues. Second, if
* computing a file's digest is not fast, the FileValue does not do so, so a file on a filesystem
* without fast digests has to have its metadata stored separately. Third, some files' metadata
* (directories, empty files) contain their mtimes, which the FileValue does not expose, so that
* has to be stored separately.
*
* Note that for files that need digests, we can't easily inject the digest in the FileValue
* because it would complicate equality-checking on subsequent builds -- if our filesystem doesn't
* do fast digests, the comparison value would not have a digest.
*/
Map