// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.skyframe;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.io.BaseEncoding;
import com.google.devtools.build.lib.actions.ActionInput;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.cache.Digest;
import com.google.devtools.build.lib.actions.cache.DigestUtils;
import com.google.devtools.build.lib.actions.cache.Metadata;
import com.google.devtools.build.lib.actions.cache.MetadataHandler;
import com.google.devtools.build.lib.util.io.TimestampGranularityMonitor;
import com.google.devtools.build.lib.vfs.FileStatus;
import com.google.devtools.build.lib.vfs.FileStatusWithDigest;
import com.google.devtools.build.lib.vfs.FileStatusWithDigestAdapter;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.RootedPath;
import com.google.devtools.build.lib.vfs.Symlinks;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import javax.annotation.Nullable;
/**
* Cache provided by an {@link ActionExecutionFunction}, allowing Blaze to obtain data from the
* graph and to inject data (e.g. file digests) back into the graph.
*
* <p>Data for the action's inputs is injected into this cache on construction, using the graph as
* the source of truth.
*
* <p>As well, this cache collects data about the action's output files, which is used in three
* ways. First, it is served as requested during action execution, primarily by the {@code
* ActionCacheChecker} when determining if the action must be rerun, and then after the action is
* run, to gather information about the outputs. Second, it is accessed by {@link
* ArtifactFunction}s in order to construct {@link ArtifactValue}s. Third, the {@link
* FilesystemValueChecker} uses it to determine the set of output files to check for inter-build
* modifications. Because all these use cases are slightly different, we must occasionally store two
* versions of the data for a value (see {@link #getAdditionalOutputData} for more).
*/
@VisibleForTesting
public class ActionMetadataHandler implements MetadataHandler {
/**
 * Data for the action's inputs, supplied at construction. This should never be read directly.
 * Use {@link #getInputFileArtifactValue} instead.
 */
private final Map<Artifact, FileArtifactValue> inputArtifactData;

/** File values computed for, or injected into, this action's outputs, keyed by artifact. */
private final ConcurrentMap<Artifact, FileValue> outputArtifactData = new ConcurrentHashMap<>();

/** Outputs recorded by {@link #markOmitted}. */
private final Set<Artifact> omittedOutputs = Sets.newConcurrentHashSet();

// See #getAdditionalOutputData for documentation of this field.
private final ConcurrentMap<Artifact, FileArtifactValue> additionalOutputData =
    new ConcurrentHashMap<>();

/** Outputs for which {@link #injectDigest} has been called. */
private final Set<Artifact> injectedArtifacts = Sets.newConcurrentHashSet();

/** The action's outputs; {@link #getInputFileArtifactValue} never serves input data for these. */
private final ImmutableSet<Artifact> outputs;

/** Passed to {@link #fileValueFromArtifact} whenever an output is examined on disk. */
private final TimestampGranularityMonitor tsgm;
/**
 * Creates a handler for a single action.
 *
 * @param inputArtifactData data for the action's inputs; must not be null
 * @param outputs the action's outputs; copied into an immutable set
 * @param tsgm monitor passed along whenever an output's file value is computed
 */
@VisibleForTesting
public ActionMetadataHandler(
    Map inputArtifactData, Iterable outputs, TimestampGranularityMonitor tsgm) {
  // The null check runs before the copy so that a missing input map is reported first.
  Map checkedInputData = Preconditions.checkNotNull(inputArtifactData);
  this.inputArtifactData = checkedInputData;
  this.outputs = ImmutableSet.copyOf(outputs);
  this.tsgm = tsgm;
}
/**
 * Like {@link #getMetadata}, but reports any {@link IOException} as a {@code null} result
 * instead of propagating it.
 */
@Override
public Metadata getMetadataMaybe(Artifact artifact) {
  Metadata result;
  try {
    result = getMetadata(artifact);
  } catch (IOException ignored) {
    // Deliberately best-effort: callers of the "maybe" variant treat failure as "no metadata".
    result = null;
  }
  return result;
}
/**
 * Converts a {@link FileArtifactValue} into the {@link Metadata} served to callers.
 *
 * @throws FileNotFoundException if {@code value} is the missing-file or omitted-file marker
 */
private static Metadata metadataFromValue(FileArtifactValue value) throws FileNotFoundException {
  boolean isMissingMarker = value == FileArtifactValue.MISSING_FILE_MARKER;
  boolean isOmittedMarker = value == FileArtifactValue.OMITTED_FILE_MARKER;
  if (isMissingMarker || isOmittedMarker) {
    throw new FileNotFoundException();
  }
  if (value.getSize() > 0) {
    return new Metadata(value.getDigest());
  }
  // If the file is empty or a directory, we need to return the mtime because the action cache
  // uses mtime to determine if this artifact has changed. We do not optimize for this code
  // path (by storing the mtime somewhere) because we eventually may be switching to use digests
  // for empty files. We want this code path to go away somehow too for directories (maybe by
  // implementing FileSet in Skyframe).
  return new Metadata(value.getModifiedTime());
}
/**
 * Returns the metadata for {@code artifact}. The real metadata is always looked up first (which
 * may populate this cache or throw); constant-metadata artifacts then report
 * {@link Metadata#CONSTANT_METADATA} in place of the real value.
 */
@Override
public Metadata getMetadata(Artifact artifact) throws IOException {
  Metadata realMetadata = getRealMetadata(artifact);
  if (artifact.isConstantMetadata()) {
    return Metadata.CONSTANT_METADATA;
  }
  return realMetadata;
}
/**
 * Returns the data recorded for {@code input} at construction, or {@code null} if {@code input}
 * is one of this action's outputs or is not an {@link Artifact}. A non-output artifact with no
 * recorded data fails the {@code checkNotNull} precondition.
 */
@Nullable
private FileArtifactValue getInputFileArtifactValue(ActionInput input) {
  boolean isNonOutputArtifact = !outputs.contains(input) && input instanceof Artifact;
  if (!isNonOutputArtifact) {
    return null;
  }
  return Preconditions.checkNotNull(inputArtifactData.get(input), input);
}
/**
 * Returns the actual metadata for {@code artifact}, consulting in order: the input data, the
 * additional output data (for middlemen), the cached output file value, and finally the
 * filesystem. Values computed here are stored in this cache.
 *
 * <p>We cache data for constant-metadata artifacts, even though it is technically unnecessary,
 * because the data stored in this cache is consumed by various parts of Blaze via the {@link
 * ActionExecutionValue} (for now, {@link FilesystemValueChecker} and {@link ArtifactFunction}).
 * It is simpler for those parts if every output of the action is present in the cache. However,
 * we must not return the actual metadata for a constant-metadata artifact.
 *
 * @return the metadata, or {@code null} for a source artifact that has no input data
 * @throws FileNotFoundException if the artifact does not exist, or its stored value is a
 *     missing-file or omitted-file marker
 * @throws IOException if the filesystem cannot be read, or if a value stored concurrently for
 *     the same artifact differs from the one computed here
 */
@Nullable
private Metadata getRealMetadata(Artifact artifact) throws IOException {
  FileArtifactValue value = getInputFileArtifactValue(artifact);
  if (value != null) {
    return metadataFromValue(value);
  }
  if (artifact.isSourceArtifact()) {
    // A discovered input we didn't have data for.
    // TODO(bazel-team): Change this to an assertion once Skyframe has native input discovery, so
    // all inputs will already have metadata known.
    return null;
  } else if (artifact.isMiddlemanArtifact()) {
    // A middleman artifact's data was either already injected from the action cache checker using
    // #setDigestForVirtualArtifact, or it has the default middleman value.
    value = additionalOutputData.get(artifact);
    if (value != null) {
      return metadataFromValue(value);
    }
    value = FileArtifactValue.DEFAULT_MIDDLEMAN;
    FileArtifactValue oldValue = additionalOutputData.putIfAbsent(artifact, value);
    checkInconsistentData(artifact, oldValue, value);
    return metadataFromValue(value);
  }
  FileValue fileValue = outputArtifactData.get(artifact);
  if (fileValue != null) {
    // Non-middleman artifacts should only have additionalOutputData if they have
    // outputArtifactData. We don't assert this because of concurrency possibilities, but at least
    // we don't check additionalOutputData unless we expect that we might see the artifact there.
    value = additionalOutputData.get(artifact);
    // If additional output data is present for this artifact, we use it in preference to the
    // usual calculation.
    if (value != null) {
      return metadataFromValue(value);
    }
    if (!fileValue.exists()) {
      throw new FileNotFoundException(artifact.prettyPrint() + " does not exist");
    }
    return new Metadata(Preconditions.checkNotNull(fileValue.getDigest(), artifact));
  }
  // We do not cache exceptions besides nonexistence here, because it is unlikely that the file
  // will be requested from this cache too many times.
  fileValue = fileValueFromArtifact(artifact, null, tsgm);
  FileValue oldFileValue = outputArtifactData.putIfAbsent(artifact, fileValue);
  // Compare the previously stored FileValue with the FileValue we just computed. ("value" is
  // always null on this path, so comparing against it would reject every concurrent insert, even
  // one that agrees with ours.)
  checkInconsistentData(artifact, oldFileValue, fileValue);
  return maybeStoreAdditionalData(artifact, fileValue, null);
}
/**
 * Verifies that freshly computed {@code data} for {@code artifact} matches {@code oldData}, the
 * value (if any) that was already stored, presumably by a concurrent computation.
 *
 * @throws IOException if {@code oldData} is non-null and not equal to {@code data}
 */
// Not private only because used by SkyframeActionExecutor's metadata handler.
static void checkInconsistentData(Artifact artifact,
    @Nullable Object oldData, Object data) throws IOException {
  boolean consistent = oldData == null || oldData.equals(data);
  if (consistent) {
    return;
  }
  // Another thread checked this file since we looked at the map, and got a different answer
  // than we did. Presumably the user modified the file between reads.
  throw new IOException("Data for " + artifact.prettyPrint() + " changed to " + data
      + " after it was calculated as " + oldData);
}
/**
 * Produces the metadata for an output whose {@link FileValue} is {@code data}, recording a
 * separate {@link FileArtifactValue} in the additional output data when the file value's digest
 * alone is not enough. See {@link #getAdditionalOutputData} for why that is sometimes needed,
 * even for normal (non-middleman) artifacts.
 *
 * @param injectedDigest digest supplied by the caller, or {@code null} if none was supplied
 * @throws FileNotFoundException if {@code data} says the file does not exist
 * @throws IOException if a different value was already stored for {@code artifact}
 */
@Nullable
private Metadata maybeStoreAdditionalData(Artifact artifact, FileValue data,
    @Nullable byte[] injectedDigest) throws IOException {
  if (!data.exists()) {
    // Nonexistent files should only occur before executing an action.
    throw new FileNotFoundException(artifact.prettyPrint() + " does not exist");
  }
  boolean regularFile = data.isFile();
  boolean digestWanted = DigestUtils.useFileDigest(regularFile, regularFile ? data.getSize() : 0);
  if (digestWanted) {
    if (data.getDigest() != null) {
      // We do not need to store the FileArtifactValue separately -- the digest is in the file
      // value and that is all that is needed for this file's metadata.
      return new Metadata(data.getDigest());
    }
  }
  // Unfortunately, the FileValue does not contain enough information for us to calculate the
  // corresponding FileArtifactValue -- either the metadata must use the modified time, which we
  // do not expose in the FileValue, or the FileValue didn't store the digest. So we store the
  // metadata separately.
  // Use the FileValue's digest if no digest was injected, or if the file can't be digested.
  byte[] digestToStore = injectedDigest;
  if (digestToStore == null && regularFile) {
    digestToStore = data.getDigest();
  }
  FileArtifactValue stored = FileArtifactValue.create(
      artifact, regularFile, regularFile ? data.getSize() : 0, digestToStore);
  FileArtifactValue previous = additionalOutputData.putIfAbsent(artifact, stored);
  checkInconsistentData(artifact, previous, stored);
  return metadataFromValue(stored);
}
/**
 * Records {@code digest} as the value of a middleman artifact, replacing any value stored
 * earlier. Fails a precondition if {@code artifact} is not a middleman or {@code digest} is null.
 */
@Override
public void setDigestForVirtualArtifact(Artifact artifact, Digest digest) {
  Preconditions.checkState(artifact.isMiddlemanArtifact(), artifact);
  Preconditions.checkNotNull(digest, artifact);
  FileArtifactValue middlemanValue =
      FileArtifactValue.createMiddleman(digest.asMetadata().digest);
  additionalOutputData.put(artifact, middlemanValue);
}
/**
 * Injects a caller-supplied digest for {@code output}. Inputs that are not {@link Artifact}s are
 * ignored. Each artifact may be injected at most once.
 *
 * @throws IllegalStateException if the artifact was already injected, if the file value carries
 *     a digest that differs from {@code digest}, or if storing additional data fails for a
 *     non-empty file
 */
@Override
public void injectDigest(ActionInput output, FileStatus statNoFollow, byte[] digest) {
  if (!(output instanceof Artifact)) {
    return;
  }
  Artifact artifact = (Artifact) output;
  Preconditions.checkState(injectedArtifacts.add(artifact), artifact);
  FileValue fileValue;
  try {
    // This call may do an unnecessary call to Path#getFastDigest to see if the digest is
    // readily available. We cannot pass the digest in, though, because if it is not available
    // from the filesystem, this FileValue will not compare equal to another one created for the
    // same file, because the other one will be missing its digest.
    fileValue = fileValueFromArtifact(
        artifact, FileStatusWithDigestAdapter.adapt(statNoFollow), tsgm);
    // Ensure the digest supplied matches the actual digest if it exists.
    byte[] fileDigest = fileValue.getDigest();
    boolean digestMismatch = fileDigest != null && !Arrays.equals(digest, fileDigest);
    if (digestMismatch) {
      BaseEncoding hex = BaseEncoding.base16();
      String expected = (digest == null) ? "null" : hex.encode(digest);
      // fileDigest is known to be non-null in this branch.
      String actual = hex.encode(fileDigest);
      throw new IllegalStateException("Expected digest " + expected + " for artifact "
          + artifact + ", but got " + actual);
    }
    outputArtifactData.put(artifact, fileValue);
  } catch (IOException e) {
    // Do nothing - we just failed to inject metadata. Real error handling will be done later,
    // when somebody will try to access that file.
    return;
  }
  // If needed, insert additional data. Note that this can only be true if the file is empty or
  // the filesystem does not support fast digests. Since we usually only inject digests when
  // running with a filesystem that supports fast digests, this is fairly unlikely.
  try {
    maybeStoreAdditionalData(artifact, fileValue, digest);
  } catch (IOException e) {
    boolean emptyFile = fileValue.getSize() == 0;
    if (!emptyFile) {
      // Empty files currently have their mtimes examined, and so could throw. No other files
      // should throw, since all filesystem access has already been done.
      throw new IllegalStateException(
          "Filesystem should not have been accessed while injecting data for "
          + artifact.prettyPrint(), e);
    }
    // Ignore exceptions for empty files, as above.
  }
}
/**
 * Marks {@code output} as omitted and stores the omitted-file marker as its additional output
 * data. Inputs that are not {@link Artifact}s are ignored. Marking the same artifact twice fails
 * a precondition.
 */
@Override
public void markOmitted(ActionInput output) {
  if (!(output instanceof Artifact)) {
    return;
  }
  Artifact omitted = (Artifact) output;
  Preconditions.checkState(omittedOutputs.add(omitted), omitted);
  additionalOutputData.put(omitted, FileArtifactValue.OMITTED_FILE_MARKER);
}
/** Returns whether {@code artifact} was previously passed to {@link #markOmitted}. */
@Override
public boolean artifactOmitted(Artifact artifact) {
  boolean wasMarkedOmitted = omittedOutputs.contains(artifact);
  return wasMarkedOmitted;
}
/**
 * Clears all cached output data (both file values and additional output data).
 *
 * @throws IllegalStateException if any artifact has already been injected or marked omitted
 */
@Override
public void discardOutputMetadata() {
  boolean nothingInjected = injectedArtifacts.isEmpty();
  Preconditions.checkState(
      nothingInjected,
      "Artifacts cannot be injected before action execution: %s",
      injectedArtifacts);
  boolean nothingOmitted = omittedOutputs.isEmpty();
  Preconditions.checkState(
      nothingOmitted,
      "Artifacts cannot be marked omitted before action execution: %s",
      omittedOutputs);
  outputArtifactData.clear();
  additionalOutputData.clear();
}
/**
 * Returns whether metadata can be obtained for {@code artifact}. Must not be called for an
 * artifact that has been marked omitted.
 */
@Override
public boolean artifactExists(Artifact artifact) {
  boolean omitted = artifactOmitted(artifact);
  Preconditions.checkState(!omitted, artifact);
  Metadata metadata = getMetadataMaybe(artifact);
  return metadata != null;
}
/**
 * Returns whether {@code artifact} is a regular file, answering from the recorded input data
 * when that says so and otherwise asking the artifact's path.
 */
@Override
public boolean isRegularFile(Artifact artifact) {
  // Currently this method is used only for genrule input directory checks. If we need to call
  // this on output artifacts too, this could be more efficient.
  FileArtifactValue inputValue = getInputFileArtifactValue(artifact);
  boolean knownRegularFile = inputValue != null && inputValue.isFile();
  return knownRegularFile || artifact.getPath().isFile();
}
/** Returns whether {@link #injectDigest} has been called for {@code artifact}. */
@Override
public boolean isInjected(Artifact artifact) {
  boolean wasInjected = injectedArtifacts.contains(artifact);
  return wasInjected;
}
/**
 * Returns the file values gathered for this action's output files during execution. This is
 * the handler's own backing map, not a copy. It should include data for all non-middleman
 * artifacts.
 */
Map getOutputData() {
  return this.outputArtifactData;
}
/**
 * Returns data for those output files whose metadata could not be derived from the matching
 * entry in {@link #getOutputData}. This is the handler's own backing map, not a copy.
 *
 * <p>Metadata may not be derivable from the FileValue for three reasons:
 *
 * <ol>
 *   <li>Middleman artifacts have no corresponding FileValues.
 *   <li>When computing a file's digest is not fast, the FileValue does not compute it, so a file
 *       on a filesystem without fast digests needs its metadata stored separately.
 *   <li>Some files' metadata (directories, empty files) contain their mtimes, which the
 *       FileValue does not expose, so that too has to be stored separately.
 * </ol>
 *
 * <p>Note that for files that need digests, we can't easily inject the digest in the FileValue
 * because it would complicate equality-checking on subsequent builds -- if our filesystem doesn't
 * do fast digests, the comparison value would not have a digest.
 */
Map getAdditionalOutputData() {
  return this.additionalOutputData;
}
/**
 * Builds the {@link FileValue} for {@code artifact} from the filesystem.
 *
 * @param statNoFollow a no-follow stat of the artifact's path, or {@code null} to have this
 *     method perform the stat itself
 * @param tsgm monitor passed to the {@link FileStateValue} factory methods
 * @return the file value; a nonexistent-file value if the stat performed here finds nothing
 * @throws IOException on filesystem errors, if the path is a symlink that resolves to itself, or
 *     wrapping an {@link InconsistentFilesystemException}
 */
static FileValue fileValueFromArtifact(Artifact artifact,
    @Nullable FileStatusWithDigest statNoFollow, TimestampGranularityMonitor tsgm)
    throws IOException {
  Path artifactPath = artifact.getPath();
  RootedPath artifactRootedPath =
      RootedPath.toRootedPath(artifact.getRoot().getPath(), artifact.getRootRelativePath());
  FileStatusWithDigest stat = statNoFollow;
  if (stat == null) {
    stat = FileStatusWithDigestAdapter.adapt(artifactPath.statIfFound(Symlinks.NOFOLLOW));
    if (stat == null) {
      return FileValue.value(
          artifactRootedPath,
          FileStateValue.NONEXISTENT_FILE_STATE_NODE,
          artifactRootedPath,
          FileStateValue.NONEXISTENT_FILE_STATE_NODE);
    }
  }
  Path resolvedPath = artifactPath;
  // We use FileStatus#isSymbolicLink over Path#isSymbolicLink to avoid the unnecessary stat
  // done by the latter.
  if (stat.isSymbolicLink()) {
    resolvedPath = artifactPath.resolveSymbolicLinks();
    // We need to protect against symlink cycles since FileValue#value assumes it's dealing with
    // a file that's not in a symlink cycle.
    if (resolvedPath.equals(artifactPath)) {
      throw new IOException("symlink cycle");
    }
  }
  RootedPath resolvedRootedPath = RootedPath.toRootedPathMaybeUnderRoot(
      resolvedPath, ImmutableList.of(artifact.getRoot().getPath()));
  FileStateValue linkState;
  FileStateValue targetState;
  try {
    linkState = FileStateValue.createWithStatNoFollow(artifactRootedPath, stat, tsgm);
    // TODO(bazel-team): consider avoiding a 'stat' here when the symlink target hasn't changed
    // and is a source file (since changes to those are checked separately).
    if (resolvedPath.equals(artifactPath)) {
      targetState = linkState;
    } else {
      targetState = FileStateValue.create(resolvedRootedPath, tsgm);
    }
  } catch (InconsistentFilesystemException e) {
    throw new IOException(e);
  }
  return FileValue.value(artifactRootedPath, linkState, resolvedRootedPath, targetState);
}
}