// Copyright 2016 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.lib.remote; import static java.nio.charset.StandardCharsets.US_ASCII; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Interner; import com.google.common.collect.Iterables; import com.google.common.graph.Traverser; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ActionInputHelper; import com.google.devtools.build.lib.actions.DigestOfDirectoryException; import com.google.devtools.build.lib.actions.FileArtifactValue; import com.google.devtools.build.lib.actions.MetadataProvider; import com.google.devtools.build.lib.actions.cache.VirtualActionInput; import com.google.devtools.build.lib.concurrent.BlazeInterners; import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.remote.util.DigestUtil; import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.Symlinks; import 
com.google.devtools.remoteexecution.v1test.Digest;
import com.google.devtools.remoteexecution.v1test.Directory;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import javax.annotation.Nullable;

/**
 * A factory and repository for {@link TreeNode} objects. Provides directory structure traversals,
 * computing and caching Merkle hashes on all objects.
 */
@ThreadSafe
public final class TreeNodeRepository {
  private static final BaseEncoding LOWER_CASE_HEX = BaseEncoding.base16().lowerCase();

  // In this implementation, symlinks are NOT followed when expanding directory artifacts.
  public static final Symlinks SYMLINK_POLICY = Symlinks.NOFOLLOW;

  private final Traverser<TreeNode> traverser =
      Traverser.forTree((TreeNode node) -> children(node));

  /**
   * A single node in a hierarchical directory structure. Leaves are the Artifacts, although we only
   * use the ActionInput interface. We assume that the objects used for the ActionInputs are unique
   * (same data corresponds to a canonical object in memory).
   *
   * <p>There are three cases:
   *
   * <ol>
   *   <li>The node is a leaf that represents an artifact file.
   *   <li>The node is a directory optionally associated with an artifact (an "artifact directory").
   *   <li>The node is a leaf that is the descendant of an artifact directory. In this case, the
   *       node is associated with a BasicActionInput, not a full Artifact.
   * </ol>
   */
  @Immutable
  @ThreadSafe
  public static final class TreeNode {
    private final int hashCode;
    private final ImmutableList<ChildEntry> childEntries; // no need to make it a map thus far.
    @Nullable private final ActionInput actionInput;
    private final boolean isLeaf;

    /** A pair of path segment, TreeNode. */
    @Immutable
    public static final class ChildEntry {
      private final String segment;
      private final TreeNode child;

      public ChildEntry(String segment, TreeNode child) {
        this.segment = segment;
        this.child = child;
      }

      public TreeNode getChild() {
        return child;
      }

      public String getSegment() {
        return segment;
      }

      @Override
      @SuppressWarnings("ReferenceEquality")
      public boolean equals(Object o) {
        if (o == this) {
          return true;
        }
        if (!(o instanceof ChildEntry)) {
          return false;
        }
        ChildEntry other = (ChildEntry) o;
        // Pointer comparison for the TreeNode as it is interned
        return other.segment.equals(segment) && other.child == child;
      }

      @Override
      public int hashCode() {
        return Objects.hash(segment, child);
      }

      // Orders sibling entries by path segment. Declared final so the shared comparator
      // cannot be reassigned.
      public static final Comparator<ChildEntry> segmentOrder =
          Comparator.comparing(ChildEntry::getSegment);
    }

    // Should only be called by the TreeNodeRepository.
    private TreeNode(Iterable<ChildEntry> childEntries, @Nullable ActionInput actionInput) {
      isLeaf = false;
      this.actionInput = actionInput;
      this.childEntries = ImmutableList.copyOf(childEntries);
      if (actionInput != null) {
        hashCode = actionInput.hashCode(); // This will ensure efficient interning of TreeNodes as
        // long as all ActionInputs either implement data-based hashCode or are interned themselves.
      } else {
        hashCode = Arrays.hashCode(this.childEntries.toArray());
      }
    }

    // Should only be called by the TreeNodeRepository.
    private TreeNode(ActionInput actionInput) {
      isLeaf = true;
      this.actionInput =
          Preconditions.checkNotNull(actionInput, "a TreeNode leaf should have an ActionInput");
      this.childEntries = ImmutableList.of();
      hashCode = actionInput.hashCode(); // This will ensure efficient interning of TreeNodes as
      // long as all ActionInputs either implement data-based hashCode or are interned themselves.
    }

    public ActionInput getActionInput() {
      return actionInput;
    }

    public ImmutableList<ChildEntry> getChildEntries() {
      return childEntries;
    }

    public boolean isLeaf() {
      return isLeaf;
    }

    @Override
    public int hashCode() {
      return hashCode;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof TreeNode)) {
        return false;
      }
      TreeNode otherNode = (TreeNode) o;
      // Full comparison of ActionInputs. If pointers are different, will compare paths.
      return Objects.equals(otherNode.actionInput, actionInput)
          && childEntries.equals(otherNode.childEntries);
    }

    private String toDebugStringAtLevel(int level) {
      char[] prefix = new char[level];
      Arrays.fill(prefix, ' ');
      StringBuilder sb = new StringBuilder();

      if (isLeaf()) {
        sb.append('\n');
        sb.append(prefix);
        sb.append("leaf: ");
        sb.append(actionInput);
      } else {
        for (ChildEntry entry : childEntries) {
          sb.append('\n');
          sb.append(prefix);
          sb.append(entry.segment);
          sb.append(entry.child.toDebugStringAtLevel(level + 1));
        }
      }
      return sb.toString();
    }

    public String toDebugString() {
      return toDebugStringAtLevel(0);
    }
  }

  private static final TreeNode EMPTY_NODE = new TreeNode(ImmutableList.of(), null);

  // Keep only one canonical instance of every TreeNode in the repository.
  private final Interner<TreeNode> interner = BlazeInterners.newWeakInterner();
  // Merkle hashes are computed and cached by the repository, therefore execRoot must
  // be part of the state.
  private final Path execRoot;
  private final MetadataProvider inputFileCache;
  // Maps the lower-case hex digest (as ASCII bytes) of an input back to the ActionInput;
  // populated lazily by getInputMetadata.
  private final Map<ByteString, ActionInput> reverseInputMap = new ConcurrentHashMap<>();
  // For directories that are themselves artifacts, map of the ActionInput to the Merkle hash
  private final Map<ActionInput, Digest> inputDirectoryDigestCache = new HashMap<>();
  private final Map<TreeNode, Digest> treeNodeDigestCache = new HashMap<>();
  private final Map<Digest, TreeNode> digestTreeNodeCache = new HashMap<>();
  private final Map<TreeNode, Directory> directoryCache = new HashMap<>();
  private final Map<VirtualActionInput, Digest> virtualInputDigestCache = new HashMap<>();
  private final Map<Digest, VirtualActionInput> digestVirtualInputCache = new HashMap<>();
  private final DigestUtil digestUtil;

  public TreeNodeRepository(Path execRoot, MetadataProvider inputFileCache, DigestUtil digestUtil) {
    this.execRoot = execRoot;
    this.inputFileCache = inputFileCache;
    this.digestUtil = digestUtil;
  }

  public MetadataProvider getInputFileCache() {
    return inputFileCache;
  }

  /** Returns the direct children of {@code node}, in child-entry order. */
  public Iterable<TreeNode> children(TreeNode node) {
    return Iterables.transform(node.getChildEntries(), TreeNode.ChildEntry::getChild);
  }

  /** Traverse the directory structure in order (pre-order tree traversal). */
  public Iterable<TreeNode> descendants(TreeNode node) {
    return traverser.depthFirstPreOrder(node);
  }

  /**
   * Traverse the directory structure in order (pre-order tree traversal), return only the leaves.
   */
  public Iterable<TreeNode> leaves(TreeNode node) {
    return Iterables.filter(
        descendants(node),
        new Predicate<TreeNode>() {
          @Override
          public boolean apply(TreeNode node) {
            return node.isLeaf();
          }
        });
  }

  /**
   * This function is a temporary and highly inefficient hack! It builds the tree from a ready list
   * of input files. TODO(olaola): switch to creating and maintaining the TreeNodeRepository based
   * on the build graph structure.
   */
  public TreeNode buildFromActionInputs(SortedMap<PathFragment, ActionInput> sortedMap)
      throws IOException {
    ImmutableList.Builder<ImmutableList<String>> segments = ImmutableList.builder();
    for (PathFragment path : sortedMap.keySet()) {
      segments.add(path.getSegments());
    }
    // The values iterate in the same (sorted) order as keySet(), so inputs[i] corresponds to
    // segments[i].
    List<ActionInput> inputs = new ArrayList<>(sortedMap.values());
    return buildParentNode(inputs, segments.build(), 0, inputs.size(), 0);
  }

  // Expand the descendant of an artifact (input) directory.
  private List<TreeNode.ChildEntry> buildInputDirectoryEntries(Path path) throws IOException {
    List<Dirent> sortedDirent = new ArrayList<>(path.readdir(SYMLINK_POLICY));
    sortedDirent.sort(Comparator.comparing(Dirent::getName));

    List<TreeNode.ChildEntry> entries = new ArrayList<>(sortedDirent.size());
    for (Dirent dirent : sortedDirent) {
      String name = dirent.getName();
      Path child = path.getRelative(name);
      TreeNode childNode;
      if (dirent.getType() == Dirent.Type.DIRECTORY) {
        childNode = interner.intern(new TreeNode(buildInputDirectoryEntries(child), null));
      } else {
        childNode = interner.intern(new TreeNode(ActionInputHelper.fromPath(child.asFragment())));
      }
      entries.add(new TreeNode.ChildEntry(name, childNode));
    }
    return entries;
  }

  /**
   * Recursively builds the subtree for inputs[inputsStart..inputsEnd) whose execroot-relative
   * paths share a common prefix of {@code segmentIndex} segments. The inputs must be sorted by
   * path, so equal segments form contiguous runs.
   */
  @SuppressWarnings("ReferenceEquality") // Segments are interned.
  private TreeNode buildParentNode(
      List<ActionInput> inputs,
      ImmutableList<ImmutableList<String>> segments,
      int inputsStart,
      int inputsEnd,
      int segmentIndex)
      throws IOException {
    if (segments.isEmpty()) {
      // We sometimes have actions with no inputs (e.g., echo "xyz" > $@), so we need to handle that
      // case here.
      Preconditions.checkState(inputs.isEmpty());
      return EMPTY_NODE;
    }
    if (segmentIndex == segments.get(inputsStart).size()) {
      // Leaf node reached. Must be unique.
      Preconditions.checkArgument(
          inputsStart == inputsEnd - 1, "Encountered two inputs with the same path.");
      ActionInput input = inputs.get(inputsStart);
      try {
        if (!(input instanceof VirtualActionInput)
            && getInputMetadata(input).getType().isDirectory()) {
          Path leafPath = execRoot.getRelative(input.getExecPathString());
          return interner.intern(new TreeNode(buildInputDirectoryEntries(leafPath), input));
        }
      } catch (DigestOfDirectoryException e) {
        // The metadata lookup signals a directory by throwing; expand it like the branch above.
        Path leafPath = execRoot.getRelative(input.getExecPathString());
        return interner.intern(new TreeNode(buildInputDirectoryEntries(leafPath), input));
      }
      return interner.intern(new TreeNode(input));
    }
    ArrayList<TreeNode.ChildEntry> entries = new ArrayList<>();
    String segment = segments.get(inputsStart).get(segmentIndex);
    for (int inputIndex = inputsStart; inputIndex < inputsEnd; ++inputIndex) {
      // A run of equal segments [inputsStart, inputIndex] ends here; recurse into it.
      if (inputIndex + 1 == inputsEnd
          || !segment.equals(segments.get(inputIndex + 1).get(segmentIndex))) {
        entries.add(
            new TreeNode.ChildEntry(
                segment,
                buildParentNode(inputs, segments, inputsStart, inputIndex + 1, segmentIndex + 1)));
        if (inputIndex + 1 < inputsEnd) {
          inputsStart = inputIndex + 1;
          segment = segments.get(inputsStart).get(segmentIndex);
        }
      }
    }
    Collections.sort(entries, TreeNode.ChildEntry.segmentOrder);
    return interner.intern(new TreeNode(entries, null));
  }

  private synchronized Directory getOrComputeDirectory(TreeNode node) throws IOException {
    // Assumes all child digests have already been computed!
    Preconditions.checkArgument(!node.isLeaf());
    Directory directory = directoryCache.get(node);
    if (directory == null) {
      Directory.Builder b = Directory.newBuilder();
      for (TreeNode.ChildEntry entry : node.getChildEntries()) {
        TreeNode child = entry.getChild();
        if (child.isLeaf()) {
          ActionInput input = child.getActionInput();
          final Digest digest;
          if (input instanceof VirtualActionInput) {
            VirtualActionInput virtualInput = (VirtualActionInput) input;
            digest = digestUtil.compute(virtualInput);
            virtualInputDigestCache.put(virtualInput, digest);
            // There may be multiple inputs with the same digest. In that case, we don't care which
            // one we get back from the digestVirtualInputCache later.
            digestVirtualInputCache.put(digest, virtualInput);
          } else {
            digest = DigestUtil.getFromInputCache(input, inputFileCache);
          }
          b.addFilesBuilder().setName(entry.getSegment()).setDigest(digest).setIsExecutable(true);
        } else {
          Digest childDigest = Preconditions.checkNotNull(treeNodeDigestCache.get(child));
          if (child.getActionInput() != null) {
            inputDirectoryDigestCache.put(child.getActionInput(), childDigest);
          }
          b.addDirectoriesBuilder().setName(entry.getSegment()).setDigest(childDigest);
        }
      }
      directory = b.build();
      directoryCache.put(node, directory);
      Digest digest = digestUtil.compute(directory);
      treeNodeDigestCache.put(node, digest);
      digestTreeNodeCache.put(digest, node);
    }
    return directory;
  }

  // Recursively traverses the tree, expanding and computing Merkle digests for nodes for which
  // they have not yet been computed and cached.
  public void computeMerkleDigests(TreeNode root) throws IOException {
    synchronized (this) {
      if (directoryCache.get(root) != null) {
        // Strong assumption: the cache is valid, i.e. parent present implies children present.
        return;
      }
    }
    if (!root.isLeaf()) {
      for (TreeNode child : children(root)) {
        computeMerkleDigests(child);
      }
      getOrComputeDirectory(root);
    }
  }

  /**
   * Should only be used after computeMerkleDigests has been called on one of the node ancestors.
   * Returns the precomputed digest.
   */
  public Digest getMerkleDigest(TreeNode node) throws IOException {
    return node.isLeaf()
        ? actionInputToDigest(node.getActionInput())
        : treeNodeDigestCache.get(node);
  }

  /**
   * Returns the precomputed digests for both data and metadata. Should only be used after
   * computeMerkleDigests has been called on one of the node ancestors.
   */
  public ImmutableCollection<Digest> getAllDigests(TreeNode root) throws IOException {
    ImmutableSet.Builder<Digest> digests = ImmutableSet.builder();
    for (TreeNode node : descendants(root)) {
      digests.add(
          node.isLeaf()
              ? actionInputToDigest(node.getActionInput())
              : Preconditions.checkNotNull(treeNodeDigestCache.get(node)));
    }
    return digests.build();
  }

  private Digest actionInputToDigest(ActionInput input) throws IOException {
    if (input instanceof VirtualActionInput) {
      return Preconditions.checkNotNull(virtualInputDigestCache.get(input));
    }
    FileArtifactValue metadata = getInputMetadata(input);
    byte[] digest = metadata.getDigest();
    if (digest == null) {
      // If the artifact does not have a digest, it is because it is a directory.
      // We get the digest from the set of Merkle hashes computed in this TreeNodeRepository.
      return Preconditions.checkNotNull(
          inputDirectoryDigestCache.get(input),
          "a directory should have a precomputed Merkle hash (instead of a digest)");
    }
    return DigestUtil.getFromInputCache(input, inputFileCache);
  }

  /**
   * Serializes all of the subtree to a Directory list. TODO(olaola): add a version that only copies
   * a part of the tree that we are interested in. Should only be used after computeMerkleDigests
   * has been called on one of the node ancestors.
   */
  // Note: this is not, strictly speaking, thread safe. If someone is deleting cached Merkle hashes
  // while this is executing, it will trigger an exception. But I think this is WAI.
  public ImmutableList<Directory> treeToDirectories(TreeNode root) {
    ImmutableList.Builder<Directory> directories = ImmutableList.builder();
    for (TreeNode node : descendants(root)) {
      if (!node.isLeaf()) {
        directories.add(Preconditions.checkNotNull(directoryCache.get(node)));
      }
    }
    return directories.build();
  }

  /**
   * Should only be used on digests created by a call to computeMerkleDigests. Looks up ActionInputs
   * or Directory messages by cached digests and adds them to the lists.
   */
  public void getDataFromDigests(
      Iterable<Digest> digests,
      Map<Digest, ActionInput> actionInputs,
      Map<Digest, Directory> nodes) {
    for (Digest digest : digests) {
      TreeNode treeNode = digestTreeNodeCache.get(digest);
      if (treeNode != null) {
        nodes.put(digest, Preconditions.checkNotNull(directoryCache.get(treeNode)));
      } else {
        // If not there, it must be an ActionInput.
        ByteString hexDigest = ByteString.copyFromUtf8(digest.getHash());
        ActionInput input = reverseInputMap.get(hexDigest);
        if (input == null) {
          // ... or a VirtualActionInput.
          input = digestVirtualInputCache.get(digest);
        }
        actionInputs.put(digest, Preconditions.checkNotNull(input));
      }
    }
  }

  private FileArtifactValue getInputMetadata(ActionInput input) throws IOException {
    FileArtifactValue metadata =
        Preconditions.checkNotNull(
            inputFileCache.getMetadata(input), "Missing metadata for: %s", input);
    if (metadata.getDigest() != null) {
      // Record the hex digest -> input mapping for later reverse lookups in getDataFromDigests.
      reverseInputMap.put(
          ByteString.copyFrom(LOWER_CASE_HEX.encode(metadata.getDigest()).getBytes(US_ASCII)),
          input);
    }
    return metadata;
  }
}