From ce372c36a3c1c90197a69bb2f3870babfc1eef5e Mon Sep 17 00:00:00 2001 From: Janak Ramakrishnan Date: Mon, 14 Mar 2016 16:19:18 +0000 Subject: Roll-forward of commit 4bf0018ed1cf8616297b951dc03dbde3f0db2503 with code to preserve order of glob matches: Parallelize fetches of symlink file values, subdirectory globs, and subdirectory package lookup values. This should improve change pruning speed when we have to check a glob. It also keeps GlobFunction closer to the contract of Skyframe, because in order to avoid quadratic restarts, it wasn't checking for missing deps between getValue calls. -- MOS_MIGRATED_REVID=117139471 --- .../lib/skyframe/DirectoryListingStateValue.java | 2 +- .../devtools/build/lib/skyframe/GlobFunction.java | 254 +++++++++++++++------ .../com/google/devtools/build/lib/vfs/Dirent.java | 12 +- 3 files changed, 194 insertions(+), 74 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/DirectoryListingStateValue.java b/src/main/java/com/google/devtools/build/lib/skyframe/DirectoryListingStateValue.java index 34b20225db..7664fd2117 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/DirectoryListingStateValue.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/DirectoryListingStateValue.java @@ -107,7 +107,7 @@ public final class DirectoryListingStateValue implements SkyValue { new Comparator() { @Override public int compare(Integer o1, Integer o2) { - return direntArray[o1].getName().compareTo(direntArray[o2].getName()); + return direntArray[o1].compareTo(direntArray[o2]); } }); String[] names = new String[dirents.size()]; diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/GlobFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/GlobFunction.java index e233cdfdd5..06518314e8 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/GlobFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/GlobFunction.java @@ -15,9 +15,12 @@ package com.google.devtools.build.lib.skyframe; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; +import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; import com.google.devtools.build.lib.cmdline.PackageIdentifier; import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; +import com.google.devtools.build.lib.util.Preconditions; import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.Dirent.Type; import com.google.devtools.build.lib.vfs.PathFragment; @@ -29,6 +32,7 @@ import com.google.devtools.build.skyframe.SkyFunctionException.Transience; import com.google.devtools.build.skyframe.SkyKey; import com.google.devtools.build.skyframe.SkyValue; +import java.util.Map; import java.util.regex.Pattern; import javax.annotation.Nullable; @@ -57,10 +61,13 @@ public final class GlobFunction implements SkyFunction { // exists which implies that the package's directory exists. PathFragment globSubdir = glob.getSubdir(); if (!globSubdir.equals(PathFragment.EMPTY_FRAGMENT)) { - PackageLookupValue globSubdirPkgLookupValue = (PackageLookupValue) env.getValue( - PackageLookupValue.key(PackageIdentifier.create( - glob.getPackageId().getRepository(), - glob.getPackageId().getPackageFragment().getRelative(globSubdir)))); + PackageLookupValue globSubdirPkgLookupValue = + (PackageLookupValue) + env.getValue( + PackageLookupValue.key( + PackageIdentifier.create( + glob.getPackageId().getRepository(), + glob.getPackageId().getPackageFragment().getRelative(globSubdir)))); if (globSubdirPkgLookupValue == null) { return null; } @@ -87,16 +94,23 @@ public final class GlobFunction implements SkyFunction { NestedSetBuilder matches = NestedSetBuilder.stableOrder(); + boolean globMatchesBareFile = patternTail == null; + // "**" also matches an empty segment, so try the case where it is not present. if ("**".equals(patternHead)) { - if (patternTail == null) { + if (globMatchesBareFile) { // Recursive globs aren't supposed to match the package's directory. if (!glob.excludeDirs() && !globSubdir.equals(PathFragment.EMPTY_FRAGMENT)) { matches.add(globSubdir); } } else { - SkyKey globKey = GlobValue.internalKey(glob.getPackageId(), glob.getPackageRoot(), - globSubdir, patternTail, glob.excludeDirs()); + SkyKey globKey = + GlobValue.internalKey( + glob.getPackageId(), + glob.getPackageRoot(), + globSubdir, + patternTail, + glob.excludeDirs()); GlobValue globValue = (GlobValue) env.getValue(globKey); if (globValue == null) { return null; @@ -108,6 +122,7 @@ public final class GlobFunction implements SkyFunction { PathFragment dirPathFragment = glob.getPackageId().getPackageFragment().getRelative(globSubdir); RootedPath dirRootedPath = RootedPath.toRootedPath(glob.getPackageRoot(), dirPathFragment); if (alwaysUseDirListing || containsGlobs(patternHead)) { + String subdirPattern = "**".equals(patternHead) ? glob.getPattern() : patternTail; // Pattern contains globs, so a directory listing is required. // // Note that we have good reason to believe the directory exists: if this is the @@ -121,12 +136,21 @@ public final class GlobFunction implements SkyFunction { return null; } + // In order to batch Skyframe requests, we do three passes over the directory: + // (1) Process every dirent, keeping track of values we need to request if the dirent cannot + // be processed with current information (symlink targets and subdirectory globs/package + // lookups for some subdirectories). + // (2) Get those values and process the symlinks, keeping track of subdirectory globs/package + // lookups we may need to request in case the symlink's target is a directory. + // (3) Process the necessary subdirectories. + int direntsSize = listingValue.getDirents().size(); + Map symlinkFileMap = Maps.newHashMapWithExpectedSize(direntsSize); + Map firstPassSubdirMap = Maps.newHashMapWithExpectedSize(direntsSize); + Map sortedResultMap = Maps.newTreeMap(); + // First pass: do normal files and collect SkyKeys to request. for (Dirent dirent : listingValue.getDirents()) { Type direntType = dirent.getType(); String fileName = dirent.getName(); - - boolean isDirectory = (direntType == Dirent.Type.DIRECTORY); - if (!UnixGlob.matches(patternHead, fileName, regexPatternCache)) { continue; } @@ -135,46 +159,112 @@ public final class GlobFunction implements SkyFunction { // TODO(bazel-team): Consider extracting the symlink resolution logic. // For symlinks, look up the corresponding FileValue. This ensures that if the symlink // changes and "switches types" (say, from a file to a directory), this value will be - // invalidated. - RootedPath symlinkRootedPath = RootedPath.toRootedPath(glob.getPackageRoot(), - dirPathFragment.getRelative(fileName)); - FileValue symlinkFileValue = (FileValue) env.getValue(FileValue.key(symlinkRootedPath)); - if (symlinkFileValue == null) { - continue; - } - if (!symlinkFileValue.isSymlink()) { - throw new GlobFunctionException(new InconsistentFilesystemException( - "readdir and stat disagree about whether " + symlinkRootedPath.asPath() - + " is a symlink."), Transience.TRANSIENT); + // invalidated. We also need the target's type to properly process the symlink. + symlinkFileMap.put( + dirent, + FileValue.key( + RootedPath.toRootedPath( + glob.getPackageRoot(), dirPathFragment.getRelative(fileName)))); + continue; + } + + if (direntType == Dirent.Type.DIRECTORY) { + SkyKey keyToRequest = getSkyKeyForSubdir(fileName, glob, subdirPattern); + if (keyToRequest != null) { + firstPassSubdirMap.put(dirent, keyToRequest); } - if (!symlinkFileValue.exists()) { - continue; + } else if (globMatchesBareFile) { + sortedResultMap.put(dirent, glob.getSubdir().getRelative(fileName)); + } + } + + Map firstPassAndSymlinksResult = + env.getValues(Iterables.concat(firstPassSubdirMap.values(), symlinkFileMap.values())); + if (env.valuesMissing()) { + return null; + } + // Second pass: do symlinks, and maybe collect further SkyKeys if targets are directories. + Map symlinkSubdirMap = Maps.newHashMapWithExpectedSize(symlinkFileMap.size()); + for (Map.Entry direntAndKey : symlinkFileMap.entrySet()) { + Dirent dirent = direntAndKey.getKey(); + String fileName = dirent.getName(); + Preconditions.checkState(dirent.getType() == Dirent.Type.SYMLINK, direntAndKey); + FileValue symlinkFileValue = + Preconditions.checkNotNull( + (FileValue) firstPassAndSymlinksResult.get(direntAndKey.getValue()), direntAndKey); + if (!symlinkFileValue.isSymlink()) { + throw new GlobFunctionException( + new InconsistentFilesystemException( + "readdir and stat disagree about whether " + + ((RootedPath) direntAndKey.getValue().argument()).asPath() + + " is a symlink."), + Transience.TRANSIENT); + } + if (!symlinkFileValue.exists()) { + continue; + } + if (symlinkFileValue.isDirectory()) { + SkyKey keyToRequest = getSkyKeyForSubdir(fileName, glob, subdirPattern); + if (keyToRequest != null) { + symlinkSubdirMap.put(dirent, keyToRequest); } - isDirectory = symlinkFileValue.isDirectory(); + } else if (globMatchesBareFile) { + sortedResultMap.put(dirent, glob.getSubdir().getRelative(fileName)); } + } - String subdirPattern = "**".equals(patternHead) ? glob.getPattern() : patternTail; - addFile(fileName, glob, subdirPattern, patternTail == null, isDirectory, - matches, env); + Map secondResult = env.getValues(symlinkSubdirMap.values()); + if (env.valuesMissing()) { + return null; + } + // Third pass: do needed subdirectories. + for (Map.Entry direntAndKey : + Iterables.concat(firstPassSubdirMap.entrySet(), symlinkSubdirMap.entrySet())) { + Dirent dirent = direntAndKey.getKey(); + String fileName = dirent.getName(); + SkyKey key = direntAndKey.getValue(); + SkyValue valueRequested = + symlinkSubdirMap.containsKey(dirent) + ? secondResult.get(key) + : firstPassAndSymlinksResult.get(key); + Preconditions.checkNotNull(valueRequested, direntAndKey); + Object dirMatches = getSubdirMatchesFromSkyValue(fileName, glob, valueRequested); + if (dirMatches != null) { + sortedResultMap.put(dirent, dirMatches); + } + } + for (Map.Entry fileMatches : sortedResultMap.entrySet()) { + addToMatches(fileMatches.getValue(), matches); } } else { // Pattern does not contain globs, so a direct stat is enough. String fileName = patternHead; - RootedPath fileRootedPath = RootedPath.toRootedPath(glob.getPackageRoot(), - dirPathFragment.getRelative(fileName)); + RootedPath fileRootedPath = + RootedPath.toRootedPath(glob.getPackageRoot(), dirPathFragment.getRelative(fileName)); FileValue fileValue = (FileValue) env.getValue(FileValue.key(fileRootedPath)); if (fileValue == null) { return null; } if (fileValue.exists()) { - addFile(fileName, glob, patternTail, patternTail == null, - fileValue.isDirectory(), matches, env); + if (fileValue.isDirectory()) { + SkyKey keyToRequest = getSkyKeyForSubdir(fileName, glob, patternTail); + if (keyToRequest != null) { + SkyValue valueRequested = env.getValue(keyToRequest); + if (env.valuesMissing()) { + return null; + } + Object fileMatches = getSubdirMatchesFromSkyValue(fileName, glob, valueRequested); + if (fileMatches != null) { + addToMatches(fileMatches, matches); + } + } + } else if (globMatchesBareFile) { + matches.add(glob.getSubdir().getRelative(fileName)); + } } } - if (env.valuesMissing()) { - return null; - } + Preconditions.checkState(!env.valuesMissing(), skyKey); NestedSet matchesBuilt = matches.build(); // Use the same value to represent that we did not match anything. @@ -184,13 +274,20 @@ public final class GlobFunction implements SkyFunction { return new GlobValue(matchesBuilt); } - /** - * Returns true if the given pattern contains globs. - */ - private boolean containsGlobs(String pattern) { + /** Returns true if the given pattern contains globs. */ + private static boolean containsGlobs(String pattern) { return pattern.contains("*") || pattern.contains("?"); } + @SuppressWarnings("unchecked") // cast to NestedSet + private static void addToMatches(Object toAdd, NestedSetBuilder matches) { + if (toAdd instanceof PathFragment) { + matches.add((PathFragment) toAdd); + } else { + matches.addTransitive((NestedSet) toAdd); + } + } + /** * Includes the given file/directory in the glob. * @@ -198,42 +295,63 @@ public final class GlobFunction implements SkyFunction { * *

{@code isDirectory} must be true iff the file is a directory. * - *

{@code directResult} must be set if the file should be included in the result set - * directly rather than recursed into if it is a directory. + *

Returns a {@link SkyKey} for a value that is needed to compute the files that will be added + * to {@code matches}, or {@code null} if no additional value is needed. The returned value should + * be opaquely passed to {@link #getSubdirMatchesFromSkyValue}. */ - private void addFile(String fileName, GlobDescriptor glob, String subdirPattern, - boolean directResult, boolean isDirectory, NestedSetBuilder matches, - Environment env) { - if (isDirectory && subdirPattern != null) { - // This is a directory, and the pattern covers files under that directory. - SkyKey subdirGlobKey = GlobValue.internalKey(glob.getPackageId(), glob.getPackageRoot(), - glob.getSubdir().getRelative(fileName), subdirPattern, glob.excludeDirs()); - GlobValue subdirGlob = (GlobValue) env.getValue(subdirGlobKey); - if (subdirGlob == null) { - return; + private static SkyKey getSkyKeyForSubdir( + String fileName, GlobDescriptor glob, String subdirPattern) { + if (subdirPattern == null) { + if (glob.excludeDirs()) { + return null; + } else { + return PackageLookupValue.key( + PackageIdentifier.create( + glob.getPackageId().getRepository(), + glob.getPackageId() + .getPackageFragment() + .getRelative(glob.getSubdir()) + .getRelative(fileName))); } - matches.addTransitive(subdirGlob.getMatches()); + } else { + // There is some more pattern to match. Get the glob for the subdirectory. Note that this + // directory may also match directly in the case of a pattern that starts with "**", but that + // match will be found in the subdirectory glob. + return GlobValue.internalKey( + glob.getPackageId(), + glob.getPackageRoot(), + glob.getSubdir().getRelative(fileName), + subdirPattern, + glob.excludeDirs()); } + } - if (directResult && !(isDirectory && glob.excludeDirs())) { - if (isDirectory) { - // TODO(bazel-team): Refactor. This is basically inlined code from the next recursion level. - // Ensure that subdirectories that contain other packages are not picked up. - PathFragment directory = glob.getPackageId().getPackageFragment() - .getRelative(glob.getSubdir()).getRelative(fileName); - PackageLookupValue pkgLookupValue = (PackageLookupValue) - env.getValue(PackageLookupValue.key(PackageIdentifier.create( - glob.getPackageId().getRepository(), directory))); - if (pkgLookupValue == null) { - return; - } - if (pkgLookupValue.packageExists()) { - // The file is a directory and contains another package. - return; - } + /** + * Returns matches coming from the directory {@code fileName} if appropriate, either an individual + * file or a nested set of files. + * + *

{@code valueRequested} must be the SkyValue whose key was returned by + * {@link #getSkyKeyForSubdir} for these parameters. + */ + @Nullable + private static Object getSubdirMatchesFromSkyValue( + String fileName, + GlobDescriptor glob, + SkyValue valueRequested) { + if (valueRequested instanceof GlobValue) { + return ((GlobValue) valueRequested).getMatches(); + } else { + Preconditions.checkState( + valueRequested instanceof PackageLookupValue, + "%s is not a GlobValue or PackageLookupValue (%s %s)", + valueRequested, + fileName, + glob); + if (!((PackageLookupValue) valueRequested).packageExists()) { + return glob.getSubdir().getRelative(fileName); } - matches.add(glob.getSubdir().getRelative(fileName)); } + return null; } @Nullable diff --git a/src/main/java/com/google/devtools/build/lib/vfs/Dirent.java b/src/main/java/com/google/devtools/build/lib/vfs/Dirent.java index d76756dd9c..c62ff442bc 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/Dirent.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/Dirent.java @@ -18,11 +18,8 @@ import com.google.devtools.build.lib.util.Preconditions; import java.io.Serializable; import java.util.Objects; -/** - * Directory entry representation returned by {@link Path#readdir}. - */ -public class Dirent implements Serializable { - +/** Directory entry representation returned by {@link Path#readdir}. */ +public final class Dirent implements Serializable, Comparable { /** Type of the directory entry */ public enum Type { // A regular file. @@ -73,4 +70,9 @@ public class Dirent implements Serializable { public String toString() { return name + "[" + type.toString().toLowerCase() + "]"; } + + @Override + public int compareTo(Dirent other) { + return this.getName().compareTo(other.getName()); + } } -- cgit v1.2.3