diff options
author | tomlu <tomlu@google.com> | 2018-02-08 15:32:00 -0800 |
---|---|---|
committer | Copybara-Service <copybara-piper@google.com> | 2018-02-08 15:34:11 -0800 |
commit | a729b9b4c3d7844a7d44934bf3365f92633c0a60 (patch) | |
tree | 6329f4baf5b0b83ea6e3bd577b78b8d49afea9f1 /src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java | |
parent | 0ab46f0dd95f735056add4dd8a90a76944b81d00 (diff) |
Replace path implementation.
Path and PathFragment have been replaced with String-based implementations. They are pretty similar, but each method is dissimilar enough that I did not feel sharing code was appropriate.
A summary of changes:
PATH
====
* Subsumes LocalPath (deleted, its tests repurposed)
* Use a simple string to back Path
* Path instances are no longer interned; Reference equality will no longer work
* Always normalized (same as before)
* Some operations will now be slower, like instance compares (which were previously just a reference check)
* Multiple identical paths will now consume more memory since they are not interned
PATH FRAGMENT
=============
* Use a simple string to back PathFragment
* No more segment arrays with interned strings
* Always normalized
* Remove isNormalized
* Replace some isNormalizied uses with containsUpLevelReferences() to check if path fragments try to escape their scope
* To check if user input is normalized, supply static methods on PathFragment to validate the string before constructing a PathFragment
* Because PathFragments are always normalized, we have to replace checks for literal "." from PathFragment#getPathString to PathFragment#getSafePathString. The latter returns "." for the empty string.
* The previous implementation supported efficient segment semantics (segment count, iterating over segments). This is now expensive since we do longer have a segment array.
ARTIFACT
========
* Remove Path instance. It is instead dynamically constructed on request. This is necessary to avoid this CL becoming a memory regression.
RELNOTES: None
PiperOrigin-RevId: 185062932
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java')
-rw-r--r-- | src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java | 1097 |
1 files changed, 520 insertions, 577 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java b/src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java index 572042a7d7..88ae732590 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/PathFragment.java @@ -1,4 +1,4 @@ -// Copyright 2014 The Bazel Authors. All rights reserved. +// Copyright 2017 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,630 +16,470 @@ package com.google.devtools.build.lib.vfs; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.analysis.actions.CommandLineItem; -import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable; -import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; import com.google.devtools.build.lib.skyframe.serialization.SerializationException; import com.google.devtools.build.lib.skyframe.serialization.strings.StringCodecs; import com.google.devtools.build.lib.skylarkinterface.SkylarkPrintable; import com.google.devtools.build.lib.skylarkinterface.SkylarkPrinter; import com.google.devtools.build.lib.util.FileType; -import com.google.devtools.build.lib.util.OS; -import com.google.devtools.build.lib.util.StringCanonicalizer; import com.google.protobuf.CodedInputStream; import com.google.protobuf.CodedOutputStream; -import java.io.File; import java.io.IOException; -import java.io.InvalidObjectException; -import java.io.ObjectInputStream; import java.io.Serializable; -import java.util.Arrays; -import java.util.List; import java.util.Set; +import javax.annotation.Nullable; /** - * This class represents an immutable filesystem path, which may be absolute or relative. The path - * is maintained as a simple ordered list of path segment strings. + * A path segment representing a path fragment using the host machine's path style. That is; If you + * are running on a Unix machine, the path style will be unix, on Windows it is the windows path + * style. * - * <p>This class is independent from other VFS classes, especially anything requiring native code. - * It is safe to use in places that need simple segmented string path functionality. + * <p>Path fragments are either absolute or relative. + * + * <p>Strings are normalized with '.' and '..' removed and resolved (if possible), any multiple + * slashes ('/') removed, and any trailing slash also removed. Windows drive letters are uppercased. + * The current implementation does not touch the incoming path string unless the string actually + * needs to be normalized. * * <p>There is some limited support for Windows-style paths. Most importantly, drive identifiers in * front of a path (c:/abc) are supported and such paths are correctly recognized as absolute, as * are paths with backslash separators (C:\\foo\\bar). However, advanced Windows-style features like - * \\\\network\\paths and \\\\?\\unc\\paths are not supported. + * \\\\network\\paths and \\\\?\\unc\\paths are not supported. We are currently using forward + * slashes ('/') even on Windows. + * + * <p>Mac and Windows path fragments are case insensitive. */ -@Immutable -@javax.annotation.concurrent.Immutable -@ThreadSafe -public abstract class PathFragment +public final class PathFragment implements Comparable<PathFragment>, Serializable, - SkylarkPrintable, FileType.HasFileType, + SkylarkPrintable, CommandLineItem { - private static final Helper HELPER = - OS.getCurrent() == OS.WINDOWS ? WindowsPathFragment.HELPER : UnixPathFragment.HELPER; - - public static final char SEPARATOR_CHAR = HELPER.getPrimarySeparatorChar(); + private static final OsPathPolicy OS = OsPathPolicy.getFilePathOs(); - public static final int INVALID_SEGMENT = -1; - - public static final String ROOT_DIR = "/"; - - /** An empty path fragment. */ public static final PathFragment EMPTY_FRAGMENT = create(""); + public static final char SEPARATOR_CHAR = OS.getSeparator(); + public static final int INVALID_SEGMENT = -1; - /** The path fragment representing the root directory. */ - public static final PathFragment ROOT_FRAGMENT = create(ROOT_DIR); + private final String path; + private final int driveStrLength; // 0 for relative paths, 1 on Unix, 3 on Windows public static final ObjectCodec<PathFragment> CODEC = new PathFragmentCodec(); - /** - * A helper object for manipulating the various internal {@link PathFragment} implementations. - * - * <p>There will be exactly one {@link Helper} instance used to manipulate all the {@link - * PathFragment} instances (see {@link PathFragment#HELPER}). All of the various {@link Helper} - * and {@link PathFragment} implementations may assume this property. - */ - protected abstract static class Helper { - /** - * Returns whether the two given arrays of segments have the same length and should be - * considered have logically equal contents. - */ - protected final boolean segmentsEqual(String[] segments1, String[] segments2) { - return segments1.length == segments2.length - && segmentsEqual(segments1.length, segments1, 0, segments2); - } - - /** - * Returns whether the {@code length} segments in {@code segments1}, starting at {@code offset1} - * should be considered to be logically equal to the first {@code length} segments in {@code - * segments2}. - */ - abstract boolean segmentsEqual(int length, String[] segments1, int offset1, String[] segments2); - - /** Returns the comparison result of two {@link PathFragment} instances. */ - protected abstract int compare(PathFragment pathFragment1, PathFragment pathFragment2); - - /** Returns a fresh {@link PathFragment} instance from the given path string. */ - abstract PathFragment create(String path); - /** - * Returns a fresh {@link PathFragment} instance from the given information, taking ownership of - * {@code segments} and assuming the {@link String}s within have already been interned. - */ - abstract PathFragment createAlreadyInterned( - char driveLetter, boolean isAbsolute, String[] segments); - - /** Returns whether {@code c} is a path separator. */ - abstract boolean isSeparator(char c); - /** Returns the primary path separator. */ - abstract char getPrimarySeparatorChar(); - /** Return whether the given {@code path} contains a path separator. */ - abstract boolean containsSeparatorChar(String path); - - /** - * Splits the given {@code toSegment} into path segments, starting at the given {@code offset}. - */ - protected final String[] segment(String toSegment, int offset) { - int length = toSegment.length(); - - // We make two passes through the array of characters: count & alloc, - // because simply using ArrayList was a bottleneck showing up during profiling. - int seg = 0; - int start = offset; - for (int i = offset; i < length; i++) { - if (isSeparator(toSegment.charAt(i))) { - if (i > start) { // to skip repeated separators - seg++; - } - start = i + 1; - } - } - if (start < length) { - seg++; - } - String[] result = new String[seg]; - seg = 0; - start = offset; - for (int i = offset; i < length; i++) { - if (isSeparator(toSegment.charAt(i))) { - if (i > start) { // to skip repeated separators - result[seg] = StringCanonicalizer.intern(toSegment.substring(start, i)); - seg++; - } - start = i + 1; - } - } - if (start < length) { - result[seg] = StringCanonicalizer.intern(toSegment.substring(start, length)); - } - return result; - } - } - - /** Lower-level API. Create a PathFragment, interning segments. */ - public static PathFragment create(char driveLetter, boolean isAbsolute, String[] segments) { - String[] internedSegments = new String[segments.length]; - for (int i = 0; i < segments.length; i++) { - internedSegments[i] = StringCanonicalizer.intern(segments[i]); - } - return createAlreadyInterned(driveLetter, isAbsolute, internedSegments); - } - - /** Same as {@link #create(char, boolean, String[])}, except for {@link List}s of segments. */ - public static PathFragment create(char driveLetter, boolean isAbsolute, List<String> segments) { - String[] internedSegments = new String[segments.size()]; - for (int i = 0; i < segments.size(); i++) { - internedSegments[i] = StringCanonicalizer.intern(segments.get(i)); - } - return createAlreadyInterned(driveLetter, isAbsolute, internedSegments); - } - - /** - * Construct a PathFragment from a java.io.File, which is an absolute or - * relative UNIX path. Does not support Windows-style Files. - */ - public static PathFragment create(File path) { - return HELPER.create(path.getPath()); - } - - /** - * Construct a PathFragment from a string, which is an absolute or relative UNIX or Windows path. - */ + /** Creates a new normalized path fragment. */ public static PathFragment create(String path) { - return HELPER.create(path); + int normalizationLevel = OS.needsToNormalize(path); + String normalizedPath = + normalizationLevel != OsPathPolicy.NORMALIZED + ? OS.normalize(path, normalizationLevel) + : path; + int driveStrLength = OS.getDriveStrLength(normalizedPath); + return new PathFragment(normalizedPath, driveStrLength); } /** - * Constructs a PathFragment, taking ownership of {@code segments} and assuming the {@link - * String}s within have already been interned. + * Creates a new path fragment, where the caller promises that the path is normalized. * - * <p>Package-private because it does not perform a defensive copy of the segments array. Used - * here in PathFragment, and by Path.asFragment() and Path.relativeTo(). + * <p>WARNING! Make sure the path fragment is in fact already normalized. The rest of the code + * assumes this is the case. */ - static PathFragment createAlreadyInterned( - char driveLetter, boolean isAbsolute, String[] segments) { - return HELPER.createAlreadyInterned(driveLetter, isAbsolute, segments); - } - - /** Returns whether the current {@code path} contains a path separator. */ - static boolean containsSeparator(String path) { - return HELPER.containsSeparatorChar(path); + public static PathFragment createAlreadyNormalized(String normalizedPath) { + int driveStrLength = OS.getDriveStrLength(normalizedPath); + return createAlreadyNormalized(normalizedPath, driveStrLength); } /** - * Construct a PathFragment from a sequence of other PathFragments. The new fragment will be - * absolute iff the first fragment was absolute. + * Creates a new path fragment, where the caller promises that the path is normalized. + * + * <p>Should only be used internally. */ - // TODO(bazel-team): Most usages of this method are wasteful from a garbage perspective. Refactor - // to something better. - public static PathFragment create(PathFragment first, PathFragment second, PathFragment... more) { - String[] segments = new String[sumLengths(first, second, more)]; - int offset = 0; - offset += addSegmentsTo(segments, offset, first); - offset += addSegmentsTo(segments, offset, second); - for (PathFragment fragment : more) { - offset += addSegmentsTo(segments, offset, fragment); - } - boolean isAbsolute = first.isAbsolute(); - char driveLetter = first.getDriveLetter(); - return HELPER.createAlreadyInterned(driveLetter, isAbsolute, segments); + static PathFragment createAlreadyNormalized(String normalizedPath, int driveStrLength) { + return new PathFragment(normalizedPath, driveStrLength); } - // Medium sized builds can easily hold millions of live PathFragments, so the per-instance size of - // PathFragment is a concern. - // - // We have two oop-sized fields (segments, path), and one 4-byte-sized one (hashCode). - // - // If Blaze is run on a jvm with -XX:+UseCompressedOops, each PathFragment instance is 24 bytes - // and so adding any additional field will increase the per-instance size to at least 32 bytes. - // - // If Blaze is run on a jvm with -XX:-UseCompressedOops, each PathFragment instance is 32 bytes - // and so adding any additional field will increase the per-instance size to at least 40 bytes. - // - // Therefore, do not add any additional fields unless you have considered the memory implications. - - // The individual path components. - // Does *not* include the Windows drive letter. - protected final String[] segments; - - // hashCode and path are lazily initialized but semantically immutable. - private int hashCode; - private String path; - - protected PathFragment(String[] segments) { - this.segments = segments; - } - - private static int addSegmentsTo(String[] segments, int offset, PathFragment fragment) { - int count = fragment.segmentCount(); - System.arraycopy(fragment.segments, 0, segments, offset, count); - return count; + /** This method expects path to already be normalized. */ + private PathFragment(String path, int driveStrLength) { + this.path = Preconditions.checkNotNull(path); + this.driveStrLength = driveStrLength; } - private static int sumLengths(PathFragment first, PathFragment second, PathFragment[] more) { - int total = first.segmentCount() + second.segmentCount(); - for (PathFragment fragment : more) { - total += fragment.segmentCount(); - } - return total; + public String getPathString() { + return path; } - protected Object writeReplace() { - return new PathFragmentSerializationProxy(toString()); + public boolean isEmpty() { + return path.isEmpty(); } - protected void readObject(ObjectInputStream stream) throws InvalidObjectException { - throw new InvalidObjectException("Serialization is allowed only by proxy"); + int getDriveStrLength() { + return driveStrLength; } /** - * Returns the path string using '/' as the name-separator character. Returns "" if the path - * is both relative and empty. + * If called on a {@link PathFragment} instance for a mount name (eg. '/' or 'C:/'), the empty + * string is returned. + * + * <p>This operation allocates a new string. */ - public String getPathString() { - // Double-checked locking works, even without volatile, because path is a String, according to: - // http://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html - if (path == null) { - synchronized (this) { - if (path == null) { - path = StringCanonicalizer.intern(joinSegments(HELPER.getPrimarySeparatorChar())); - } - } - } - return path; + public String getBaseName() { + int lastSeparator = path.lastIndexOf(SEPARATOR_CHAR); + return lastSeparator < driveStrLength + ? path.substring(driveStrLength) + : path.substring(lastSeparator + 1); } /** - * Returns "." if the path fragment is both relative and empty, or {@link - * #getPathString} otherwise. + * Returns a {@link PathFragment} instance representing the relative path between this {@link + * PathFragment} and the given {@link PathFragment}. + * + * <p>If the passed path is absolute it is returned untouched. This can be useful to resolve + * symlinks. */ - // TODO(bazel-team): Change getPathString to do this - this behavior makes more sense. - public String getSafePathString() { - return (!isAbsolute() && (segmentCount() == 0)) ? "." : getPathString(); + public PathFragment getRelative(PathFragment other) { + Preconditions.checkNotNull(other); + // Fast-path: The path fragment is already normal, use cheaper normalization check + String otherStr = other.path; + return getRelative(otherStr, other.getDriveStrLength(), OS.needsToNormalizeSuffix(otherStr)); } /** - * Returns the path string using '/' as the name-separator character, but do so in a way - * unambiguously recognizable as path. In other words, return "." for relative and empty paths, - * and prefix relative paths with one segment by "./". + * Returns a {@link PathFragment} instance representing the relative path between this {@link + * PathFragment} and the given path. * - * <p>In this way, a shell will always interpret such a string as path (absolute or relative to - * the working directory) and not as command to be searched for in the search path. + * <p>See {@link #getRelative(PathFragment)} for details. */ - public String getCallablePathString() { - if (isAbsolute()) { - return getPathString(); - } else if (segmentCount() == 0) { - return "."; - } else if (segmentCount() == 1) { - return "." + HELPER.getPrimarySeparatorChar() + getPathString(); - } else { - return getPathString(); - } + public PathFragment getRelative(String other) { + Preconditions.checkNotNull(other); + return getRelative(other, OS.getDriveStrLength(other), OS.needsToNormalize(other)); } - /** - * Throws {@link IllegalArgumentException} if {@code paths} contains any paths that - * are equal to {@code startingWithPath} or that are not beneath {@code startingWithPath}. - */ - public static void checkAllPathsAreUnder(Iterable<PathFragment> paths, - PathFragment startingWithPath) { - for (PathFragment path : paths) { - Preconditions.checkArgument( - !path.equals(startingWithPath) && path.startsWith(startingWithPath), - "%s is not beneath %s", path, startingWithPath); + private PathFragment getRelative(String other, int otherDriveStrLength, int normalizationLevel) { + if (path.isEmpty()) { + return create(other); } - } - - private String joinSegments(char separatorChar) { - if (segments.length == 0 && isAbsolute()) { - return windowsVolume() + ROOT_DIR; + if (other.isEmpty()) { + return this; } - - // Profile driven optimization: - // Preallocate a size determined by the number of segments, so that - // we do not have to expand the capacity of the StringBuilder. - // Heuristically, this estimate is right for about 99% of the time. - int estimateSize = - ((getDriveLetter() != '\0') ? 2 : 0) - + ((segments.length == 0) ? 0 : (segments.length + 1) * 20); - StringBuilder result = new StringBuilder(estimateSize); - if (isAbsolute()) { - // Only print the Windows volume label if the PathFragment is absolute. Do not print relative - // Windows paths like "C:foo/bar", it would break all kinds of things, e.g. glob(). - result.append(windowsVolume()); - } - boolean initialSegment = true; - for (String segment : segments) { - if (!initialSegment || isAbsolute()) { - result.append(separatorChar); - } - initialSegment = false; - result.append(segment); + // This is an absolute path, simply return it + if (otherDriveStrLength > 0) { + String normalizedPath = + normalizationLevel != OsPathPolicy.NORMALIZED + ? OS.normalize(other, normalizationLevel) + : other; + return new PathFragment(normalizedPath, otherDriveStrLength); + } + String newPath; + if (path.length() == driveStrLength) { + newPath = path + other; + } else { + newPath = path + '/' + other; } - return result.toString(); + newPath = + normalizationLevel != OsPathPolicy.NORMALIZED + ? OS.normalize(newPath, normalizationLevel) + : newPath; + return new PathFragment(newPath, driveStrLength); } - /** - * Return true iff none of the segments are either "." or "..". - */ - public boolean isNormalized() { - for (String segment : segments) { - if (segment.equals(".") || segment.equals("..")) { - return false; - } + public PathFragment getChild(String baseName) { + checkBaseName(baseName); + String newPath; + if (path.length() == driveStrLength) { + newPath = path + baseName; + } else { + newPath = path + '/' + baseName; } - return true; - } - - public static boolean isNormalized(String path) { - return PathFragment.create(path).isNormalized(); + return new PathFragment(newPath, driveStrLength); } /** - * Normalizes the path fragment: removes "." and ".." segments if possible - * (if there are too many ".." segments, the resulting PathFragment will still - * start with ".."). + * Returns the parent directory of this {@link PathFragment}. + * + * <p>If this is called on an single directory for a relative path, this returns an empty relative + * path. If it's called on a root (like '/') or the empty string, it returns null. */ - public PathFragment normalize() { - String[] scratchSegments = new String[segments.length]; - int segmentCount = 0; - - for (String segment : segments) { - switch (segment) { - case ".": - // Just discard it - break; - case "..": - if (segmentCount > 0 && !scratchSegments[segmentCount - 1].equals("..")) { - // Remove the last segment, if there is one and it is not "..". This - // means that the resulting PathFragment can still contain ".." - // segments at the beginning. - segmentCount--; - } else { - scratchSegments[segmentCount++] = segment; - } - break; - default: - scratchSegments[segmentCount++] = segment; + @Nullable + public PathFragment getParentDirectory() { + int lastSeparator = path.lastIndexOf(SEPARATOR_CHAR); + + // For absolute paths we need to specially handle when we hit root + // Relative paths can't hit this path as driveStrLength == 0 + if (driveStrLength > 0) { + if (lastSeparator < driveStrLength) { + if (path.length() > driveStrLength) { + String newPath = path.substring(0, driveStrLength); + return new PathFragment(newPath, driveStrLength); + } else { + return null; + } + } + } else { + if (lastSeparator == -1) { + if (!path.isEmpty()) { + return EMPTY_FRAGMENT; + } else { + return null; + } } } - - if (segmentCount == segments.length) { - // Optimization, no new PathFragment needs to be created. - return this; - } - - return HELPER.createAlreadyInterned( - getDriveLetter(), isAbsolute(), subarray(scratchSegments, 0, segmentCount)); + String newPath = path.substring(0, lastSeparator); + return new PathFragment(newPath, driveStrLength); } /** - * Returns the path formed by appending the relative or absolute path fragment - * {@code otherFragment} to this path. + * Returns the {@link PathFragment} relative to the base {@link PathFragment}. * - * <p>If {@code otherFragment} is absolute, the current path will be ignored; - * otherwise, they will be concatenated. This is a purely syntactic operation, - * with no path normalization or I/O performed. + * <p>For example, <code>FilePath.create("foo/bar/wiz").relativeTo(FilePath.create("foo"))</code> + * returns <code>"bar/wiz"</code>. + * + * <p>If the {@link PathFragment} is not a child of the passed {@link PathFragment} an {@link + * IllegalArgumentException} is thrown. In particular, this will happen whenever the two {@link + * PathFragment} instances aren't both absolute or both relative. */ - public PathFragment getRelative(PathFragment otherFragment) { - if (otherFragment == EMPTY_FRAGMENT) { + public PathFragment relativeTo(PathFragment base) { + Preconditions.checkNotNull(base); + if (isAbsolute() != base.isAbsolute()) { + throw new IllegalArgumentException( + "Cannot relativize an absolute and a non-absolute path pair"); + } + String basePath = base.path; + if (!OS.startsWith(path, basePath)) { + throw new IllegalArgumentException( + String.format("Path '%s' is not under '%s', cannot relativize", this, base)); + } + int bn = basePath.length(); + if (bn == 0) { return this; } - - if (otherFragment.isAbsolute()) { - char driveLetter = getDriveLetter(); - return driveLetter == '\0' || otherFragment.getDriveLetter() != '\0' - ? otherFragment - : createAlreadyInterned(driveLetter, true, otherFragment.segments); + if (path.length() == bn) { + return EMPTY_FRAGMENT; + } + final int lastSlashIndex; + if (basePath.charAt(bn - 1) == '/') { + lastSlashIndex = bn - 1; } else { - return create(this, otherFragment); + lastSlashIndex = bn; + } + if (path.charAt(lastSlashIndex) != '/') { + throw new IllegalArgumentException( + String.format("Path '%s' is not under '%s', cannot relativize", this, base)); } + String newPath = path.substring(lastSlashIndex + 1); + return new PathFragment(newPath, 0 /* Always a relative path */); } - /** - * Returns the path formed by appending the relative or absolute string - * {@code path} to this path. - * - * <p>If the given path string is absolute, the current path will be ignored; - * otherwise, they will be concatenated. This is a purely syntactic operation, - * with no path normalization or I/O performed. - */ - public PathFragment getRelative(String path) { - return getRelative(create(path)); + public PathFragment relativeTo(String base) { + return relativeTo(PathFragment.create(base)); } /** - * Returns the path formed by appending the single non-special segment "baseName" to this path. + * Returns whether this path is an ancestor of another path. * - * <p>You should almost always use {@link #getRelative} instead, which has the same performance - * characteristics if the given name is a valid base name, and which also works for '.', '..', and - * strings containing '/'. + * <p>If this == other, true is returned. * - * @throws IllegalArgumentException if {@code baseName} is not a valid base name according to - * {@link #checkBaseName} + * <p>An absolute path can never be an ancestor of a relative path, and vice versa. */ - public PathFragment getChild(String baseName) { - checkBaseName(baseName); - baseName = StringCanonicalizer.intern(baseName); - String[] newSegments = Arrays.copyOf(segments, segments.length + 1); - newSegments[newSegments.length - 1] = baseName; - return createAlreadyInterned(getDriveLetter(), isAbsolute(), newSegments); + public boolean startsWith(PathFragment other) { + Preconditions.checkNotNull(other); + if (other.path.length() > path.length()) { + return false; + } + if (driveStrLength != other.driveStrLength) { + return false; + } + if (!OS.startsWith(path, other.path)) { + return false; + } + return path.length() == other.path.length() + || other.path.length() == driveStrLength + || path.charAt(other.path.length()) == SEPARATOR_CHAR; } /** - * Returns the last segment of this path, or "" for the empty fragment. + * Returns true iff {@code suffix}, considered as a list of path segments, is relative and a + * suffix of {@code this}, or both are absolute and equal. + * + * <p>This is a reflexive, transitive, anti-symmetric relation (i.e. a partial order) */ - public String getBaseName() { - return (segments.length == 0) ? "" : segments[segments.length - 1]; + public boolean endsWith(PathFragment other) { + Preconditions.checkNotNull(other); + if (other.path.length() > path.length()) { + return false; + } + if (other.isAbsolute()) { + return this.equals(other); + } + if (!OS.endsWith(path, other.path)) { + return false; + } + return path.length() == other.path.length() + || other.path.length() == 0 + || path.charAt(path.length() - other.path.length() - 1) == SEPARATOR_CHAR; } - /** - * Returns the file extension of this path, excluding the period, or "" if there is no extension. - */ - public String getFileExtension() { - String baseName = getBaseName(); + public boolean isAbsolute() { + return driveStrLength > 0; + } - int lastIndex = baseName.lastIndexOf('.'); - if (lastIndex != -1) { - return baseName.substring(lastIndex + 1); - } + public static boolean isAbsolute(String path) { + return OS.getDriveStrLength(path) > 0; + } - return ""; + @Override + public String toString() { + return path; } - /** - * Returns a relative path fragment to this path, relative to - * {@code ancestorDirectory}. - * <p> - * <code>x.relativeTo(z) == y</code> implies - * <code>z.getRelative(y) == x</code>. - * <p> - * For example, <code>"foo/bar/wiz".relativeTo("foo")</code> - * returns <code>"bar/wiz"</code>. - */ - public PathFragment relativeTo(PathFragment ancestorDirectory) { - String[] ancestorSegments = ancestorDirectory.segments(); - int ancestorLength = ancestorSegments.length; + @Override + public void repr(SkylarkPrinter printer) { + printer.append(path); + } - if (isAbsolute() != ancestorDirectory.isAbsolute() || segments.length < ancestorLength) { - throw new IllegalArgumentException("PathFragment " + this - + " is not beneath " + ancestorDirectory); + @Override + public boolean equals(Object o) { + if (this == o) { + return true; } - - if (!HELPER.segmentsEqual(ancestorLength, segments, 0, ancestorSegments)) { - throw new IllegalArgumentException( - "PathFragment " + this + " is not beneath " + ancestorDirectory); + if (o == null || getClass() != o.getClass()) { + return false; } - - int length = segments.length - ancestorLength; - String[] resultSegments = subarray(segments, ancestorLength, length); - return createAlreadyInterned('\0', false, resultSegments); + return OS.equals(this.path, ((PathFragment) o).path); } - /** - * Returns a relative path fragment to this path, relative to {@code path}. - */ - public PathFragment relativeTo(String path) { - return relativeTo(create(path)); + @Override + public int hashCode() { + return OS.hash(this.path); } - /** - * Returns a new PathFragment formed by appending {@code newName} to the - * parent directory. Null is returned iff this method is called on a - * PathFragment with zero segments. If {@code newName} designates an absolute path, - * the value of {@code this} will be ignored and a PathFragment corresponding to - * {@code newName} will be returned. This behavior is consistent with the behavior of - * {@link #getRelative(String)}. - */ - public PathFragment replaceName(String newName) { - return segments.length == 0 ? null : getParentDirectory().getRelative(newName); + @Override + public int compareTo(PathFragment o) { + return OS.compare(this.path, o.path); } - /** - * Returns a path representing the parent directory of this path, - * or null iff this Path represents the root of the filesystem. - * - * <p>Note: This method DOES NOT normalize ".." and "." path segments. - */ - public PathFragment getParentDirectory() { - return segments.length == 0 ? null : subFragment(0, segments.length - 1); - } + //////////////////////////////////////////////////////////////////////// /** - * Returns true iff {@code prefix}, considered as a list of path segments, is - * a prefix of {@code this}, and that they are both relative or both - * absolute. + * Returns the number of segments in this path. * - * <p>This is a reflexive, transitive, anti-symmetric relation (i.e. a partial - * order) + * <p>This operation is O(N) on the length of the string. */ - public boolean startsWith(PathFragment prefix) { - if (isAbsolute() != prefix.isAbsolute() - || this.segments.length < prefix.segments.length - || (isAbsolute() && getDriveLetter() != prefix.getDriveLetter())) { - return false; + public int segmentCount() { + int n = path.length(); + int segmentCount = 0; + int i; + for (i = driveStrLength; i < n; ++i) { + if (path.charAt(i) == SEPARATOR_CHAR) { + ++segmentCount; + } } - return HELPER.segmentsEqual(prefix.segments.length, segments, 0, prefix.segments); - } - - /** - * Returns true iff {@code suffix}, considered as a list of path segments, is - * relative and a suffix of {@code this}, or both are absolute and equal. - * - * <p>This is a reflexive, transitive, anti-symmetric relation (i.e. a partial - * order) - */ - public boolean endsWith(PathFragment suffix) { - if ((suffix.isAbsolute() && !suffix.equals(this)) - || this.segments.length < suffix.segments.length) { - return false; + // Add last segment if one exists. + if (i > driveStrLength) { + ++segmentCount; } - int offset = this.segments.length - suffix.segments.length; - return HELPER.segmentsEqual(suffix.segments.length, segments, offset, suffix.segments); - } - - private static String[] subarray(String[] array, int start, int length) { - String[] subarray = new String[length]; - System.arraycopy(array, start, subarray, 0, length); - return subarray; + return segmentCount; } /** - * Returns a new path fragment that is a sub fragment of this one. - * The sub fragment begins at the specified <code>beginIndex</code> segment - * and ends at the segment at index <code>endIndex - 1</code>. Thus the number - * of segments in the new PathFragment is <code>endIndex - beginIndex</code>. + * Returns the specified segment of this path; index must be positive and less than numSegments(). * - * @param beginIndex the beginning index, inclusive. - * @param endIndex the ending index, exclusive. - * @return the specified sub fragment, never null. - * @exception IndexOutOfBoundsException if the - * <code>beginIndex</code> is negative, or - * <code>endIndex</code> is larger than the length of - * this <code>String</code> object, or - * <code>beginIndex</code> is larger than - * <code>endIndex</code>. + * <p>This operation is O(N) on the length of the string. */ - public PathFragment subFragment(int beginIndex, int endIndex) { - int count = segments.length; - if ((beginIndex < 0) || (beginIndex > endIndex) || (endIndex > count)) { - throw new IndexOutOfBoundsException(String.format("path: %s, beginIndex: %d endIndex: %d", - toString(), beginIndex, endIndex)); + public String getSegment(int index) { + int n = path.length(); + int segmentCount = 0; + int i; + for (i = driveStrLength; i < n && segmentCount < index; ++i) { + if (path.charAt(i) == SEPARATOR_CHAR) { + ++segmentCount; + } + } + int starti = i; + for (; i < n; ++i) { + if (path.charAt(i) == SEPARATOR_CHAR) { + break; + } } - boolean isAbsolute = (beginIndex == 0) && isAbsolute(); - return ((beginIndex == 0) && (endIndex == count)) - ? this - : createAlreadyInterned( - getDriveLetter(), isAbsolute, subarray(segments, beginIndex, endIndex - beginIndex)); + // Add last segment if one exists. + if (i > driveStrLength) { + ++segmentCount; + } + int endi = i; + if (index < 0 || index >= segmentCount) { + throw new IllegalArgumentException("Illegal segment index: " + index); + } + return path.substring(starti, endi); } /** * Returns a new path fragment that is a sub fragment of this one. The sub fragment begins at the - * specified <code>beginIndex</code> segment and contains the rest of the original path fragment. + * specified <code>beginIndex</code> segment and ends at the segment at index <code>endIndex - 1 + * </code>. Thus the number of segments in the new PathFragment is <code>endIndex - beginIndex + * </code>. + * + * <p>This operation is O(N) on the length of the string. * * @param beginIndex the beginning index, inclusive. + * @param endIndex the ending index, exclusive. * @return the specified sub fragment, never null. * @exception IndexOutOfBoundsException if the <code>beginIndex</code> is negative, or <code> * endIndex</code> is larger than the length of this <code>String</code> object, or <code> * beginIndex</code> is larger than <code>endIndex</code>. */ - public PathFragment subFragment(int beginIndex) { - return subFragment(beginIndex, segments.length); + public PathFragment subFragment(int beginIndex, int endIndex) { + if (beginIndex < 0 || beginIndex > endIndex) { + throw new IndexOutOfBoundsException( + String.format("path: %s, beginIndex: %d endIndex: %d", toString(), beginIndex, endIndex)); + } + return subFragmentImpl(beginIndex, endIndex); } - /** - * Returns true iff the path represented by this object is absolute. - * - * <p>True both for UNIX-style absolute paths ("/foo") and Windows-style ("C:/foo"). False for a - * Windows-style volume label ("C:") which is actually a relative path. - */ - public abstract boolean isAbsolute(); + public PathFragment subFragment(int beginIndex) { + if (beginIndex < 0) { + throw new IndexOutOfBoundsException( + String.format("path: %s, beginIndex: %d", toString(), beginIndex)); + } + return subFragmentImpl(beginIndex, -1); + } - public static boolean isAbsolute(String path) { - return PathFragment.create(path).isAbsolute(); + private PathFragment subFragmentImpl(int beginIndex, int endIndex) { + int n = path.length(); + int segmentIndex = 0; + int i; + for (i = driveStrLength; i < n && segmentIndex < beginIndex; ++i) { + if (path.charAt(i) == SEPARATOR_CHAR) { + ++segmentIndex; + } + } + int starti = i; + if (segmentIndex < endIndex) { + for (; i < n; ++i) { + if (path.charAt(i) == SEPARATOR_CHAR) { + ++segmentIndex; + if (segmentIndex == endIndex) { + break; + } + } + } + } else if (endIndex == -1) { + i = path.length(); + } + int endi = i; + // Add last segment if one exists for verification + if (i == n && i > driveStrLength) { + ++segmentIndex; + } + if (beginIndex > segmentIndex || endIndex > segmentIndex) { + throw new IndexOutOfBoundsException( + String.format("path: %s, beginIndex: %d endIndex: %d", toString(), beginIndex, endIndex)); + } + // If beginIndex is 0 we include the drive. Very odd semantics. + int driveStrLength = 0; + if (beginIndex == 0) { + starti = 0; + driveStrLength = this.driveStrLength; + endi = Math.max(endi, driveStrLength); + } + return new PathFragment(path.substring(starti, endi), driveStrLength); } /** @@ -647,62 +487,104 @@ public abstract class PathFragment * modified. */ String[] segments() { + int segmentCount = segmentCount(); + String[] segments = new String[segmentCount]; + int segmentIndex = 0; + int nexti = driveStrLength; + int n = path.length(); + for (int i = driveStrLength; i < n; ++i) { + if (path.charAt(i) == SEPARATOR_CHAR) { + segments[segmentIndex++] = path.substring(nexti, i); + nexti = i + 1; + } + } + // Add last segment if one exists. + if (nexti < n) { + segments[segmentIndex] = path.substring(nexti); + } return segments; } + /** + * Returns a list of the segments. + * + * <p>This operation is O(N) on the length of the string. + */ public ImmutableList<String> getSegments() { - return ImmutableList.copyOf(segments); + return ImmutableList.copyOf(segments()); } - public abstract String windowsVolume(); - - /** Return the drive letter or '\0' if not applicable. */ - // TODO(bazel-team): This doesn't need to pollute the PathFragment interface (ditto for - // windowsVolume). - public abstract char getDriveLetter(); + public int getFirstSegment(Set<String> values) { + String[] segments = segments(); + for (int i = 0; i < segments.length; ++i) { + if (values.contains(segments[i])) { + return i; + } + } + return INVALID_SEGMENT; + } - public boolean isEmpty() { - return segments.length == 0; + /** Returns the path string, or '.' if the path is empty. */ + public String getSafePathString() { + return !path.isEmpty() ? path : "."; } /** - * Returns the number of segments in this path. + * Returns the path string using '/' as the name-separator character, but do so in a way + * unambiguously recognizable as path. In other words, return "." for relative and empty paths, + * and prefix relative paths with one segment by "./". + * + * <p>In this way, a shell will always interpret such a string as path (absolute or relative to + * the working directory) and not as command to be searched for in the search path. */ - public int segmentCount() { - return segments.length; + public String getCallablePathString() { + if (isAbsolute()) { + return path; + } else if (path.isEmpty()) { + return "."; + } else if (path.indexOf(SEPARATOR_CHAR) == -1) { + return "." + SEPARATOR_CHAR + path; + } else { + return path; + } } /** - * Returns the specified segment of this path; index must be positive and - * less than numSegments(). + * Returns the file extension of this path, excluding the period, or "" if there is no extension. */ - public String getSegment(int index) { - return segments[index]; + public String getFileExtension() { + int n = path.length(); + for (int i = n - 1; i > driveStrLength; --i) { + char c = path.charAt(i); + if (c == '.') { + return path.substring(i + 1, n); + } else if (c == SEPARATOR_CHAR) { + break; + } + } + return ""; } /** - * Returns the index of the first segment which equals one of the input values - * or {@link PathFragment#INVALID_SEGMENT} if none of the segments match. + * Returns a new PathFragment formed by appending {@code newName} to the parent directory. Null is + * returned iff this method is called on a PathFragment with zero segments. If {@code newName} + * designates an absolute path, the value of {@code this} will be ignored and a PathFragment + * corresponding to {@code newName} will be returned. This behavior is consistent with the + * behavior of {@link #getRelative(String)}. */ - public int getFirstSegment(Set<String> values) { - for (int i = 0; i < segments.length; i++) { - if (values.contains(segments[i])) { - return i; - } - } - return INVALID_SEGMENT; + public PathFragment replaceName(String newName) { + PathFragment parent = getParentDirectory(); + return parent != null ? parent.getRelative(newName) : null; } /** - * Returns true iff this path contains uplevel references "..". + * Returns the drive for an absolute path fragment. + * + * <p>On unix, this will return "/". On Windows it will return the drive letter, like "C:/". */ - public boolean containsUplevelReferences() { - for (String segment : segments) { - if (segment.equals("..")) { - return true; - } - } - return false; + public String getDriveStr() { + Preconditions.checkArgument(isAbsolute()); + return path.substring(0, driveStrLength); } /** @@ -711,60 +593,129 @@ public abstract class PathFragment */ public PathFragment toRelative() { Preconditions.checkArgument(isAbsolute()); - return HELPER.createAlreadyInterned(getDriveLetter(), false, segments); + return new PathFragment(path.substring(driveStrLength), 0); } - @Override - public final int hashCode() { - // We use the hash code caching strategy employed by java.lang.String. There are three subtle - // things going on here: - // - // (1) We use a value of 0 to indicate that the hash code hasn't been computed and cached yet. - // Yes, this means that if the hash code is really 0 then we will "recompute" it each time. But - // this isn't a problem in practice since a hash code of 0 is rare. - // - // (2) Since we have no synchronization, multiple threads can race here thinking they are the - // first one to compute and cache the hash code. - // - // (3) Moreover, since 'hashCode' is non-volatile, the cached hash code value written from one - // thread may not be visible by another. Note that we don't need to worry about multiple - // inefficient reads of 'hashCode' on the same thread since it's non-volatile. - // - // All three of these issues are benign from a correctness perspective; in the end we have no - // overhead from synchronization, at the cost of potentially computing the hash code more than - // once. - if (hashCode == 0) { - hashCode = computeHashCode(); - } - return hashCode; - } - - protected abstract int computeHashCode(); - - @Override - public abstract boolean equals(Object other); + /** + * Returns true if this path contains uplevel references "..". + * + * <p>Since path fragments are normalized, this implies that the uplevel reference is at the start + * of the path fragment. + */ + public boolean containsUplevelReferences() { + // Path is normalized, so any ".." would have to be at the very start + return path.startsWith(".."); + } /** - * Compares two PathFragments using the lexicographical order. + * Returns true if the passed path contains uplevel references ".." or single-dot references "." + * + * <p>This is useful to check a string for normalization before constructing a PathFragment, since + * these are always normalized and will throw uplevel references away. */ - @Override - public int compareTo(PathFragment p2) { - return HELPER.compare(this, p2); + public static boolean isNormalized(String path) { + return isNormalizedImpl(path, true /* lookForSameLevelReferences */); } - @Override - public String toString() { - return getPathString(); + /** + * Returns true if the passed path contains uplevel references "..". + * + * <p>This is useful to check a string for '..' segments before constructing a PathFragment, since + * these are always normalized and will throw uplevel references away. + */ + public static boolean containsUplevelReferences(String path) { + return !isNormalizedImpl(path, false /* lookForSameLevelReferences */); + } + + private enum NormalizedImplState { + Base, /* No particular state, eg. an 'a' or 'L' character */ + Separator, /* We just saw a separator */ + Dot, /* We just saw a dot after a separator */ + DotDot, /* We just saw two dots after a separator */ + } + + private static boolean isNormalizedImpl(String path, boolean lookForSameLevelReferences) { + // Starting state is equivalent to having just seen a separator + NormalizedImplState state = NormalizedImplState.Separator; + int n = path.length(); + for (int i = 0; i < n; ++i) { + char c = path.charAt(i); + boolean isSeparator = OS.isSeparator(c); + switch (state) { + case Base: + if (isSeparator) { + state = NormalizedImplState.Separator; + } else { + state = NormalizedImplState.Base; + } + break; + case Separator: + if (isSeparator) { + state = NormalizedImplState.Separator; + } else if (c == '.') { + state = NormalizedImplState.Dot; + } else { + state = NormalizedImplState.Base; + } + break; + case Dot: + if (isSeparator) { + if (lookForSameLevelReferences) { + // "." segment found + return false; + } + state = NormalizedImplState.Separator; + } else if (c == '.') { + state = NormalizedImplState.DotDot; + } else { + state = NormalizedImplState.Base; + } + break; + case DotDot: + if (isSeparator) { + // ".." segment found + return false; + } else { + state = NormalizedImplState.Base; + } + break; + default: + throw new IllegalStateException("Unhandled state: " + state); + } + } + // The character just after the string is equivalent to a separator + switch (state) { + case Dot: + if (lookForSameLevelReferences) { + // "." segment found + return false; + } + break; + case DotDot: + return false; + default: + } + return true; } - @Override - public void repr(SkylarkPrinter printer) { - printer.append(getPathString()); + /** + * Throws {@link IllegalArgumentException} if {@code paths} contains any paths that are equal to + * {@code startingWithPath} or that are not beneath {@code startingWithPath}. + */ + public static void checkAllPathsAreUnder( + Iterable<PathFragment> paths, PathFragment startingWithPath) { + for (PathFragment path : paths) { + Preconditions.checkArgument( + !path.equals(startingWithPath) && path.startsWith(startingWithPath), + "%s is not beneath %s", + path, + startingWithPath); + } } @Override public String filePathForFileTypeMatcher() { - return getBaseName(); + return path; } @Override @@ -783,25 +734,13 @@ public abstract class PathFragment @Override public void serialize(PathFragment pathFragment, CodedOutputStream codedOut) throws IOException, SerializationException { - codedOut.writeInt32NoTag(pathFragment.getDriveLetter()); - codedOut.writeBoolNoTag(pathFragment.isAbsolute()); - codedOut.writeInt32NoTag(pathFragment.segmentCount()); - for (int i = 0; i < pathFragment.segmentCount(); i++) { - stringCodec.serialize(pathFragment.getSegment(i), codedOut); - } + stringCodec.serialize(pathFragment.getPathString(), codedOut); } @Override public PathFragment deserialize(CodedInputStream codedIn) throws IOException, SerializationException { - char driveLetter = (char) codedIn.readInt32(); - boolean isAbsolute = codedIn.readBool(); - int segmentCount = codedIn.readInt32(); - String[] segments = new String[segmentCount]; - for (int i = 0; i < segmentCount; i++) { - segments[i] = stringCodec.deserialize(codedIn); - } - return PathFragment.create(driveLetter, isAbsolute, segments); + return PathFragment.createAlreadyNormalized(stringCodec.deserialize(codedIn)); } } @@ -816,4 +755,8 @@ public abstract class PathFragment throw new IllegalArgumentException("baseName must not contain a slash: '" + baseName + "'"); } } + + private Object writeReplace() { + return new PathFragmentSerializationProxy(path); + } } |