diff options
author | Googler <noreply@google.com> | 2016-06-13 18:00:27 +0000 |
---|---|---|
committer | Yue Gan <yueg@google.com> | 2016-06-14 08:14:33 +0000 |
commit | 0005bbcba9b3564d9e4490725973a68674aa9455 (patch) | |
tree | d54304e8080c73f7079bdeed7252560149541fa9 /src | |
parent | f812a2f8dfebf21645cbb559a29357b627edddde (diff) |
Optimize hotspot in DependencySet.process().
--
MOS_MIGRATED_REVID=124744073
Diffstat (limited to 'src')
3 files changed, 116 insertions, 73 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/util/DependencySet.java b/src/main/java/com/google/devtools/build/lib/util/DependencySet.java index f1d7ee22ed..e68e927654 100644 --- a/src/main/java/com/google/devtools/build/lib/util/DependencySet.java +++ b/src/main/java/com/google/devtools/build/lib/util/DependencySet.java @@ -24,8 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * Representation of a set of file dependencies for a given output file. There @@ -49,10 +47,6 @@ import java.util.regex.Pattern; */ public final class DependencySet { - private static final Pattern DOTD_MERGED_LINE_SEPARATOR = Pattern.compile("\\\\[\n\r]+"); - private static final Pattern DOTD_LINE_SEPARATOR = Pattern.compile("[\n\r]+"); - private static final Pattern DOTD_DEP = Pattern.compile("(?:[^\\s\\\\]++|\\\\ |\\\\)+"); - /** * The set of dependent files that this DependencySet embodies. They are all * Path with the same FileSystem A tree set is used to ensure that we @@ -73,7 +67,7 @@ public final class DependencySet { public void setOutputFileName(String outputFileName) { this.outputFileName = outputFileName; } - + /** * Constructs a new empty DependencySet instance. */ @@ -128,74 +122,90 @@ public final class DependencySet { * them reach into hundreds of kilobytes. */ public DependencySet process(byte[] content) throws IOException { - if (content.length > 0 && content[content.length - 1] != '\n') { + final int n = content.length; + if (n > 0 && content[n - 1] != '\n') { throw new IOException("File does not end in a newline"); + // From now on, we can safely peek ahead one character when not at a newline. } - // true if there is a CR in the input. - boolean cr = content.length > 0 && content[0] == '\r'; - // true if there is more than one line in the input, not counting \-wrapped lines. - boolean multiline = false; - - byte prevByte = ' '; - for (int i = 1; i < content.length; i++) { - byte b = content[i]; - if (cr || b == '\r') { - // CR found, abort since our little loop here does not deal with CR/LFs. - cr = true; - break; - } - if (b == '\n') { - // Merge lines wrapped using backslashes. - if (prevByte == '\\') { - content[i] = ' '; - content[i - 1] = ' '; - } else { - multiline = true; - } - } - prevByte = b; - } - - if (!cr && content.length > 0 && content[content.length - 1] == '\n') { - content[content.length - 1] = ' '; - } + // Our write position in content[]; we use the prefix as working space to build strings. + int w = 0; + // Have we seen a leading "mumble.o:" on this line yet? If not, we ignore + // any dependencies we parse. This is bug-for-bug compatibility with our + // MSVC wrapper, which generates invalid .d files :( + boolean sawTarget = false; + for (int r = 0; r < n; ) { + final byte c = content[r++]; + switch (c) { + + case ' ': + // If we haven't yet seen the colon delimiting the target name, + // keep scanning. We do this to cope with "foo.o : \" which is + // valid Makefile syntax produced by the cuda compiler. + if (sawTarget && w > 0) { + addDependency(new String(content, 0, w, StandardCharsets.UTF_8)); + w = 0; + } + continue; + + case '\r': + // Ignore, should be followed by a \n. + continue; + + case '\n': + // This closes a filename. + // (Arguably if !sawTarget && w > 0 we should report an error, + // as that suggests the .d file is malformed.) + if (sawTarget && w > 0) { + addDependency(new String(content, 0, w, StandardCharsets.UTF_8)); + } + w = 0; + sawTarget = false; // reset for new line + continue; + + case ':': + // Normally this indicates the target name, but it might be part of a + // filename on Windows. Peek ahead at the next character. + switch (content[r]) { + case ' ': + case '\n': + case '\r': + if (w > 0) { + outputFileName = new String(content, 0, w, StandardCharsets.UTF_8); + w = 0; + sawTarget = true; + } + continue; + default: + content[w++] = c; // copy a colon to filename + continue; + } + + case '\\': + // Peek ahead at the next character. + switch (content[r]) { + // Backslashes are taken literally except when followed by whitespace. + // See the Windows tests for some of the nonsense we have to tolerate. + case ' ': + content[w++] = ' '; // copy a space to the filename + ++r; // skip over the space + continue; + case '\n': + ++r; // skip over the newline + continue; + case '\r': + // One backslash can escape \r\n, so peek one more character. + if (content[++r] == '\n') { + ++r; + } + continue; + default: + content[w++] = c; // copy a backlash to the filename + continue; + } + + default: + content[w++] = c; - String s = new String(content, StandardCharsets.UTF_8); - if (cr) { - s = DOTD_MERGED_LINE_SEPARATOR.matcher(s).replaceAll(" ").trim(); - multiline = true; - } - return process(s, multiline); - } - - private DependencySet process(String contents, boolean multiline) { - String[] lines; - if (!multiline) { - // Microoptimization: skip the usually unnecessary expensive-ish splitting step if there is - // only one target. This saves about 20% of CPU time. - lines = new String[] { contents }; - } else { - lines = DOTD_LINE_SEPARATOR.split(contents); - } - - for (String line : lines) { - // Split off output file name. - int pos = line.indexOf(':'); - if (pos == -1) { - continue; - } - outputFileName = line.substring(0, pos); - - String deps = line.substring(pos + 1); - - Matcher m = DOTD_DEP.matcher(deps); - while (m.find()) { - String token = m.group(); - // Process escaped spaces. - if (token.contains("\\ ")) { - token = token.replace("\\ ", " "); - } - addDependency(token); } } return this; diff --git a/src/test/java/com/google/devtools/build/lib/util/DependencySetTest.java b/src/test/java/com/google/devtools/build/lib/util/DependencySetTest.java index fd33d34bc6..ad0590186f 100644 --- a/src/test/java/com/google/devtools/build/lib/util/DependencySetTest.java +++ b/src/test/java/com/google/devtools/build/lib/util/DependencySetTest.java @@ -191,6 +191,39 @@ public class DependencySetTest { } } + /* + * Test compatibility with --config=nvcc, which writes an extra space before the colon. + */ + @Test + public void dotDParser_spaceBeforeColon() throws Exception { + Path file1 = fileSystem.getPath("/usr/local/blah/blah/genhello/hello.cc"); + Path file2 = fileSystem.getPath("/usr/local/blah/blah/genhello/hello.h"); + String filename = "hello.o"; + Path dotd = scratch.file("/tmp/foo.d", + filename + " : \\", + " " + file1 + " \\", + " " + file2 + " "); + DependencySet depset = newDependencySet().read(dotd); + assertThat(depset.getDependencies()).containsExactlyElementsIn(Sets.newHashSet(file1, file2)); + assertEquals(depset.getOutputFileName(), filename); + } + + /* + * Bug-for-bug compatibility with --config=msvc, which writes malformed .d files. + */ + @Test + public void dotDParser_missingBackslash() throws Exception { + Path file1 = fileSystem.getPath("/usr/local/blah/blah/genhello/hello.cc"); + Path file2 = fileSystem.getPath("/usr/local/blah/blah/genhello/hello.h"); + String filename = "hello.o"; + Path dotd = scratch.file("/tmp/foo.d", + filename + ": ", + " " + file1 + " \\", + " " + file2 + " "); + DependencySet depset = newDependencySet().read(dotd); + assertThat(depset.getDependencies()).isEmpty(); + } + @Test public void writeSet() throws Exception { Path file1 = fileSystem.getPath("/usr/local/blah/blah/genhello/hello.cc"); diff --git a/src/test/java/com/google/devtools/build/lib/util/DependencySetWindowsTest.java b/src/test/java/com/google/devtools/build/lib/util/DependencySetWindowsTest.java index 129692bae8..15e32e0cff 100644 --- a/src/test/java/com/google/devtools/build/lib/util/DependencySetWindowsTest.java +++ b/src/test/java/com/google/devtools/build/lib/util/DependencySetWindowsTest.java @@ -90,7 +90,7 @@ public class DependencySetWindowsTest { } @Test - public void dotDParser_caseInsenstivie() throws Exception { + public void dotDParser_caseInsensitive() throws Exception { Path file1 = fileSystem.getPath("C:/blah/blah/genhello/hello.cc"); Path file2 = fileSystem.getPath("C:/blah/blah/genhello/hello.h"); Path file2DiffCase = fileSystem.getPath("C:/Blah/blah/Genhello/hello.h"); |