diff options
author | Lukacs Berki <lberki@google.com> | 2016-06-22 15:38:11 +0000 |
---|---|---|
committer | Lukacs Berki <lberki@google.com> | 2016-06-23 11:03:43 +0000 |
commit | f3dc0df65c7832b05df5e642af21401b52afbc9a (patch) | |
tree | 1cb4fd7d3e7cbcd5ade67928d382fc595ed5d6b7 /src/main/java | |
parent | 0861282d10265201ac3c307668ac553e597d14a8 (diff) |
Fix handling of backslash-escaped CRLF line terminators.
The character sequences in the test cases are now handled the same way Python handles them.
Fixed #1306.
--
MOS_MIGRATED_REVID=125568600
Diffstat (limited to 'src/main/java')
-rw-r--r-- | src/main/java/com/google/devtools/build/lib/syntax/Lexer.java | 35 |
1 file changed, 29 insertions, 6 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java index 1b10a667a6..1d60d50e82 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java @@ -349,13 +349,28 @@ public final class Lexer { // Insert \ and the following character. // As in Python, it means that a raw string can never end with a single \. literal.append('\\'); - literal.append(buffer[pos]); - pos++; + if (pos + 1 < buffer.length && buffer[pos] == '\r' && buffer[pos + 1] == '\n') { + literal.append("\n"); + pos += 2; + } else if (buffer[pos] == '\r' || buffer[pos] == '\n') { + literal.append("\n"); + pos += 1; + } else { + literal.append(buffer[pos]); + pos += 1; + } break; } c = buffer[pos]; pos++; switch (c) { + case '\r': + if (pos < buffer.length && buffer[pos] == '\n') { + pos += 1; + break; + } else { + break; + } case '\n': // ignore end of line character break; @@ -460,9 +475,15 @@ public final class Lexer { return t; case '\\': if (isRaw) { - // skip the next character - pos++; - break; + if (pos + 1 < buffer.length && buffer[pos] == '\r' && buffer[pos + 1] == '\n') { + // There was a CRLF after the newline. No shortcut possible, since it needs to be + // transformed into a single LF. 
+ pos = oldPos + 1; + return escapedStringLiteral(quot, true); + } else { + pos++; + break; + } } // oops, hit an escape, need to start over & build a new string buffer pos = oldPos + 1; @@ -757,7 +778,9 @@ public final class Lexer { case '\\': { // Backslash character is valid only at the end of a line (or in a string) if (pos + 1 < buffer.length && buffer[pos] == '\n') { - pos++; // skip the end of line character + pos += 1; // skip the end of line character + } else if (pos + 2 < buffer.length && buffer[pos] == '\r' && buffer[pos + 1] == '\n') { + pos += 2; // skip the CRLF at the end of line } else { addToken(new Token(TokenKind.ILLEGAL, pos - 1, pos, Character.toString(c))); } |