aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java
diff options
context:
space:
mode:
authorGravatar Lukacs Berki <lberki@google.com>2016-06-22 15:38:11 +0000
committerGravatar Lukacs Berki <lberki@google.com>2016-06-23 11:03:43 +0000
commitf3dc0df65c7832b05df5e642af21401b52afbc9a (patch)
tree1cb4fd7d3e7cbcd5ade67928d382fc595ed5d6b7 /src/main/java
parent0861282d10265201ac3c307668ac553e597d14a8 (diff)
Fix handling of backslash-escaped CRLF line terminators.
The character sequences in the test cases are now handled the same way Python handles them. Fixes #1306. -- MOS_MIGRATED_REVID=125568600
Diffstat (limited to 'src/main/java')
-rw-r--r--src/main/java/com/google/devtools/build/lib/syntax/Lexer.java35
1 files changed, 29 insertions, 6 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
index 1b10a667a6..1d60d50e82 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
@@ -349,13 +349,28 @@ public final class Lexer {
// Insert \ and the following character.
// As in Python, it means that a raw string can never end with a single \.
literal.append('\\');
- literal.append(buffer[pos]);
- pos++;
+ if (pos + 1 < buffer.length && buffer[pos] == '\r' && buffer[pos + 1] == '\n') {
+ literal.append("\n");
+ pos += 2;
+ } else if (buffer[pos] == '\r' || buffer[pos] == '\n') {
+ literal.append("\n");
+ pos += 1;
+ } else {
+ literal.append(buffer[pos]);
+ pos += 1;
+ }
break;
}
c = buffer[pos];
pos++;
switch (c) {
+ case '\r':
+ if (pos < buffer.length && buffer[pos] == '\n') {
+ pos += 1;
+ break;
+ } else {
+ break;
+ }
case '\n':
// ignore end of line character
break;
@@ -460,9 +475,15 @@ public final class Lexer {
return t;
case '\\':
if (isRaw) {
- // skip the next character
- pos++;
- break;
+ if (pos + 1 < buffer.length && buffer[pos] == '\r' && buffer[pos + 1] == '\n') {
+ // There is a CRLF right after the backslash. No shortcut is possible, since the CRLF
+ // needs to be transformed into a single LF.
+ pos = oldPos + 1;
+ return escapedStringLiteral(quot, true);
+ } else {
+ pos++;
+ break;
+ }
}
// oops, hit an escape, need to start over & build a new string buffer
pos = oldPos + 1;
@@ -757,7 +778,9 @@ public final class Lexer {
case '\\': {
// Backslash character is valid only at the end of a line (or in a string)
if (pos + 1 < buffer.length && buffer[pos] == '\n') {
- pos++; // skip the end of line character
+ pos += 1; // skip the end of line character
+ } else if (pos + 2 < buffer.length && buffer[pos] == '\r' && buffer[pos + 1] == '\n') {
+ pos += 2; // skip the CRLF at the end of line
} else {
addToken(new Token(TokenKind.ILLEGAL, pos - 1, pos, Character.toString(c)));
}