about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- src/main/java/com/google/devtools/build/lib/syntax/Lexer.java 98
-rw-r--r-- src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java 11
2 files changed, 62 insertions, 47 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
index 01f84bd23b..b1b63c822d 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
@@ -313,9 +313,8 @@ public final class Lexer {
* @return the string-literal token.
*/
private Token escapedStringLiteral(char quot, boolean isRaw) {
+ int literalStartPos = isRaw ? pos - 2 : pos - 1;
boolean inTriplequote = skipTripleQuote(quot);
-
- int oldPos = pos - 1;
// more expensive second choice that expands escaped into a buffer
StringBuilder literal = new StringBuilder();
while (pos < buffer.length) {
@@ -327,14 +326,14 @@ public final class Lexer {
literal.append(c);
break;
} else {
- error("unterminated string literal at eol", oldPos, pos);
+ error("unterminated string literal at eol", literalStartPos, pos);
newline();
- return new Token(TokenKind.STRING, oldPos, pos, literal.toString());
+ return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
}
case '\\':
if (pos == buffer.length) {
- error("unterminated string literal at eof", oldPos, pos);
- return new Token(TokenKind.STRING, oldPos, pos, literal.toString());
+ error("unterminated string literal at eof", literalStartPos, pos);
+ return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
}
if (isRaw) {
// Insert \ and the following character.
@@ -383,32 +382,46 @@ public final class Lexer {
case '"':
literal.append('"');
break;
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7': { // octal escape
- int octal = c - '0';
- if (pos < buffer.length) {
- c = buffer[pos];
- if (c >= '0' && c <= '7') {
- pos++;
- octal = (octal << 3) | (c - '0');
- if (pos < buffer.length) {
- c = buffer[pos];
- if (c >= '0' && c <= '7') {
- pos++;
- octal = (octal << 3) | (c - '0');
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ { // octal escape
+ int octal = c - '0';
+ if (pos < buffer.length) {
+ c = buffer[pos];
+ if (c >= '0' && c <= '7') {
+ pos++;
+ octal = (octal << 3) | (c - '0');
+ if (pos < buffer.length) {
+ c = buffer[pos];
+ if (c >= '0' && c <= '7') {
+ pos++;
+ octal = (octal << 3) | (c - '0');
+ }
}
}
}
+ if (octal > 0xff) {
+ error("octal escape sequence out of range (maximum is \\377)");
+ }
+ literal.append((char) (octal & 0xff));
+ break;
}
- if (octal > 0xff) {
- error("octal escape sequence out of range (maximum is \\377)");
- }
- literal.append((char) (octal & 0xff));
- break;
- }
- case 'a': case 'b': case 'f': case 'N': case 'u': case 'U': case 'v': case 'x':
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'N':
+ case 'u':
+ case 'U':
+ case 'v':
+ case 'x':
// exists in Python but not implemented in Blaze => error
- error("escape sequence not implemented: \\" + c, oldPos, pos);
+ error("escape sequence not implemented: \\" + c, literalStartPos, pos);
break;
default:
// unknown char escape => "\literal"
@@ -419,13 +432,12 @@ public final class Lexer {
break;
case '\'':
case '"':
- if (c != quot
- || (inTriplequote && !skipTripleQuote(quot))) {
+ if (c != quot || (inTriplequote && !skipTripleQuote(quot))) {
// Non-matching quote, treat it like a regular char.
literal.append(c);
} else {
// Matching close-delimiter, all done.
- return new Token(TokenKind.STRING, oldPos, pos, literal.toString());
+ return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
}
break;
default:
@@ -433,8 +445,8 @@ public final class Lexer {
break;
}
}
- error("unterminated string literal at eof", oldPos, pos);
- return new Token(TokenKind.STRING, oldPos, pos, literal.toString());
+ error("unterminated string literal at eof", literalStartPos, pos);
+ return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
}
/**
@@ -449,7 +461,8 @@ public final class Lexer {
* @return the string-literal token.
*/
private Token stringLiteral(char quot, boolean isRaw) {
- int oldPos = pos - 1;
+ int literalStartPos = isRaw ? pos - 2 : pos - 1;
+ int contentStartPos = pos;
// Don't even attempt to parse triple-quotes here.
if (skipTripleQuote(quot)) {
@@ -462,9 +475,10 @@ public final class Lexer {
char c = buffer[pos++];
switch (c) {
case '\n':
- error("unterminated string literal at eol", oldPos, pos);
- Token t = new Token(TokenKind.STRING, oldPos, pos,
- bufferSlice(oldPos + 1, pos - 1));
+ error("unterminated string literal at eol", literalStartPos, pos);
+ Token t =
+ new Token(
+ TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos - 1));
newline();
return t;
case '\\':
@@ -472,7 +486,7 @@ public final class Lexer {
if (lookaheadIs(0, '\r') && lookaheadIs(1, '\n')) {
// There was a CRLF after the newline. No shortcut possible, since it needs to be
// transformed into a single LF.
- pos = oldPos + 1;
+ pos = contentStartPos;
return escapedStringLiteral(quot, true);
} else {
pos++;
@@ -480,13 +494,14 @@ public final class Lexer {
}
}
// oops, hit an escape, need to start over & build a new string buffer
- pos = oldPos + 1;
+ pos = contentStartPos;
return escapedStringLiteral(quot, false);
case '\'':
case '"':
if (c == quot) {
// close-quote, all done.
- return new Token(TokenKind.STRING, oldPos, pos, bufferSlice(oldPos + 1, pos - 1));
+ return new Token(
+ TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos - 1));
}
break;
default: // fall out
@@ -499,9 +514,8 @@ public final class Lexer {
pos = buffer.length;
}
- error("unterminated string literal at eof", oldPos, pos);
- return new Token(TokenKind.STRING, oldPos, pos,
- bufferSlice(oldPos + 1, pos));
+ error("unterminated string literal at eof", literalStartPos, pos);
+ return new Token(TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos));
}
private static final Map<String, TokenKind> keywordMap = new HashMap<>();
diff --git a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
index 1c4abd240e..8d9318f071 100644
--- a/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
+++ b/src/test/java/com/google/devtools/build/lib/syntax/LexerTest.java
@@ -447,11 +447,12 @@ public class LexerTest {
@Test
public void testTokenPositions() throws Exception {
- assertThat(positions(tokens("foo(bar, {1: 'quux'})"))).isEqualTo(
- // foo ( bar , { 1 :
- "[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)"
- // 'quux' } ) NEWLINE EOF
- + " [13,19) [19,20) [20,21) [20,21) [21,21)");
+ assertThat(positions(tokens("foo(bar, {1: 'quux'}, \"\"\"b\"\"\", r\"\")")))
+ .isEqualTo(
+ // foo ( bar , { 1 :
+ "[0,3) [3,4) [4,7) [7,8) [9,10) [10,11) [11,12)"
+ // 'quux' } , """b""" , r"" ) NEWLINE EOF
+ + " [13,19) [19,20) [20,21) [22,29) [29,30) [31,34) [34,35) [34,35) [35,35)");
}
@Test