about summary refs log tree commit diff homepage
path: root/src/main/java/com/google/devtools/build/lib/syntax
diff options
context:
space:
mode:
author: laurentlb <laurentlb@google.com> 2018-06-05 09:42:59 -0700
committer: Copybara-Service <copybara-piper@google.com> 2018-06-05 09:45:02 -0700
commit: fc23edc368244849c59636d1e2ae6c375bebf262 (patch)
tree: 2d742b93785efbd2f1fbe21de03972edd4838d7b /src/main/java/com/google/devtools/build/lib/syntax
parent: c04ce79e4f719df54e3564f58a2ea26529ac1494 (diff)
Stop allocating new tokens in the lexer
There's only one Token and it gets reused. This reduces the memory usage of the lexer. Parsing time seems to be 5%-10% faster with this change on a large file. This makes little difference to the overall performance of Bazel, though.

RELNOTES: None.
PiperOrigin-RevId: 199310860
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/syntax')
-rw-r--r-- src/main/java/com/google/devtools/build/lib/syntax/Lexer.java 152
-rw-r--r-- src/main/java/com/google/devtools/build/lib/syntax/Token.java 10
2 files changed, 94 insertions, 68 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
index 91384f06d4..ed7bd4c17e 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
@@ -81,8 +81,12 @@ public final class Lexer {
// bottom.
private final Stack<Integer> indentStack = new Stack<>();
- /** Token to return */
- private Token token;
+ /**
+ * Token to return. This token is mutated in-place. Its kind is set to
+ * null to indicate the intermediate state, where the new token has not
+ * been scanned yet.
+ */
+ private final Token token;
private final List<Comment> comments;
@@ -112,6 +116,7 @@ public final class Lexer {
this.checkIndentation = true;
this.comments = new ArrayList<>();
this.dents = 0;
+ this.token = new Token(null, -1, -1);
indentStack.push(0);
}
@@ -146,9 +151,10 @@ public final class Lexer {
* after EOF has been returned.
*/
public Token nextToken() {
- boolean afterNewline = token != null && token.kind == TokenKind.NEWLINE;
- token = null;
+ boolean afterNewline = token.kind == TokenKind.NEWLINE;
+ token.kind = null;
tokenize();
+ Preconditions.checkState(token.kind != null);
// Like Python, always end with a NEWLINE token, even if no '\n' in input:
if (token.kind == TokenKind.EOF && !afterNewline) {
@@ -226,9 +232,20 @@ public final class Lexer {
}
/** invariant: symbol positions are half-open intervals. */
- private void setToken(Token s) {
- Preconditions.checkState(token == null);
- token = s;
+ private void setToken(TokenKind kind, int left, int right) {
+ Preconditions.checkState(token.kind == null);
+ token.kind = kind;
+ token.left = left;
+ token.right = right;
+ token.value = null;
+ }
+
+ private void setToken(TokenKind kind, int left, int right, Object value) {
+ Preconditions.checkState(token.kind == null);
+ token.kind = kind;
+ token.left = left;
+ token.right = right;
+ token.value = value;
}
/**
@@ -241,7 +258,7 @@ public final class Lexer {
newlineInsideExpression(); // in an expression: ignore space
} else {
checkIndentation = true;
- setToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
+ setToken(TokenKind.NEWLINE, pos - 1, pos);
}
}
@@ -330,7 +347,7 @@ public final class Lexer {
*
* @return the string-literal token.
*/
- private Token escapedStringLiteral(char quot, boolean isRaw) {
+ private void escapedStringLiteral(char quot, boolean isRaw) {
int literalStartPos = isRaw ? pos - 2 : pos - 1;
boolean inTriplequote = skipTripleQuote(quot);
// more expensive second choice that expands escaped into a buffer
@@ -345,12 +362,14 @@ public final class Lexer {
break;
} else {
error("unterminated string literal at eol", literalStartPos, pos);
- return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
+ setToken(TokenKind.STRING, literalStartPos, pos, literal.toString());
+ return;
}
case '\\':
if (pos == buffer.length) {
error("unterminated string literal at eof", literalStartPos, pos);
- return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
+ setToken(TokenKind.STRING, literalStartPos, pos, literal.toString());
+ return;
}
if (isRaw) {
// Insert \ and the following character.
@@ -454,7 +473,8 @@ public final class Lexer {
literal.append(c);
} else {
// Matching close-delimiter, all done.
- return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
+ setToken(TokenKind.STRING, literalStartPos, pos, literal.toString());
+ return;
}
break;
default:
@@ -463,7 +483,7 @@ public final class Lexer {
}
}
error("unterminated string literal at eof", literalStartPos, pos);
- return new Token(TokenKind.STRING, literalStartPos, pos, literal.toString());
+ setToken(TokenKind.STRING, literalStartPos, pos, literal.toString());
}
/**
@@ -477,14 +497,15 @@ public final class Lexer {
* @param isRaw if true, do not escape the string.
* @return the string-literal token.
*/
- private Token stringLiteral(char quot, boolean isRaw) {
+ private void stringLiteral(char quot, boolean isRaw) {
int literalStartPos = isRaw ? pos - 2 : pos - 1;
int contentStartPos = pos;
// Don't even attempt to parse triple-quotes here.
if (skipTripleQuote(quot)) {
pos -= 2;
- return escapedStringLiteral(quot, isRaw);
+ escapedStringLiteral(quot, isRaw);
+ return;
}
// first quick optimistic scan for a simple non-escaped string
@@ -493,17 +514,16 @@ public final class Lexer {
switch (c) {
case '\n':
error("unterminated string literal at eol", literalStartPos, pos);
- Token t =
- new Token(
- TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos - 1));
- return t;
+ setToken(TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos - 1));
+ return;
case '\\':
if (isRaw) {
if (lookaheadIs(0, '\r') && lookaheadIs(1, '\n')) {
// There was a CRLF after the newline. No shortcut possible, since it needs to be
// transformed into a single LF.
pos = contentStartPos;
- return escapedStringLiteral(quot, true);
+ escapedStringLiteral(quot, true);
+ return;
} else {
pos++;
break;
@@ -511,13 +531,15 @@ public final class Lexer {
}
// oops, hit an escape, need to start over & build a new string buffer
pos = contentStartPos;
- return escapedStringLiteral(quot, false);
+ escapedStringLiteral(quot, false);
+ return;
case '\'':
case '"':
if (c == quot) {
// close-quote, all done.
- return new Token(
+ setToken(
TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos - 1));
+ return;
}
break;
default: // fall out
@@ -531,7 +553,7 @@ public final class Lexer {
}
error("unterminated string literal at eof", literalStartPos, pos);
- return new Token(TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos));
+ setToken(TokenKind.STRING, literalStartPos, pos, bufferSlice(contentStartPos, pos));
}
private static final Map<String, TokenKind> keywordMap = new HashMap<>();
@@ -578,13 +600,15 @@ public final class Lexer {
*
* @return the identifier or keyword token.
*/
- private Token identifierOrKeyword() {
+ private void identifierOrKeyword() {
int oldPos = pos - 1;
String id = scanIdentifier();
TokenKind kind = keywordMap.get(id);
- return (kind == null)
- ? new Token(TokenKind.IDENTIFIER, oldPos, pos, id)
- : new Token(kind, oldPos, pos, null);
+ if (kind == null) {
+ setToken(TokenKind.IDENTIFIER, oldPos, pos, id);
+ } else {
+ setToken(kind, oldPos, pos, null);
+ }
}
private String scanIdentifier() {
@@ -649,10 +673,8 @@ public final class Lexer {
*
* <p>ON ENTRY: 'pos' is 1 + the index of the first char in the literal.
* ON EXIT: 'pos' is 1 + the index of the last char in the literal.
- *
- * @return the integer token.
*/
- private Token integer() {
+ private void integer() {
int oldPos = pos - 1;
String literal = scanInteger();
@@ -679,7 +701,7 @@ public final class Lexer {
error("invalid base-" + radix + " integer constant: " + literal);
}
- return new Token(TokenKind.INT, oldPos, pos, value);
+ setToken(TokenKind.INT, oldPos, pos, value);
}
/**
@@ -701,7 +723,7 @@ public final class Lexer {
if (tok == null) {
return false;
} else {
- setToken(new Token(tok, pos, pos + 2));
+ setToken(tok, pos, pos + 2);
return true;
}
}
@@ -712,8 +734,8 @@ public final class Lexer {
}
/**
- * Performs tokenization of the character buffer of file contents provided to the constructor.
- * Advances pos and sets the token variable.
+ * Performs tokenization of the character buffer of file contents provided to the constructor.
+ * Advances pos and stores the scanned token in the reused {@code token} field.
*/
private void tokenize() {
if (checkIndentation) {
@@ -725,10 +747,10 @@ public final class Lexer {
if (dents != 0) {
if (dents < 0) {
dents++;
- setToken(new Token(TokenKind.OUTDENT, pos - 1, pos));
+ setToken(TokenKind.OUTDENT, pos - 1, pos);
} else {
dents--;
- setToken(new Token(TokenKind.INDENT, pos - 1, pos));
+ setToken(TokenKind.INDENT, pos - 1, pos);
}
return;
}
@@ -742,94 +764,94 @@ public final class Lexer {
pos++;
switch (c) {
case '{': {
- setToken(new Token(TokenKind.LBRACE, pos - 1, pos));
+ setToken(TokenKind.LBRACE, pos - 1, pos);
openParenStackDepth++;
break;
}
case '}': {
- setToken(new Token(TokenKind.RBRACE, pos - 1, pos));
+ setToken(TokenKind.RBRACE, pos - 1, pos);
popParen();
break;
}
case '(': {
- setToken(new Token(TokenKind.LPAREN, pos - 1, pos));
+ setToken(TokenKind.LPAREN, pos - 1, pos);
openParenStackDepth++;
break;
}
case ')': {
- setToken(new Token(TokenKind.RPAREN, pos - 1, pos));
+ setToken(TokenKind.RPAREN, pos - 1, pos);
popParen();
break;
}
case '[': {
- setToken(new Token(TokenKind.LBRACKET, pos - 1, pos));
+ setToken(TokenKind.LBRACKET, pos - 1, pos);
openParenStackDepth++;
break;
}
case ']': {
- setToken(new Token(TokenKind.RBRACKET, pos - 1, pos));
+ setToken(TokenKind.RBRACKET, pos - 1, pos);
popParen();
break;
}
case '>': {
- setToken(new Token(TokenKind.GREATER, pos - 1, pos));
+ setToken(TokenKind.GREATER, pos - 1, pos);
break;
}
case '<': {
- setToken(new Token(TokenKind.LESS, pos - 1, pos));
+ setToken(TokenKind.LESS, pos - 1, pos);
break;
}
case ':': {
- setToken(new Token(TokenKind.COLON, pos - 1, pos));
+ setToken(TokenKind.COLON, pos - 1, pos);
break;
}
case ',': {
- setToken(new Token(TokenKind.COMMA, pos - 1, pos));
+ setToken(TokenKind.COMMA, pos - 1, pos);
break;
}
case '+': {
- setToken(new Token(TokenKind.PLUS, pos - 1, pos));
+ setToken(TokenKind.PLUS, pos - 1, pos);
break;
}
case '-': {
- setToken(new Token(TokenKind.MINUS, pos - 1, pos));
+ setToken(TokenKind.MINUS, pos - 1, pos);
break;
}
case '|': {
- setToken(new Token(TokenKind.PIPE, pos - 1, pos));
+ setToken(TokenKind.PIPE, pos - 1, pos);
break;
}
case '=': {
- setToken(new Token(TokenKind.EQUALS, pos - 1, pos));
+ setToken(TokenKind.EQUALS, pos - 1, pos);
break;
}
case '%': {
- setToken(new Token(TokenKind.PERCENT, pos - 1, pos));
+ setToken(TokenKind.PERCENT, pos - 1, pos);
break;
}
case '/': {
if (lookaheadIs(0, '/') && lookaheadIs(1, '=')) {
- setToken(new Token(TokenKind.SLASH_SLASH_EQUALS, pos - 1, pos + 2));
+ setToken(TokenKind.SLASH_SLASH_EQUALS, pos - 1, pos + 2);
pos += 2;
} else if (lookaheadIs(0, '/')) {
- setToken(new Token(TokenKind.SLASH_SLASH, pos - 1, pos + 1));
+ setToken(TokenKind.SLASH_SLASH, pos - 1, pos + 1);
pos += 1;
} else {
// /= is handled by tokenizeTwoChars.
- setToken(new Token(TokenKind.SLASH, pos - 1, pos));
+ setToken(TokenKind.SLASH, pos - 1, pos);
}
break;
}
case ';': {
- setToken(new Token(TokenKind.SEMI, pos - 1, pos));
+ setToken(TokenKind.SEMI, pos - 1, pos);
break;
}
case '.': {
- setToken(new Token(TokenKind.DOT, pos - 1, pos));
+ setToken(TokenKind.DOT, pos - 1, pos);
break;
}
case '*': {
- setToken(new Token(TokenKind.STAR, pos - 1, pos));
+ setToken(TokenKind.STAR, pos - 1, pos);
break;
}
case ' ':
@@ -845,7 +867,7 @@ public final class Lexer {
} else if (lookaheadIs(0, '\r') && lookaheadIs(1, '\n')) {
pos += 2; // skip the CRLF at the end of line
} else {
- setToken(new Token(TokenKind.ILLEGAL, pos - 1, pos, Character.toString(c)));
+ setToken(TokenKind.ILLEGAL, pos - 1, pos, Character.toString(c));
}
break;
}
@@ -868,7 +890,7 @@ public final class Lexer {
}
case '\'':
case '\"': {
- setToken(stringLiteral(c, false));
+ stringLiteral(c, false);
break;
}
default: {
@@ -877,27 +899,27 @@ public final class Lexer {
&& (buffer[pos] == '\'' || buffer[pos] == '\"')) {
c = buffer[pos];
pos++;
- setToken(stringLiteral(c, true));
+ stringLiteral(c, true);
break;
}
if (c >= '0' && c <= '9') {
- setToken(integer());
+ integer();
} else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
- setToken(identifierOrKeyword());
+ identifierOrKeyword();
} else {
error("invalid character: '" + c + "'");
}
break;
} // default
} // switch
- if (token != null) { // stop here if we scanned a token
+ if (token.kind != null) { // stop here if we scanned a token
return;
}
} // while
if (indentStack.size() > 1) { // top of stack is always zero
- setToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
+ setToken(TokenKind.NEWLINE, pos - 1, pos);
while (indentStack.size() > 1) {
indentStack.pop();
dents--;
@@ -905,7 +927,7 @@ public final class Lexer {
return;
}
- setToken(new Token(TokenKind.EOF, pos, pos));
+ setToken(TokenKind.EOF, pos, pos);
}
/**
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Token.java b/src/main/java/com/google/devtools/build/lib/syntax/Token.java
index 3c99df5f8c..733b5387d0 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Token.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Token.java
@@ -22,14 +22,14 @@ import javax.annotation.Nullable;
class Token {
TokenKind kind;
- final int left;
- final int right;
+ int left;
+ int right;
/**
* value is an Integer if the kind is INT.
* It is a String if the kind is STRING, IDENTIFIER, or COMMENT.
* It is null otherwise.
*/
- @Nullable final Object value;
+ @Nullable Object value;
Token(TokenKind kind, int left, int right) {
this(kind, left, right, null);
@@ -42,6 +42,10 @@ class Token {
this.value = value;
}
+ Token copy() {
+ return new Token(kind, left, right, value);
+ }
+
/**
* Constructs an easy-to-read string representation of token, suitable for use
* in user error messages.