diff options
author | 2018-05-24 07:32:52 -0700 | |
---|---|---|
committer | 2018-05-24 07:33:48 -0700 | |
commit | 17f8d4e5a36f5c4bd020ce9163f5b1db62679e2c (patch) | |
tree | 9b065d3c27259a5da38563fcda505c3f7002275c /src/main/java/com/google/devtools/build | |
parent | 2a6051b0c74ce59e30522fbd509ccbb460289df7 (diff) |
Reject files when the first line is indented.
Due to a bug, the lexer ignored indentation on the first line of a file. Such
indentation now causes an error.
Also, remove the COMMENT token from the lexer. Comments are now accessed separately. This will allow further optimizations in the lexer. It also aligns the code a bit more with the Go implementation.
RELNOTES[INC]:
Indentation on the first line of a file was previously ignored. This is now fixed.
PiperOrigin-RevId: 197889775
Diffstat (limited to 'src/main/java/com/google/devtools/build')
3 files changed, 32 insertions, 25 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java index 5ecae12c6e..a50a0a0049 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java @@ -24,7 +24,9 @@ import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec; import com.google.devtools.build.lib.util.Pair; import com.google.devtools.build.lib.vfs.PathFragment; import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Stack; @@ -87,11 +89,18 @@ public final class Lexer { /** Last Token that was scanned. */ private Token lastToken; + private final List<Comment> comments; + // The number of unclosed open-parens ("(", '{', '[') at the current point in // the stream. Whitespace is handled differently when this is nonzero. private int openParenStackDepth = 0; private boolean containsErrors; + /** + * True after a NEWLINE token. + * In other words, we are outside an expression and we have to check the indentation. + */ + private boolean checkIndentation; /** * Constructs a lexer which tokenizes the contents of the specified InputBuffer. Any errors during @@ -104,6 +113,8 @@ public final class Lexer { this.pos = 0; this.eventHandler = eventHandler; this.locationInfo = new LocationInfo(input.getPath(), lineNumberTable); + this.checkIndentation = true; + this.comments = new ArrayList<>(); indentStack.push(0); } @@ -112,6 +123,10 @@ public final class Lexer { this(input, eventHandler, LineNumberTable.create(input.getContent(), input.getPath())); } + List<Comment> getComments() { + return comments; + } + /** * Returns the filename from which the lexer's input came. Returns an empty value if the input * came from a string. 
@@ -216,18 +231,16 @@ public final class Lexer { } /** - * Parses an end-of-line sequence, handling statement indentation correctly. + * Parses an end-of-line sequence. * * <p>UNIX newlines are assumed (LF). Carriage returns are always ignored. - * - * <p>ON ENTRY: 'pos' is the index of the char after '\n'. - * ON EXIT: 'pos' is the index of the next non-space char after '\n'. */ private void newline() { if (openParenStackDepth > 0) { newlineInsideExpression(); // in an expression: ignore space } else { - newlineOutsideExpression(); // generate NEWLINE/INDENT/OUTDENT tokens + checkIndentation = true; + addToken(new Token(TokenKind.NEWLINE, pos - 1, pos)); } } @@ -244,10 +257,6 @@ public final class Lexer { } private void newlineOutsideExpression() { - if (pos > 1) { // skip over newline at start of file - addToken(new Token(TokenKind.NEWLINE, pos - 1, pos)); - } - // we're in a stmt: suck up space at beginning of next line int indentLen = 0; while (pos < buffer.length) { @@ -269,7 +278,7 @@ public final class Lexer { while (pos < buffer.length && c != '\n') { c = buffer[pos++]; } - addToken(new Token(TokenKind.COMMENT, oldPos, pos - 1, bufferSlice(oldPos, pos - 1))); + makeComment(oldPos, pos - 1, bufferSlice(oldPos, pos - 1)); indentLen = 0; } else { // printing character break; @@ -707,6 +716,14 @@ public final class Lexer { * least one token will be added to the tokens queue. 
*/ private void tokenize() { + if (checkIndentation) { + checkIndentation = false; + newlineOutsideExpression(); // generate INDENT/OUTDENT tokens + if (!tokens.isEmpty()) { + return; + } + } + while (pos < buffer.length) { if (tokenizeTwoChars()) { pos += 2; @@ -837,7 +854,7 @@ public final class Lexer { pos++; } } - addToken(new Token(TokenKind.COMMENT, oldPos, pos, bufferSlice(oldPos, pos))); + makeComment(oldPos, pos, bufferSlice(oldPos, pos)); break; } case '\'': @@ -908,4 +925,7 @@ public final class Lexer { return new String(this.buffer, start, end - start); } + private void makeComment(int start, int end, String content) { + comments.add(ASTNode.setLocation(createLocation(start, end), new Comment(content))); + } } diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java index d593dc0d36..ec6d323f1b 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java @@ -115,7 +115,6 @@ public class Parser { private final Lexer lexer; private final EventHandler eventHandler; - private final List<Comment> comments; private static final Map<TokenKind, Operator> binaryOperators = new ImmutableMap.Builder<TokenKind, Operator>() @@ -167,7 +166,6 @@ public class Parser { private Parser(Lexer lexer, EventHandler eventHandler) { this.lexer = lexer; this.eventHandler = eventHandler; - this.comments = new ArrayList<>(); nextToken(); } @@ -195,7 +193,7 @@ public class Parser { List<Statement> statements = parser.parseFileInput(); boolean errors = parser.errorsCount > 0 || lexer.containsErrors(); return new ParseResult( - statements, parser.comments, locationFromStatements(lexer, statements), errors); + statements, lexer.getComments(), locationFromStatements(lexer, statements), errors); } /** @@ -415,11 +413,6 @@ public class Parser { private void nextToken() { if (token == null || token.kind != TokenKind.EOF) { 
token = lexer.nextToken(); - // transparently handle comment tokens - while (token.kind == TokenKind.COMMENT) { - makeComment(); - token = lexer.nextToken(); - } } checkForbiddenKeywords(); if (DEBUGGING) { @@ -1344,9 +1337,4 @@ public class Parser { } return setLocation(new ReturnStatement(expression), start, end); } - - // create a comment node - private void makeComment() { - comments.add(setLocation(new Comment((String) token.value), token.left, token.right)); - } } diff --git a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java index e5098f18e2..e2a4dcdebe 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java @@ -26,7 +26,6 @@ public enum TokenKind { CLASS("class"), COLON(":"), COMMA(","), - COMMENT("comment"), CONTINUE("continue"), DEF("def"), DEL("del"), |