Reject files when the first line is indented.

A bug in the lexer caused indentation on the first line of a file to be ignored. Such indentation now causes an error.

Also, remove the COMMENT token from the lexer. Comments are now collected by the lexer and accessed separately. This will allow further optimizations in the lexer. It also aligns the code a bit more with the Go implementation.

RELNOTES[INC]: Indentation on the first line of a file was previously ignored. This is now fixed.

PiperOrigin-RevId: 197889775
author: laurentlb <laurentlb@google.com> 2018-05-24 07:32:52 -0700
committer: Copybara-Service <copybara-piper@google.com> 2018-05-24 07:33:48 -0700
commit: 17f8d4e5a36f5c4bd020ce9163f5b1db62679e2c (patch)
tree: 9b065d3c27259a5da38563fcda505c3f7002275c /src/main/java/com/google/devtools/build
parent: 2a6051b0c74ce59e30522fbd509ccbb460289df7 (diff)
3 files changed, 32 insertions, 25 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
index 5ecae12c6e..a50a0a0049 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Lexer.java
@@ -24,7 +24,9 @@ import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
 import com.google.devtools.build.lib.util.Pair;
 import com.google.devtools.build.lib.vfs.PathFragment;
 import java.util.ArrayDeque;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Stack;
@@ -87,11 +89,18 @@ public final class Lexer {
   /** Last Token that was scanned. */
   private Token lastToken;
 
+  private final List<Comment> comments;
+
   // The number of unclosed open-parens ("(", '{', '[') at the current point in
   // the stream. Whitespace is handled differently when this is nonzero.
   private int openParenStackDepth = 0;
 
   private boolean containsErrors;
+  /**
+   * True after a NEWLINE token.
+   * In other words, we are outside an expression and we have to check the indentation.
+   */
+  private boolean checkIndentation;
 
   /**
    * Constructs a lexer which tokenizes the contents of the specified InputBuffer. Any errors during
@@ -104,6 +113,8 @@ public final class Lexer {
     this.pos = 0;
     this.eventHandler = eventHandler;
     this.locationInfo = new LocationInfo(input.getPath(), lineNumberTable);
+    this.checkIndentation = true;
+    this.comments = new ArrayList<>();
 
     indentStack.push(0);
   }
@@ -112,6 +123,10 @@ public final class Lexer {
     this(input, eventHandler, LineNumberTable.create(input.getContent(), input.getPath()));
   }
 
+  List<Comment> getComments() {
+    return comments;
+  }
+
   /**
    * Returns the filename from which the lexer's input came. Returns an empty value if the input
    * came from a string.
@@ -216,18 +231,16 @@ public final class Lexer {
   }
 
   /**
-   * Parses an end-of-line sequence, handling statement indentation correctly.
+   * Parses an end-of-line sequence.
    *
    * <p>UNIX newlines are assumed (LF). Carriage returns are always ignored.
-   *
-   * <p>ON ENTRY: 'pos' is the index of the char after '\n'.
-   * ON EXIT: 'pos' is the index of the next non-space char after '\n'.
    */
   private void newline() {
     if (openParenStackDepth > 0) {
       newlineInsideExpression(); // in an expression: ignore space
     } else {
-      newlineOutsideExpression(); // generate NEWLINE/INDENT/OUTDENT tokens
+      checkIndentation = true;
+      addToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
     }
   }
 
@@ -244,10 +257,6 @@ public final class Lexer {
   }
 
   private void newlineOutsideExpression() {
-    if (pos > 1) { // skip over newline at start of file
-      addToken(new Token(TokenKind.NEWLINE, pos - 1, pos));
-    }
-
     // we're in a stmt: suck up space at beginning of next line
     int indentLen = 0;
     while (pos < buffer.length) {
@@ -269,7 +278,7 @@ public final class Lexer {
         while (pos < buffer.length && c != '\n') {
           c = buffer[pos++];
         }
-        addToken(new Token(TokenKind.COMMENT, oldPos, pos - 1, bufferSlice(oldPos, pos - 1)));
+        makeComment(oldPos, pos - 1, bufferSlice(oldPos, pos - 1));
         indentLen = 0;
       } else { // printing character
         break;
@@ -707,6 +716,14 @@ public final class Lexer {
    * least one token will be added to the tokens queue.
    */
   private void tokenize() {
+    if (checkIndentation) {
+      checkIndentation = false;
+      newlineOutsideExpression(); // generate INDENT/OUTDENT tokens
+      if (!tokens.isEmpty()) {
+        return;
+      }
+    }
+
     while (pos < buffer.length) {
       if (tokenizeTwoChars()) {
         pos += 2;
@@ -837,7 +854,7 @@ public final class Lexer {
             pos++;
           }
         }
-        addToken(new Token(TokenKind.COMMENT, oldPos, pos, bufferSlice(oldPos, pos)));
+        makeComment(oldPos, pos, bufferSlice(oldPos, pos));
         break;
       }
       case '\'':
@@ -908,4 +925,7 @@ public final class Lexer {
     return new String(this.buffer, start, end - start);
   }
 
+  private void makeComment(int start, int end, String content) {
+    comments.add(ASTNode.setLocation(createLocation(start, end), new Comment(content)));
+  }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
index d593dc0d36..ec6d323f1b 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
@@ -115,7 +115,6 @@ public class Parser {
 
   private final Lexer lexer;
   private final EventHandler eventHandler;
-  private final List<Comment> comments;
 
   private static final Map<TokenKind, Operator> binaryOperators =
       new ImmutableMap.Builder<TokenKind, Operator>()
@@ -167,7 +166,6 @@ public class Parser {
   private Parser(Lexer lexer, EventHandler eventHandler) {
     this.lexer = lexer;
     this.eventHandler = eventHandler;
-    this.comments = new ArrayList<>();
     nextToken();
   }
 
@@ -195,7 +193,7 @@ public class Parser {
     List<Statement> statements = parser.parseFileInput();
     boolean errors = parser.errorsCount > 0 || lexer.containsErrors();
     return new ParseResult(
-        statements, parser.comments, locationFromStatements(lexer, statements), errors);
+        statements, lexer.getComments(), locationFromStatements(lexer, statements), errors);
   }
 
   /**
@@ -415,11 +413,6 @@ public class Parser {
   private void nextToken() {
     if (token == null || token.kind != TokenKind.EOF) {
       token = lexer.nextToken();
-      // transparently handle comment tokens
-      while (token.kind == TokenKind.COMMENT) {
-        makeComment();
-        token = lexer.nextToken();
-      }
     }
     checkForbiddenKeywords();
     if (DEBUGGING) {
@@ -1344,9 +1337,4 @@ public class Parser {
     }
     return setLocation(new ReturnStatement(expression), start, end);
   }
-
-  // create a comment node
-  private void makeComment() {
-    comments.add(setLocation(new Comment((String) token.value), token.left, token.right));
-  }
 }
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
index e5098f18e2..e2a4dcdebe 100644
--- a/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
+++ b/src/main/java/com/google/devtools/build/lib/syntax/TokenKind.java
@@ -26,7 +26,6 @@ public enum TokenKind {
   CLASS("class"),
   COLON(":"),
   COMMA(","),
-  COMMENT("comment"),
   CONTINUE("continue"),
   DEF("def"),
   DEL("del"),
author	laurentlb <laurentlb@google.com>	2018-05-24 07:32:52 -0700
committer	Copybara-Service <copybara-piper@google.com>	2018-05-24 07:33:48 -0700
commit	17f8d4e5a36f5c4bd020ce9163f5b1db62679e2c (patch)
tree	9b065d3c27259a5da38563fcda505c3f7002275c /src/main/java/com/google/devtools/build
parent	2a6051b0c74ce59e30522fbd509ccbb460289df7 (diff)