1 files changed, 1274 insertions, 0 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
new file mode 100644
index 0000000000..66c3c6729f
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java
@@ -0,0 +1,1274 @@
+// Copyright 2014 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.syntax;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.devtools.build.lib.events.Event;
+import com.google.devtools.build.lib.events.EventHandler;
+import com.google.devtools.build.lib.events.Location;
+import com.google.devtools.build.lib.packages.CachingPackageLocator;
+import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral;
+import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements;
+import com.google.devtools.build.lib.vfs.Path;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Recursive descent parser for LL(2) BUILD language.
+ * Loosely based on Python 2 grammar.
+ * See https://docs.python.org/2/reference/grammar.html
+ *
+ */
+class Parser {
+
+  /**
+   * Combines the parser result into a single value object.
+   */
+  public static final class ParseResult {
+    /** The statements (rules, basically) from the parsed file. */
+    public final List<Statement> statements;
+
+    /** The comments from the parsed file. */
+    public final List<Comment> comments;
+
+    /** Whether the file contained any errors. */
+    public final boolean containsErrors;
+
+    public ParseResult(List<Statement> statements, List<Comment> comments, boolean containsErrors) {
+      // No need to copy here; when the object is created, the parser instance is just about to go
+      // out of scope and be garbage collected.
+      this.statements = Preconditions.checkNotNull(statements);
+      this.comments = Preconditions.checkNotNull(comments);
+      this.containsErrors = containsErrors;
+    }
+  }
+
+  private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET =
+    EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE);
+
+  private static final EnumSet<TokenKind> LIST_TERMINATOR_SET =
+    EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI);
+
+  private static final EnumSet<TokenKind> DICT_TERMINATOR_SET =
+    EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI);
+
+  private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET = EnumSet.of(
+      TokenKind.EOF,
+      TokenKind.COMMA,
+      TokenKind.COLON,
+      TokenKind.FOR,
+      TokenKind.PLUS,
+      TokenKind.MINUS,
+      TokenKind.PERCENT,
+      TokenKind.RPAREN,
+      TokenKind.RBRACKET);
+
+  private Token token; // current lookahead token
+  private Token pushedToken = null; // used to implement LL(2)
+
+  private static final boolean DEBUGGING = false;
+
+  private final Lexer lexer;
+  private final EventHandler eventHandler;
+  private final List<Comment> comments;
+  private final boolean parsePython;
+  /** Whether advanced language constructs are allowed */
+  private boolean skylarkMode = false;
+
+  private static final Map<TokenKind, Operator> binaryOperators =
+      new ImmutableMap.Builder<TokenKind, Operator>()
+          .put(TokenKind.AND, Operator.AND)
+          .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS)
+          .put(TokenKind.GREATER, Operator.GREATER)
+          .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS)
+          .put(TokenKind.IN, Operator.IN)
+          .put(TokenKind.LESS, Operator.LESS)
+          .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS)
+          .put(TokenKind.MINUS, Operator.MINUS)
+          .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS)
+          .put(TokenKind.OR, Operator.OR)
+          .put(TokenKind.PERCENT, Operator.PERCENT)
+          .put(TokenKind.PLUS, Operator.PLUS)
+          .put(TokenKind.STAR, Operator.MULT)
+          .build();
+
+  private static final Map<TokenKind, Operator> augmentedAssignmentMethods =
+      new ImmutableMap.Builder<TokenKind, Operator>()
+      .put(TokenKind.PLUS_EQUALS, Operator.PLUS) // += // TODO(bazel-team): other similar operators
+      .build();
+
+  /** Highest precedence goes last.
+   *  Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence
+   **/
+  private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of(
+      EnumSet.of(Operator.OR),
+      EnumSet.of(Operator.AND),
+      EnumSet.of(Operator.NOT),
+      EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS,
+          Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN),
+      EnumSet.of(Operator.MINUS, Operator.PLUS),
+      EnumSet.of(Operator.MULT, Operator.PERCENT));
+
+  private Iterator<Token> tokens = null;
+  private int errorsCount;
+  private boolean recoveryMode;  // stop reporting errors until next statement
+
+  private CachingPackageLocator locator;
+
+  private List<Path> includedFiles;
+
+  private static final String PREPROCESSING_NEEDED =
+      "Add \"# PYTHON-PREPROCESSING-REQUIRED\" on the first line of the file";
+
+  private Parser(Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator,
+                 boolean parsePython) {
+    this.lexer = lexer;
+    this.eventHandler = eventHandler;
+    this.parsePython = parsePython;
+    this.tokens = lexer.getTokens().iterator();
+    this.comments = new ArrayList<Comment>();
+    this.locator = locator;
+    this.includedFiles = new ArrayList<Path>();
+    this.includedFiles.add(lexer.getFilename());
+    nextToken();
+  }
+
+  private Parser(Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator) {
+    this(lexer, eventHandler, locator, false /* parsePython */);
+  }
+
+  public Parser setSkylarkMode(boolean skylarkMode) {
+    this.skylarkMode = skylarkMode;
+    return this;
+  }
+
+  /**
+   * Entry-point to parser that parses a build file with comments.  All errors
+   * encountered during parsing are reported via "reporter".
+   */
+  public static ParseResult parseFile(
+      Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator,
+      boolean parsePython) {
+    Parser parser = new Parser(lexer, eventHandler, locator, parsePython);
+    List<Statement> statements = parser.parseFileInput();
+    return new ParseResult(statements, parser.comments,
+        parser.errorsCount > 0 || lexer.containsErrors());
+  }
+
+  /**
+   * Entry-point to parser that parses a build file with comments.  All errors
+   * encountered during parsing are reported via "reporter".  Enable Skylark extensions
+   * that are not part of the core BUILD language.
+   */
+  public static ParseResult parseFileForSkylark(
+      Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator,
+      ValidationEnvironment validationEnvironment) {
+    Parser parser = new Parser(lexer, eventHandler, locator).setSkylarkMode(true);
+    List<Statement> statements = parser.parseFileInput();
+    boolean hasSemanticalErrors = false;
+    try {
+      for (Statement statement : statements) {
+        statement.validate(validationEnvironment);
+      }
+    } catch (EvalException e) {
+      eventHandler.handle(Event.error(e.getLocation(), e.getMessage()));
+      hasSemanticalErrors = true;
+    }
+    return new ParseResult(statements, parser.comments,
+        parser.errorsCount > 0 || lexer.containsErrors() || hasSemanticalErrors);
+  }
+
+  /**
+   * Entry-point to parser that parses a statement.  All errors encountered
+   * during parsing are reported via "reporter".
+   */
+  @VisibleForTesting
+  public static Statement parseStatement(
+      Lexer lexer, EventHandler eventHandler) {
+    return new Parser(lexer, eventHandler, null).parseSmallStatement();
+  }
+
+  /**
+   * Entry-point to parser that parses an expression.  All errors encountered
+   * during parsing are reported via "reporter".  The expression may be followed
+   * by newline tokens.
+   */
+  @VisibleForTesting
+  public static Expression parseExpression(Lexer lexer, EventHandler eventHandler) {
+    Parser parser = new Parser(lexer, eventHandler, null);
+    Expression result = parser.parseExpression();
+    while (parser.token.kind == TokenKind.NEWLINE) {
+      parser.nextToken();
+    }
+    parser.expect(TokenKind.EOF);
+    return result;
+  }
+
+  private void addIncludedFiles(List<Path> files) {
+    this.includedFiles.addAll(files);
+  }
+
+  private void reportError(Location location, String message) {
+    errorsCount++;
+    // Limit the number of reported errors to avoid spamming output.
+    if (errorsCount <= 5) {
+      eventHandler.handle(Event.error(location, message));
+    }
+  }
+
+  private void syntaxError(Token token) {
+    if (!recoveryMode) {
+      String msg = token.kind == TokenKind.INDENT
+          ? "indentation error"
+          : "syntax error at '" + token + "'";
+      reportError(lexer.createLocation(token.left, token.right), msg);
+      recoveryMode = true;
+    }
+  }
+
+  // Consumes the current token.  If it is not of the specified (expected)
+  // kind, reports a syntax error.
+  private boolean expect(TokenKind kind) {
+    boolean expected = token.kind == kind;
+    if (!expected) {
+      syntaxError(token);
+    }
+    nextToken();
+    return expected;
+  }
+
+  /**
+   * Consume tokens past the first token that has a kind that is in the set of
+   * teminatingTokens.
+   * @param terminatingTokens
+   * @return the end offset of the terminating token.
+   */
+  private int syncPast(EnumSet<TokenKind> terminatingTokens) {
+    Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
+    while (!terminatingTokens.contains(token.kind)) {
+      nextToken();
+    }
+    int end = token.right;
+    // read past the synchronization token
+    nextToken();
+    return end;
+  }
+
+  /**
+   * Consume tokens until we reach the first token that has a kind that is in
+   * the set of teminatingTokens.
+   * @param terminatingTokens
+   * @return the end offset of the terminating token.
+   */
+  private int syncTo(EnumSet<TokenKind> terminatingTokens) {
+    // EOF must be in the set to prevent an infinite loop
+    Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
+    // read past the problematic token
+    int previous = token.right;
+    nextToken();
+    int current = previous;
+    while (!terminatingTokens.contains(token.kind)) {
+      nextToken();
+      previous = current;
+      current = token.right;
+    }
+    return previous;
+  }
+
+  private void nextToken() {
+    if (pushedToken != null) {
+      token = pushedToken;
+      pushedToken = null;
+    } else {
+      if (token == null || token.kind != TokenKind.EOF) {
+        token = tokens.next();
+        // transparently handle comment tokens
+        while (token.kind == TokenKind.COMMENT) {
+          makeComment(token);
+          token = tokens.next();
+        }
+      }
+    }
+    if (DEBUGGING) {
+      System.err.print(token);
+    }
+  }
+
+  private void pushToken(Token tokenToPush) {
+    if (pushedToken != null) {
+      throw new IllegalStateException("Exceeded LL(2) lookahead!");
+    }
+    pushedToken = token;
+    token = tokenToPush;
+  }
+
+  // create an error expression
+  private Ident makeErrorExpression(int start, int end) {
+    return setLocation(new Ident("$error$"), start, end);
+  }
+
+  // Convenience wrapper around ASTNode.setLocation that returns the node.
+  private <NODE extends ASTNode> NODE
+      setLocation(NODE node, int startOffset, int endOffset) {
+    node.setLocation(lexer.createLocation(startOffset, endOffset));
+    return node;
+  }
+
+  // Another convenience wrapper method around ASTNode.setLocation
+  private <NODE extends ASTNode> NODE setLocation(NODE node, Location location) {
+    node.setLocation(location);
+    return node;
+  }
+
+  // Convenience method that uses end offset from the last node.
+  private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, ASTNode lastNode) {
+    return setLocation(node, startOffset, lastNode.getLocation().getEndOffset());
+  }
+
+  // create a funcall expression
+  private Expression makeFuncallExpression(Expression receiver, Ident function,
+                                           List<Argument> args,
+                                           int start, int end) {
+    if (function.getLocation() == null) {
+      function = setLocation(function, start, end);
+    }
+    boolean seenKeywordArg = false;
+    boolean seenKwargs = false;
+    for (Argument arg : args) {
+      if (arg.isPositional()) {
+        if (seenKeywordArg || seenKwargs) {
+          reportError(arg.getLocation(), "syntax error: non-keyword arg after keyword arg");
+          return makeErrorExpression(start, end);
+        }
+      } else if (arg.isKwargs()) {
+        if (seenKwargs) {
+          reportError(arg.getLocation(), "there can be only one **kwargs argument");
+          return makeErrorExpression(start, end);
+        }
+        seenKwargs = true;
+      } else {
+        seenKeywordArg = true;
+      }
+    }
+
+    return setLocation(new FuncallExpression(receiver, function, args), start, end);
+  }
+
+  // arg ::= IDENTIFIER '=' expr
+  //       | expr
+  private Argument parseFunctionCallArgument() {
+    int start = token.left;
+    if (token.kind == TokenKind.IDENTIFIER) {
+      Token identToken = token;
+      String name = (String) token.value;
+      Ident ident = setLocation(new Ident(name), start, token.right);
+      nextToken();
+      if (token.kind == TokenKind.EQUALS) { // it's a named argument
+        nextToken();
+        Expression expr = parseExpression();
+        return setLocation(new Argument(ident, expr), start, expr);
+      } else { // oops, back up!
+        pushToken(identToken);
+      }
+    }
+    // parse **expr
+    if (token.kind == TokenKind.STAR) {
+      expect(TokenKind.STAR);
+      expect(TokenKind.STAR);
+      Expression expr = parseExpression();
+      return setLocation(new Argument(null, expr, true), start, expr);
+    }
+    // parse a positional argument
+    Expression expr = parseExpression();
+    return setLocation(new Argument(expr), start, expr);
+  }
+
+  // arg ::= IDENTIFIER '=' expr
+  //       | IDENTIFIER
+  private Argument parseFunctionDefArgument(boolean onlyOptional) {
+    int start = token.left;
+    Ident ident = parseIdent();
+    if (token.kind == TokenKind.EQUALS) { // there's a default value
+      nextToken();
+      Expression expr = parseExpression();
+      return setLocation(new Argument(ident, expr), start, expr);
+    } else if (onlyOptional) {
+      reportError(ident.getLocation(),
+          "Optional arguments are only allowed at the end of the argument list.");
+    }
+    return setLocation(new Argument(ident), start, ident);
+  }
+
+  // funcall_suffix ::= '(' arg_list? ')'
+  private Expression parseFuncallSuffix(int start, Expression receiver,
+                                        Ident function) {
+    List<Argument> args = Collections.emptyList();
+    expect(TokenKind.LPAREN);
+    int end;
+    if (token.kind == TokenKind.RPAREN) {
+      end = token.right;
+      nextToken(); // RPAREN
+    } else {
+      args = parseFunctionCallArguments(); // (includes optional trailing comma)
+      end = token.right;
+      expect(TokenKind.RPAREN);
+    }
+    return makeFuncallExpression(receiver, function, args, start, end);
+  }
+
+  // selector_suffix ::= '.' IDENTIFIER
+  //                    |'.' IDENTIFIER funcall_suffix
+  private Expression parseSelectorSuffix(int start, Expression receiver) {
+    expect(TokenKind.DOT);
+    if (token.kind == TokenKind.IDENTIFIER) {
+      Ident ident = parseIdent();
+      if (token.kind == TokenKind.LPAREN) {
+        return parseFuncallSuffix(start, receiver, ident);
+      } else {
+        return setLocation(new DotExpression(receiver, ident), start, token.right);
+      }
+    } else {
+      syntaxError(token);
+      int end = syncTo(EXPR_TERMINATOR_SET);
+      return makeErrorExpression(start, end);
+    }
+  }
+
+  // arg_list ::= ( (arg ',')* arg ','? )?
+  private List<Argument> parseFunctionCallArguments() {
+    List<Argument> args = new ArrayList<>();
+    //  terminating tokens for an arg list
+    while (token.kind != TokenKind.RPAREN) {
+      if (token.kind == TokenKind.EOF) {
+        syntaxError(token);
+        break;
+      }
+      args.add(parseFunctionCallArgument());
+      if (token.kind == TokenKind.COMMA) {
+        nextToken();
+      } else {
+        break;
+      }
+    }
+    return args;
+  }
+
+  // expr_list ::= ( (expr ',')* expr ','? )?
+  private List<Expression> parseExprList() {
+    List<Expression> list = new ArrayList<>();
+    //  terminating tokens for an expression list
+    while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.RBRACKET) {
+      list.add(parseExpression());
+      if (token.kind == TokenKind.COMMA) {
+        nextToken();
+      } else {
+        break;
+      }
+    }
+    return list;
+  }
+
+  // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )?
+  private List<DictionaryEntryLiteral> parseDictEntryList() {
+    List<DictionaryEntryLiteral> list = new ArrayList<>();
+    // the terminating token for a dict entry list
+    while (token.kind != TokenKind.RBRACE) {
+      list.add(parseDictEntry());
+      if (token.kind == TokenKind.COMMA) {
+        nextToken();
+      } else {
+        break;
+      }
+    }
+    return list;
+  }
+
+  // dict_entry ::= expression ':' expression
+  private DictionaryEntryLiteral parseDictEntry() {
+    int start = token.left;
+    Expression key = parseExpression();
+    expect(TokenKind.COLON);
+    Expression value = parseExpression();
+    return setLocation(new DictionaryEntryLiteral(key, value), start, value);
+  }
+
+  private ExpressionStatement mocksubincludeExpression(
+      String labelName, String file, Location location) {
+    List<Argument> args = new ArrayList<>();
+    args.add(setLocation(new Argument(new StringLiteral(labelName, '"')), location));
+    args.add(setLocation(new Argument(new StringLiteral(file, '"')), location));
+    Ident mockIdent = setLocation(new Ident("mocksubinclude"), location);
+    Expression funCall = new FuncallExpression(null, mockIdent, args);
+    return setLocation(new ExpressionStatement(funCall), location);
+  }
+
+  // parse a file from an include call
+  private void include(String labelName, List<Statement> list, Location location) {
+    if (locator == null) {
+      return;
+    }
+
+    try {
+      Label label = Label.parseAbsolute(labelName);
+      String packageName = label.getPackageFragment().getPathString();
+      Path packagePath = locator.getBuildFileForPackage(packageName);
+      if (packagePath == null) {
+        reportError(location, "Package '" + packageName + "' not found");
+        list.add(mocksubincludeExpression(labelName, "", location));
+        return;
+      }
+      Path path = packagePath.getParentDirectory();
+      Path file = path.getRelative(label.getName());
+
+      if (this.includedFiles.contains(file)) {
+        reportError(location, "Recursive inclusion of file '" + path + "'");
+        return;
+      }
+      ParserInputSource inputSource = ParserInputSource.create(file);
+
+      // Insert call to the mocksubinclude function to get the dependencies right.
+      list.add(mocksubincludeExpression(labelName, file.toString(), location));
+
+      Lexer lexer = new Lexer(inputSource, eventHandler, parsePython);
+      Parser parser = new Parser(lexer, eventHandler, locator, parsePython);
+      parser.addIncludedFiles(this.includedFiles);
+      list.addAll(parser.parseFileInput());
+    } catch (Label.SyntaxException e) {
+      reportError(location, "Invalid label '" + labelName + "'");
+    } catch (IOException e) {
+      reportError(location, "Include of '" + labelName + "' failed: " + e.getMessage());
+      list.add(mocksubincludeExpression(labelName, "", location));
+    }
+  }
+
+  //  primary ::= INTEGER
+  //            | STRING
+  //            | STRING '.' IDENTIFIER funcall_suffix
+  //            | IDENTIFIER
+  //            | IDENTIFIER funcall_suffix
+  //            | IDENTIFIER '.' selector_suffix
+  //            | list_expression
+  //            | '(' ')'                    // a tuple with zero elements
+  //            | '(' expr ')'               // a parenthesized expression
+  //            | '(' expr ',' expr_list ')' // a tuple with n elements
+  //            | dict_expression
+  //            | '-' primary_with_suffix
+  private Expression parsePrimary() {
+    int start = token.left;
+    switch (token.kind) {
+      case INT: {
+        IntegerLiteral literal = new IntegerLiteral((Integer) token.value);
+        setLocation(literal, start, token.right);
+        nextToken();
+        return literal;
+      }
+      case STRING: {
+        String value = (String) token.value;
+        int end = token.right;
+        char quoteChar = lexer.charAt(start);
+        nextToken();
+        if (token.kind == TokenKind.STRING) {
+          reportError(lexer.createLocation(end, token.left),
+              "Implicit string concatenation is forbidden, use the + operator");
+        }
+        StringLiteral literal = new StringLiteral(value, quoteChar);
+        setLocation(literal, start, end);
+        return literal;
+      }
+      case IDENTIFIER: {
+        Ident ident = parseIdent();
+        if (token.kind == TokenKind.LPAREN) { // it's a function application
+          return parseFuncallSuffix(start, null, ident);
+        } else {
+          return ident;
+        }
+      }
+      case LBRACKET: { // it's a list
+        return parseListExpression();
+      }
+      case LBRACE: { // it's a dictionary
+        return parseDictExpression();
+      }
+      case LPAREN: {
+        nextToken();
+        // check for the empty tuple literal
+        if (token.kind == TokenKind.RPAREN) {
+          ListLiteral literal =
+              ListLiteral.makeTuple(Collections.<Expression>emptyList());
+          setLocation(literal, start, token.right);
+          nextToken();
+          return literal;
+        }
+        // parse the first expression
+        Expression expression = parseExpression();
+        if (token.kind == TokenKind.COMMA) {  // it's a tuple
+          nextToken();
+          // parse the rest of the expression tuple
+          List<Expression> tuple = parseExprList();
+          // add the first expression to the front of the tuple
+          tuple.add(0, expression);
+          expect(TokenKind.RPAREN);
+          return setLocation(
+              ListLiteral.makeTuple(tuple), start, token.right);
+        }
+        setLocation(expression, start, token.right);
+        if (token.kind == TokenKind.RPAREN) {
+          nextToken();
+          return expression;
+        }
+        syntaxError(token);
+        int end = syncTo(EXPR_TERMINATOR_SET);
+        return makeErrorExpression(start, end);
+      }
+      case MINUS: {
+        nextToken();
+
+        List<Argument> args = new ArrayList<>();
+        Expression expr = parsePrimaryWithSuffix();
+        args.add(setLocation(new Argument(expr), start, expr));
+        return makeFuncallExpression(null, new Ident("-"), args,
+                                     start, token.right);
+      }
+      default: {
+        syntaxError(token);
+        int end = syncTo(EXPR_TERMINATOR_SET);
+        return makeErrorExpression(start, end);
+      }
+    }
+  }
+
+  // primary_with_suffix ::= primary selector_suffix*
+  //                       | primary substring_suffix
+  private Expression parsePrimaryWithSuffix() {
+    int start = token.left;
+    Expression receiver = parsePrimary();
+    while (true) {
+      if (token.kind == TokenKind.DOT) {
+        receiver = parseSelectorSuffix(start, receiver);
+      } else if (token.kind == TokenKind.LBRACKET) {
+        receiver = parseSubstringSuffix(start, receiver);
+      } else {
+        break;
+      }
+    }
+    return receiver;
+  }
+
+  // substring_suffix ::= '[' expression? ':' expression? ']'
+  private Expression parseSubstringSuffix(int start, Expression receiver) {
+    List<Argument> args = new ArrayList<>();
+    Expression startExpr;
+    Expression endExpr;
+
+    expect(TokenKind.LBRACKET);
+    int loc1 = token.left;
+    if (token.kind == TokenKind.COLON) {
+      startExpr = setLocation(new IntegerLiteral(0), token.left, token.right);
+    } else {
+      startExpr = parseExpression();
+    }
+    args.add(setLocation(new Argument(startExpr), loc1, startExpr));
+    // This is a dictionary access
+    if (token.kind == TokenKind.RBRACKET) {
+      expect(TokenKind.RBRACKET);
+      return makeFuncallExpression(receiver, new Ident("$index"), args,
+                                   start, token.right);
+    }
+    // This is a substring
+    expect(TokenKind.COLON);
+    int loc2 = token.left;
+    if (token.kind == TokenKind.RBRACKET) {
+      endExpr = setLocation(new IntegerLiteral(Integer.MAX_VALUE), token.left, token.right);
+    } else {
+      endExpr = parseExpression();
+    }
+    expect(TokenKind.RBRACKET);
+
+    args.add(setLocation(new Argument(endExpr), loc2, endExpr));
+    return makeFuncallExpression(receiver, new Ident("$substring"), args,
+                                 start, token.right);
+  }
+
+  // loop_variables ::= '(' variables ')'
+  //                  | variables
+  // variables ::= ident (',' ident)*
+  private Ident parseForLoopVariables() {
+    int start = token.left;
+    boolean hasParen = false;
+    if (token.kind == TokenKind.LPAREN) {
+      hasParen = true;
+      nextToken();
+    }
+
+    // TODO(bazel-team): allow multiple variables in the core Blaze language too.
+    Ident firstIdent = parseIdent();
+    boolean multipleVariables = false;
+
+    while (token.kind == TokenKind.COMMA) {
+      multipleVariables = true;
+      nextToken();
+      parseIdent();
+    }
+
+    if (hasParen) {
+      expect(TokenKind.RPAREN);
+    }
+
+    int end = token.right;
+    if (multipleVariables && !parsePython) {
+      reportError(lexer.createLocation(start, end),
+          "For loops with multiple variables are not yet supported. "
+          + PREPROCESSING_NEEDED);
+    }
+    return multipleVariables ? makeErrorExpression(start, end) : firstIdent;
+  }
+
+  // list_expression ::= '[' ']'
+  //                    |'[' expr ']'
+  //                    |'[' expr ',' expr_list ']'
+  //                    |'[' expr ('FOR' loop_variables 'IN' expr)+ ']'
+  private Expression parseListExpression() {
+    int start = token.left;
+    expect(TokenKind.LBRACKET);
+    if (token.kind == TokenKind.RBRACKET) { // empty List
+      ListLiteral literal =
+          ListLiteral.makeList(Collections.<Expression>emptyList());
+      setLocation(literal, start, token.right);
+      nextToken();
+      return literal;
+    }
+    Expression expression = parseExpression();
+    Preconditions.checkNotNull(expression,
+        "null element in list in AST at %s:%s", token.left, token.right);
+    switch (token.kind) {
+      case RBRACKET: { // singleton List
+        ListLiteral literal =
+            ListLiteral.makeList(Collections.singletonList(expression));
+        setLocation(literal, start, token.right);
+        nextToken();
+        return literal;
+      }
+      case FOR: { // list comprehension
+        ListComprehension listComprehension =
+          new ListComprehension(expression);
+        do {
+          nextToken();
+          Ident ident = parseForLoopVariables();
+          if (token.kind == TokenKind.IN) {
+            nextToken();
+            Expression listExpression = parseExpression();
+            listComprehension.add(ident, listExpression);
+          } else {
+            break;
+          }
+          if (token.kind == TokenKind.RBRACKET) {
+            setLocation(listComprehension, start, token.right);
+            nextToken();
+            return listComprehension;
+          }
+        } while (token.kind == TokenKind.FOR);
+
+        syntaxError(token);
+        int end = syncPast(LIST_TERMINATOR_SET);
+        return makeErrorExpression(start, end);
+      }
+      case COMMA: {
+        nextToken();
+        List<Expression> list = parseExprList();
+        Preconditions.checkState(!list.contains(null),
+            "null element in list in AST at %s:%s", token.left, token.right);
+        list.add(0, expression);
+        if (token.kind == TokenKind.RBRACKET) {
+          ListLiteral literal = ListLiteral.makeList(list);
+          setLocation(literal, start, token.right);
+          nextToken();
+          return literal;
+        }
+        syntaxError(token);
+        int end = syncPast(LIST_TERMINATOR_SET);
+        return makeErrorExpression(start, end);
+      }
+      default: {
+        syntaxError(token);
+        int end = syncPast(LIST_TERMINATOR_SET);
+        return makeErrorExpression(start, end);
+      }
+    }
+  }
+
+  // dict_expression ::= '{' '}'
+  //                    |'{' dict_entry_list '}'
+  //                    |'{' dict_entry 'FOR' loop_variables 'IN' expr '}'
+  private Expression parseDictExpression() {
+    int start = token.left;
+    expect(TokenKind.LBRACE);
+    if (token.kind == TokenKind.RBRACE) { // empty List
+      DictionaryLiteral literal =
+          new DictionaryLiteral(ImmutableList.<DictionaryEntryLiteral>of());
+      setLocation(literal, start, token.right);
+      nextToken();
+      return literal;
+    }
+    DictionaryEntryLiteral entry = parseDictEntry();
+    if (token.kind == TokenKind.FOR) {
+      // Dict comprehension
+      nextToken();
+      Ident loopVar = parseForLoopVariables();
+      expect(TokenKind.IN);
+      Expression listExpression = parseExpression();
+      expect(TokenKind.RBRACE);
+      return setLocation(new DictComprehension(
+          entry.getKey(), entry.getValue(), loopVar, listExpression), start, token.right);
+    }
+    List<DictionaryEntryLiteral> entries = new ArrayList<>();
+    entries.add(entry);
+    if (token.kind == TokenKind.COMMA) {
+      expect(TokenKind.COMMA);
+      entries.addAll(parseDictEntryList());
+    }
+    if (token.kind == TokenKind.RBRACE) {
+      DictionaryLiteral literal = new DictionaryLiteral(entries);
+      setLocation(literal, start, token.right);
+      nextToken();
+      return literal;
+    }
+    syntaxError(token);
+    int end = syncPast(DICT_TERMINATOR_SET);
+    return makeErrorExpression(start, end);
+  }
+
+  private Ident parseIdent() {
+    if (token.kind != TokenKind.IDENTIFIER) {
+      syntaxError(token);
+      return makeErrorExpression(token.left, token.right);
+    }
+    Ident ident = new Ident(((String) token.value));
+    setLocation(ident, token.left, token.right);
+    nextToken();
+    return ident;
+  }
+
+  // binop_expression ::= binop_expression OP binop_expression
+  //                    | parsePrimaryWithSuffix
+  // This function takes care of precedence between operators (see operatorPrecedence for
+  // the order), and it assumes left-to-right associativity.
+  private Expression parseBinOpExpression(int prec) {
+    int start = token.left;
+    Expression expr = parseExpression(prec + 1);
+    // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is
+    // limited to number of different precedence levels (operatorPrecedence.size()).
+    for (;;) {
+      if (!binaryOperators.containsKey(token.kind)) {
+        return expr;
+      }
+      Operator operator = binaryOperators.get(token.kind);
+      if (!operatorPrecedence.get(prec).contains(operator)) {
+        return expr;
+      }
+      nextToken();
+      Expression secondary = parseExpression(prec + 1);
+      expr = optimizeBinOpExpression(operator, expr, secondary);
+      setLocation(expr, start, secondary);
+    }
+  }
+
+  // Optimize binary expressions.
+  // string literal + string literal can be concatenated into one string literal
+  // so we don't have to do the expensive string concatenation at runtime.
+  private Expression optimizeBinOpExpression(
+      Operator operator, Expression expr, Expression secondary) {
+    if (operator == Operator.PLUS) {
+      if (expr instanceof StringLiteral && secondary instanceof StringLiteral) {
+        StringLiteral left = (StringLiteral) expr;
+        StringLiteral right = (StringLiteral) secondary;
+        if (left.getQuoteChar() == right.getQuoteChar()) {
+          return new StringLiteral(left.getValue() + right.getValue(), left.getQuoteChar());
+        }
+      }
+    }
+    return new BinaryOperatorExpression(operator, expr, secondary);
+  }
+
+  private Expression parseExpression() {
+    return parseExpression(0);
+  }
+
+  private Expression parseExpression(int prec) {
+    if (prec >= operatorPrecedence.size()) {
+      return parsePrimaryWithSuffix();
+    }
+    if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) {
+      return parseNotExpression(prec);
+    }
+    return parseBinOpExpression(prec);
+  }
+
+  // not_expr :== 'not' expr
+  private Expression parseNotExpression(int prec) {
+    int start = token.left;
+    expect(TokenKind.NOT);
+    Expression expression = parseExpression(prec + 1);
+    NotExpression notExpression = new NotExpression(expression);
+    return setLocation(notExpression, start, token.right);
+  }
+
+  // file_input ::= ('\n' | stmt)* EOF
+  private List<Statement> parseFileInput() {
+    List<Statement> list =  new ArrayList<>();
+    while (token.kind != TokenKind.EOF) {
+      if (token.kind == TokenKind.NEWLINE) {
+        expect(TokenKind.NEWLINE);
+      } else {
+        parseTopLevelStatement(list);
+      }
+    }
+    return list;
+  }
+
+  // load(STRING (COMMA STRING)*)
+  private void parseLoad(List<Statement> list) {
+    int start = token.left;
+    if (token.kind != TokenKind.STRING) {
+      expect(TokenKind.STRING);
+      return;
+    }
+    String path = (String) token.value;
+    nextToken();
+    expect(TokenKind.COMMA);
+
+    List<Ident> symbols = new ArrayList<>();
+    if (token.kind == TokenKind.STRING) {
+      symbols.add(new Ident((String) token.value));
+    }
+    expect(TokenKind.STRING);
+    while (token.kind == TokenKind.COMMA) {
+      expect(TokenKind.COMMA);
+      if (token.kind == TokenKind.STRING) {
+        symbols.add(new Ident((String) token.value));
+      }
+      expect(TokenKind.STRING);
+    }
+    expect(TokenKind.RPAREN);
+    list.add(setLocation(new LoadStatement(path, symbols), start, token.left));
+  }
+
+  private void parseTopLevelStatement(List<Statement> list) {
+    // In Python grammar, there is no "top-level statement" and imports are
+    // considered as "small statements". We are a bit stricter than Python here.
+    int start = token.left;
+
+    // Check if there is an include
+    if (token.kind == TokenKind.IDENTIFIER) {
+      Token identToken = token;
+      Ident ident = parseIdent();
+
+      if (ident.getName().equals("include") && token.kind == TokenKind.LPAREN && !skylarkMode) {
+        expect(TokenKind.LPAREN);
+        if (token.kind == TokenKind.STRING) {
+          include((String) token.value, list, lexer.createLocation(start, token.right));
+        }
+        expect(TokenKind.STRING);
+        expect(TokenKind.RPAREN);
+        return;
+      } else if (ident.getName().equals("load") && token.kind == TokenKind.LPAREN) {
+        expect(TokenKind.LPAREN);
+        parseLoad(list);
+        return;
+      }
+      pushToken(identToken); // push the ident back to parse it as a statement
+    }
+    parseStatement(list, true);
+  }
+
+  // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE
+  private void parseSimpleStatement(List<Statement> list) {
+    list.add(parseSmallStatement());
+
+    while (token.kind == TokenKind.SEMI) {
+      nextToken();
+      if (token.kind == TokenKind.NEWLINE) {
+        break;
+      }
+      list.add(parseSmallStatement());
+    }
+    expect(TokenKind.NEWLINE);
+    // This is a safe place to recover: There is a new line at top-level
+    // and the parser is at the end of a statement.
+    recoveryMode = false;
+  }
+
+  //     small_stmt ::= assign_stmt
+  //                  | expr
+  //                  | RETURN expr
+  //     assign_stmt ::= expr ('=' | augassign) expr
+  //     augassign ::= ('+=' )
+  // Note that these are in Python, but not implemented here (at least for now):
+  // '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |'<<=' | '>>=' | '**=' | '//='
+  // Semantic difference from Python:
+  // In Skylark, x += y is simple syntactic sugar for x = x + y.
+  // In Python, x += y is more or less equivalent to x = x + y, but if a method is defined
+  // on x.__iadd__(y), then it takes precedence, and in the case of lists it side-effects
+  // the original list (it doesn't do that on tuples); if no such method is defined it falls back
+  // to the x.__add__(y) method that backs x + y. In Skylark, we don't support this side-effect.
+  // Note also that there is a special casing to translate 'ident[key] = value'
+  // to 'ident = ident + {key: value}'. This is needed to support the pure version of Python-like
+  // dictionary assignment syntax.
+  private Statement parseSmallStatement() {
+    int start = token.left;
+    if (token.kind == TokenKind.RETURN) {
+      return parseReturnStatement();
+    }
+    Expression expression = parseExpression();
+    if (token.kind == TokenKind.EQUALS) {
+      nextToken();
+      Expression rvalue = parseExpression();
+      if (expression instanceof FuncallExpression) {
+        FuncallExpression func = (FuncallExpression) expression;
+        if (func.getFunction().getName().equals("$index") && func.getObject() instanceof Ident) {
+          // Special casing to translate 'ident[key] = value' to 'ident = ident + {key: value}'
+          // Note that the locations of these extra expressions are fake.
+          Preconditions.checkArgument(func.getArguments().size() == 1);
+          DictionaryLiteral dictRValue = setLocation(new DictionaryLiteral(ImmutableList.of(
+              setLocation(new DictionaryEntryLiteral(func.getArguments().get(0).getValue(), rvalue),
+                  start, token.right))), start, token.right);
+          BinaryOperatorExpression binExp = setLocation(new BinaryOperatorExpression(
+              Operator.PLUS, func.getObject(), dictRValue), start, token.right);
+          return setLocation(new AssignmentStatement(func.getObject(), binExp), start, token.right);
+        }
+      }
+      return setLocation(new AssignmentStatement(expression, rvalue), start, rvalue);
+    } else if (augmentedAssignmentMethods.containsKey(token.kind)) {
+      Operator operator = augmentedAssignmentMethods.get(token.kind);
+      nextToken();
+      Expression operand = parseExpression();
+      int end = operand.getLocation().getEndOffset();
+      return setLocation(new AssignmentStatement(expression,
+               setLocation(new BinaryOperatorExpression(
+                   operator, expression, operand), start, end)),
+               start, end);
+    } else {
+      return setLocation(new ExpressionStatement(expression), start, expression);
+    }
+  }
+
+  // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]?
+  private void parseIfStatement(List<Statement> list) {
+    int start = token.left;
+    List<ConditionalStatements> thenBlocks = new ArrayList<>();
+    thenBlocks.add(parseConditionalStatements(TokenKind.IF));
+    while (token.kind == TokenKind.ELIF) {
+      thenBlocks.add(parseConditionalStatements(TokenKind.ELIF));
+    }
+    List<Statement> elseBlock = new ArrayList<>();
+    if (token.kind == TokenKind.ELSE) {
+      expect(TokenKind.ELSE);
+      expect(TokenKind.COLON);
+      parseSuite(elseBlock);
+    }
+    Statement stmt = new IfStatement(thenBlocks, elseBlock);
+    list.add(setLocation(stmt, start, token.right));
+  }
+
+  // cond_stmts ::= [EL]IF expr ':' suite
+  private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) {
+    int start = token.left;
+    expect(tokenKind);
+    Expression expr = parseExpression();
+    expect(TokenKind.COLON);
+    List<Statement> thenBlock = new ArrayList<>();
+    parseSuite(thenBlock);
+    ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock);
+    return setLocation(stmt, start, token.right);
+  }
+
+  // for_stmt ::= FOR IDENTIFIER IN expr ':' suite
+  private void parseForStatement(List<Statement> list) {
+    int start = token.left;
+    expect(TokenKind.FOR);
+    Ident ident = parseIdent();
+    expect(TokenKind.IN);
+    Expression collection = parseExpression();
+    expect(TokenKind.COLON);
+    List<Statement> block = new ArrayList<>();
+    parseSuite(block);
+    Statement stmt = new ForStatement(ident, collection, block);
+    list.add(setLocation(stmt, start, token.right));
+  }
+
+  // def foo(bar1, bar2):
+  private void parseFunctionDefStatement(List<Statement> list) {
+    int start = token.left;
+    expect(TokenKind.DEF);
+    Ident ident = parseIdent();
+    expect(TokenKind.LPAREN);
+    // parsing the function arguments, at this point only identifiers
+    // TODO(bazel-team): support proper arguments with default values and kwargs
+    List<Argument> args = parseFunctionDefArguments();
+    expect(TokenKind.RPAREN);
+    expect(TokenKind.COLON);
+    List<Statement> block = new ArrayList<>();
+    parseSuite(block);
+    FunctionDefStatement stmt = new FunctionDefStatement(ident, args, block);
+    list.add(setLocation(stmt, start, token.right));
+  }
+
+  private List<Argument> parseFunctionDefArguments() {
+    List<Argument> args = new ArrayList<>();
+    Set<String> argNames = new HashSet<>();
+    boolean onlyOptional = false;
+    while (token.kind != TokenKind.RPAREN) {
+      Argument arg = parseFunctionDefArgument(onlyOptional);
+      if (arg.hasValue()) {
+        onlyOptional = true;
+      }
+      args.add(arg);
+      if (argNames.contains(arg.getArgName())) {
+        reportError(lexer.createLocation(token.left, token.right),
+            "duplicate argument name in function definition");
+      }
+      argNames.add(arg.getArgName());
+      if (token.kind == TokenKind.COMMA) {
+        nextToken();
+      } else {
+        break;
+      }
+    }
+    return args;
+  }
+
+  // suite ::= simple_stmt
+  //         | NEWLINE INDENT stmt+ OUTDENT
+  private void parseSuite(List<Statement> list) {
+    if (token.kind == TokenKind.NEWLINE) {
+      expect(TokenKind.NEWLINE);
+      if (token.kind != TokenKind.INDENT) {
+        reportError(lexer.createLocation(token.left, token.right),
+                    "expected an indented block");
+        return;
+      }
+      expect(TokenKind.INDENT);
+      while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) {
+        parseStatement(list, false);
+      }
+      expect(TokenKind.OUTDENT);
+    } else {
+      Statement stmt = parseSmallStatement();
+      list.add(stmt);
+      expect(TokenKind.NEWLINE);
+    }
+  }
+
+  // skipSuite does not check that the code is syntactically correct, it
+  // just skips based on indentation levels.
+  private void skipSuite() {
+    if (token.kind == TokenKind.NEWLINE) {
+      expect(TokenKind.NEWLINE);
+      if (token.kind != TokenKind.INDENT) {
+        reportError(lexer.createLocation(token.left, token.right),
+                    "expected an indented block");
+        return;
+      }
+      expect(TokenKind.INDENT);
+
+      // Don't try to parse all the Python syntax, just skip the block
+      // until the corresponding outdent token.
+      int depth = 1;
+      while (depth > 0) {
+        // Because of the way the lexer works, this should never happen
+        Preconditions.checkState(token.kind != TokenKind.EOF);
+
+        if (token.kind == TokenKind.INDENT) {
+          depth++;
+        }
+        if (token.kind == TokenKind.OUTDENT) {
+          depth--;
+        }
+        nextToken();
+      }
+
+    } else {
+      // the block ends at the newline token
+      // e.g.  if x == 3: print "three"
+      syncTo(STATEMENT_TERMINATOR_SET);
+    }
+  }
+
+  // stmt ::= simple_stmt
+  //        | compound_stmt
+  private void parseStatement(List<Statement> list, boolean isTopLevel) {
+    if (token.kind == TokenKind.DEF && skylarkMode) {
+      if (!isTopLevel) {
+        reportError(lexer.createLocation(token.left, token.right),
+            "nested functions are not allowed. Move the function to top-level");
+      }
+      parseFunctionDefStatement(list);
+    } else if (token.kind == TokenKind.IF && skylarkMode) {
+      parseIfStatement(list);
+    } else if (token.kind == TokenKind.FOR && skylarkMode) {
+      if (isTopLevel) {
+        reportError(lexer.createLocation(token.left, token.right),
+            "for loops are not allowed on top-level. Put it into a function");
+      }
+      parseForStatement(list);
+    } else if (token.kind == TokenKind.IF
+        || token.kind == TokenKind.ELSE
+        || token.kind == TokenKind.FOR
+        || token.kind == TokenKind.CLASS
+        || token.kind == TokenKind.DEF
+        || token.kind == TokenKind.TRY) {
+      skipBlock();
+    } else {
+      parseSimpleStatement(list);
+    }
+  }
+
+  // return_stmt ::= RETURN expr
+  private ReturnStatement parseReturnStatement() {
+    int start = token.left;
+    expect(TokenKind.RETURN);
+    Expression expression = parseExpression();
+    return setLocation(new ReturnStatement(expression), start, expression);
+  }
+
+  // block ::= ('if' | 'for' | 'class') expr ':' suite
+  private void skipBlock() {
+    int start = token.left;
+    Token blockToken = token;
+    syncTo(EnumSet.of(TokenKind.COLON, TokenKind.EOF)); // skip over expression or name
+    if (!parsePython) {
+      reportError(lexer.createLocation(start, token.right), "syntax error at '"
+                  + blockToken + "': This Python-style construct is not supported. "
+                  + PREPROCESSING_NEEDED);
+    }
+    expect(TokenKind.COLON);
+    skipSuite();
+  }
+
+  // create a comment node
+  private void makeComment(Token token) {
+    comments.add(setLocation(new Comment((String) token.value), token.left, token.right));
+  }
+}