diff options
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/syntax/Parser.java')
-rw-r--r-- | src/main/java/com/google/devtools/build/lib/syntax/Parser.java | 1274 |
1 files changed, 1274 insertions, 0 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Parser.java b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java new file mode 100644 index 0000000000..66c3c6729f --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/syntax/Parser.java @@ -0,0 +1,1274 @@ +// Copyright 2014 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.syntax; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.devtools.build.lib.events.Event; +import com.google.devtools.build.lib.events.EventHandler; +import com.google.devtools.build.lib.events.Location; +import com.google.devtools.build.lib.packages.CachingPackageLocator; +import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral; +import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements; +import com.google.devtools.build.lib.vfs.Path; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Recursive descent parser for LL(2) BUILD language. + * Loosely based on Python 2 grammar. + * See https://docs.python.org/2/reference/grammar.html + * + */ +class Parser { + + /** + * Combines the parser result into a single value object. + */ + public static final class ParseResult { + /** The statements (rules, basically) from the parsed file. */ + public final List<Statement> statements; + + /** The comments from the parsed file. */ + public final List<Comment> comments; + + /** Whether the file contained any errors. */ + public final boolean containsErrors; + + public ParseResult(List<Statement> statements, List<Comment> comments, boolean containsErrors) { + // No need to copy here; when the object is created, the parser instance is just about to go + // out of scope and be garbage collected. + this.statements = Preconditions.checkNotNull(statements); + this.comments = Preconditions.checkNotNull(comments); + this.containsErrors = containsErrors; + } + } + + private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET = + EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE); + + private static final EnumSet<TokenKind> LIST_TERMINATOR_SET = + EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI); + + private static final EnumSet<TokenKind> DICT_TERMINATOR_SET = + EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI); + + private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET = EnumSet.of( + TokenKind.EOF, + TokenKind.COMMA, + TokenKind.COLON, + TokenKind.FOR, + TokenKind.PLUS, + TokenKind.MINUS, + TokenKind.PERCENT, + TokenKind.RPAREN, + TokenKind.RBRACKET); + + private Token token; // current lookahead token + private Token pushedToken = null; // used to implement LL(2) + + private static final boolean DEBUGGING = false; + + private final Lexer lexer; + private final EventHandler eventHandler; + private final List<Comment> comments; + private final boolean parsePython; + /** Whether advanced language constructs are allowed */ + private boolean skylarkMode = false; + + private static final Map<TokenKind, Operator> binaryOperators = + new ImmutableMap.Builder<TokenKind, Operator>() + .put(TokenKind.AND, Operator.AND) + .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS) + .put(TokenKind.GREATER, Operator.GREATER) + .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS) + .put(TokenKind.IN, Operator.IN) + .put(TokenKind.LESS, Operator.LESS) + .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS) + .put(TokenKind.MINUS, Operator.MINUS) + .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS) + .put(TokenKind.OR, Operator.OR) + .put(TokenKind.PERCENT, Operator.PERCENT) + .put(TokenKind.PLUS, Operator.PLUS) + .put(TokenKind.STAR, Operator.MULT) + .build(); + + private static final Map<TokenKind, Operator> augmentedAssignmentMethods = + new ImmutableMap.Builder<TokenKind, Operator>() + .put(TokenKind.PLUS_EQUALS, Operator.PLUS) // += // TODO(bazel-team): other similar operators + .build(); + + /** Highest precedence goes last. + * Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence + **/ + private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of( + EnumSet.of(Operator.OR), + EnumSet.of(Operator.AND), + EnumSet.of(Operator.NOT), + EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS, + Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN), + EnumSet.of(Operator.MINUS, Operator.PLUS), + EnumSet.of(Operator.MULT, Operator.PERCENT)); + + private Iterator<Token> tokens = null; + private int errorsCount; + private boolean recoveryMode; // stop reporting errors until next statement + + private CachingPackageLocator locator; + + private List<Path> includedFiles; + + private static final String PREPROCESSING_NEEDED = + "Add \"# PYTHON-PREPROCESSING-REQUIRED\" on the first line of the file"; + + private Parser(Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator, + boolean parsePython) { + this.lexer = lexer; + this.eventHandler = eventHandler; + this.parsePython = parsePython; + this.tokens = lexer.getTokens().iterator(); + this.comments = new ArrayList<Comment>(); + this.locator = locator; + this.includedFiles = new ArrayList<Path>(); + this.includedFiles.add(lexer.getFilename()); + nextToken(); + } + + private Parser(Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator) { + this(lexer, eventHandler, locator, false /* parsePython */); + } + + public Parser setSkylarkMode(boolean skylarkMode) { + this.skylarkMode = skylarkMode; + return this; + } + + /** + * Entry-point to parser that parses a build file with comments. All errors + * encountered during parsing are reported via "reporter". + */ + public static ParseResult parseFile( + Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator, + boolean parsePython) { + Parser parser = new Parser(lexer, eventHandler, locator, parsePython); + List<Statement> statements = parser.parseFileInput(); + return new ParseResult(statements, parser.comments, + parser.errorsCount > 0 || lexer.containsErrors()); + } + + /** + * Entry-point to parser that parses a build file with comments. All errors + * encountered during parsing are reported via "reporter". Enable Skylark extensions + * that are not part of the core BUILD language. + */ + public static ParseResult parseFileForSkylark( + Lexer lexer, EventHandler eventHandler, CachingPackageLocator locator, + ValidationEnvironment validationEnvironment) { + Parser parser = new Parser(lexer, eventHandler, locator).setSkylarkMode(true); + List<Statement> statements = parser.parseFileInput(); + boolean hasSemanticalErrors = false; + try { + for (Statement statement : statements) { + statement.validate(validationEnvironment); + } + } catch (EvalException e) { + eventHandler.handle(Event.error(e.getLocation(), e.getMessage())); + hasSemanticalErrors = true; + } + return new ParseResult(statements, parser.comments, + parser.errorsCount > 0 || lexer.containsErrors() || hasSemanticalErrors); + } + + /** + * Entry-point to parser that parses a statement. All errors encountered + * during parsing are reported via "reporter". + */ + @VisibleForTesting + public static Statement parseStatement( + Lexer lexer, EventHandler eventHandler) { + return new Parser(lexer, eventHandler, null).parseSmallStatement(); + } + + /** + * Entry-point to parser that parses an expression. All errors encountered + * during parsing are reported via "reporter". The expression may be followed + * by newline tokens. + */ + @VisibleForTesting + public static Expression parseExpression(Lexer lexer, EventHandler eventHandler) { + Parser parser = new Parser(lexer, eventHandler, null); + Expression result = parser.parseExpression(); + while (parser.token.kind == TokenKind.NEWLINE) { + parser.nextToken(); + } + parser.expect(TokenKind.EOF); + return result; + } + + private void addIncludedFiles(List<Path> files) { + this.includedFiles.addAll(files); + } + + private void reportError(Location location, String message) { + errorsCount++; + // Limit the number of reported errors to avoid spamming output. + if (errorsCount <= 5) { + eventHandler.handle(Event.error(location, message)); + } + } + + private void syntaxError(Token token) { + if (!recoveryMode) { + String msg = token.kind == TokenKind.INDENT + ? "indentation error" + : "syntax error at '" + token + "'"; + reportError(lexer.createLocation(token.left, token.right), msg); + recoveryMode = true; + } + } + + // Consumes the current token. If it is not of the specified (expected) + // kind, reports a syntax error. + private boolean expect(TokenKind kind) { + boolean expected = token.kind == kind; + if (!expected) { + syntaxError(token); + } + nextToken(); + return expected; + } + + /** + * Consume tokens past the first token that has a kind that is in the set of + * teminatingTokens. + * @param terminatingTokens + * @return the end offset of the terminating token. + */ + private int syncPast(EnumSet<TokenKind> terminatingTokens) { + Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF)); + while (!terminatingTokens.contains(token.kind)) { + nextToken(); + } + int end = token.right; + // read past the synchronization token + nextToken(); + return end; + } + + /** + * Consume tokens until we reach the first token that has a kind that is in + * the set of teminatingTokens. + * @param terminatingTokens + * @return the end offset of the terminating token. + */ + private int syncTo(EnumSet<TokenKind> terminatingTokens) { + // EOF must be in the set to prevent an infinite loop + Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF)); + // read past the problematic token + int previous = token.right; + nextToken(); + int current = previous; + while (!terminatingTokens.contains(token.kind)) { + nextToken(); + previous = current; + current = token.right; + } + return previous; + } + + private void nextToken() { + if (pushedToken != null) { + token = pushedToken; + pushedToken = null; + } else { + if (token == null || token.kind != TokenKind.EOF) { + token = tokens.next(); + // transparently handle comment tokens + while (token.kind == TokenKind.COMMENT) { + makeComment(token); + token = tokens.next(); + } + } + } + if (DEBUGGING) { + System.err.print(token); + } + } + + private void pushToken(Token tokenToPush) { + if (pushedToken != null) { + throw new IllegalStateException("Exceeded LL(2) lookahead!"); + } + pushedToken = token; + token = tokenToPush; + } + + // create an error expression + private Ident makeErrorExpression(int start, int end) { + return setLocation(new Ident("$error$"), start, end); + } + + // Convenience wrapper around ASTNode.setLocation that returns the node. + private <NODE extends ASTNode> NODE + setLocation(NODE node, int startOffset, int endOffset) { + node.setLocation(lexer.createLocation(startOffset, endOffset)); + return node; + } + + // Another convenience wrapper method around ASTNode.setLocation + private <NODE extends ASTNode> NODE setLocation(NODE node, Location location) { + node.setLocation(location); + return node; + } + + // Convenience method that uses end offset from the last node. + private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, ASTNode lastNode) { + return setLocation(node, startOffset, lastNode.getLocation().getEndOffset()); + } + + // create a funcall expression + private Expression makeFuncallExpression(Expression receiver, Ident function, + List<Argument> args, + int start, int end) { + if (function.getLocation() == null) { + function = setLocation(function, start, end); + } + boolean seenKeywordArg = false; + boolean seenKwargs = false; + for (Argument arg : args) { + if (arg.isPositional()) { + if (seenKeywordArg || seenKwargs) { + reportError(arg.getLocation(), "syntax error: non-keyword arg after keyword arg"); + return makeErrorExpression(start, end); + } + } else if (arg.isKwargs()) { + if (seenKwargs) { + reportError(arg.getLocation(), "there can be only one **kwargs argument"); + return makeErrorExpression(start, end); + } + seenKwargs = true; + } else { + seenKeywordArg = true; + } + } + + return setLocation(new FuncallExpression(receiver, function, args), start, end); + } + + // arg ::= IDENTIFIER '=' expr + // | expr + private Argument parseFunctionCallArgument() { + int start = token.left; + if (token.kind == TokenKind.IDENTIFIER) { + Token identToken = token; + String name = (String) token.value; + Ident ident = setLocation(new Ident(name), start, token.right); + nextToken(); + if (token.kind == TokenKind.EQUALS) { // it's a named argument + nextToken(); + Expression expr = parseExpression(); + return setLocation(new Argument(ident, expr), start, expr); + } else { // oops, back up! + pushToken(identToken); + } + } + // parse **expr + if (token.kind == TokenKind.STAR) { + expect(TokenKind.STAR); + expect(TokenKind.STAR); + Expression expr = parseExpression(); + return setLocation(new Argument(null, expr, true), start, expr); + } + // parse a positional argument + Expression expr = parseExpression(); + return setLocation(new Argument(expr), start, expr); + } + + // arg ::= IDENTIFIER '=' expr + // | IDENTIFIER + private Argument parseFunctionDefArgument(boolean onlyOptional) { + int start = token.left; + Ident ident = parseIdent(); + if (token.kind == TokenKind.EQUALS) { // there's a default value + nextToken(); + Expression expr = parseExpression(); + return setLocation(new Argument(ident, expr), start, expr); + } else if (onlyOptional) { + reportError(ident.getLocation(), + "Optional arguments are only allowed at the end of the argument list."); + } + return setLocation(new Argument(ident), start, ident); + } + + // funcall_suffix ::= '(' arg_list? ')' + private Expression parseFuncallSuffix(int start, Expression receiver, + Ident function) { + List<Argument> args = Collections.emptyList(); + expect(TokenKind.LPAREN); + int end; + if (token.kind == TokenKind.RPAREN) { + end = token.right; + nextToken(); // RPAREN + } else { + args = parseFunctionCallArguments(); // (includes optional trailing comma) + end = token.right; + expect(TokenKind.RPAREN); + } + return makeFuncallExpression(receiver, function, args, start, end); + } + + // selector_suffix ::= '.' IDENTIFIER + // |'.' IDENTIFIER funcall_suffix + private Expression parseSelectorSuffix(int start, Expression receiver) { + expect(TokenKind.DOT); + if (token.kind == TokenKind.IDENTIFIER) { + Ident ident = parseIdent(); + if (token.kind == TokenKind.LPAREN) { + return parseFuncallSuffix(start, receiver, ident); + } else { + return setLocation(new DotExpression(receiver, ident), start, token.right); + } + } else { + syntaxError(token); + int end = syncTo(EXPR_TERMINATOR_SET); + return makeErrorExpression(start, end); + } + } + + // arg_list ::= ( (arg ',')* arg ','? )? + private List<Argument> parseFunctionCallArguments() { + List<Argument> args = new ArrayList<>(); + // terminating tokens for an arg list + while (token.kind != TokenKind.RPAREN) { + if (token.kind == TokenKind.EOF) { + syntaxError(token); + break; + } + args.add(parseFunctionCallArgument()); + if (token.kind == TokenKind.COMMA) { + nextToken(); + } else { + break; + } + } + return args; + } + + // expr_list ::= ( (expr ',')* expr ','? )? + private List<Expression> parseExprList() { + List<Expression> list = new ArrayList<>(); + // terminating tokens for an expression list + while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.RBRACKET) { + list.add(parseExpression()); + if (token.kind == TokenKind.COMMA) { + nextToken(); + } else { + break; + } + } + return list; + } + + // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )? + private List<DictionaryEntryLiteral> parseDictEntryList() { + List<DictionaryEntryLiteral> list = new ArrayList<>(); + // the terminating token for a dict entry list + while (token.kind != TokenKind.RBRACE) { + list.add(parseDictEntry()); + if (token.kind == TokenKind.COMMA) { + nextToken(); + } else { + break; + } + } + return list; + } + + // dict_entry ::= expression ':' expression + private DictionaryEntryLiteral parseDictEntry() { + int start = token.left; + Expression key = parseExpression(); + expect(TokenKind.COLON); + Expression value = parseExpression(); + return setLocation(new DictionaryEntryLiteral(key, value), start, value); + } + + private ExpressionStatement mocksubincludeExpression( + String labelName, String file, Location location) { + List<Argument> args = new ArrayList<>(); + args.add(setLocation(new Argument(new StringLiteral(labelName, '"')), location)); + args.add(setLocation(new Argument(new StringLiteral(file, '"')), location)); + Ident mockIdent = setLocation(new Ident("mocksubinclude"), location); + Expression funCall = new FuncallExpression(null, mockIdent, args); + return setLocation(new ExpressionStatement(funCall), location); + } + + // parse a file from an include call + private void include(String labelName, List<Statement> list, Location location) { + if (locator == null) { + return; + } + + try { + Label label = Label.parseAbsolute(labelName); + String packageName = label.getPackageFragment().getPathString(); + Path packagePath = locator.getBuildFileForPackage(packageName); + if (packagePath == null) { + reportError(location, "Package '" + packageName + "' not found"); + list.add(mocksubincludeExpression(labelName, "", location)); + return; + } + Path path = packagePath.getParentDirectory(); + Path file = path.getRelative(label.getName()); + + if (this.includedFiles.contains(file)) { + reportError(location, "Recursive inclusion of file '" + path + "'"); + return; + } + ParserInputSource inputSource = ParserInputSource.create(file); + + // Insert call to the mocksubinclude function to get the dependencies right. + list.add(mocksubincludeExpression(labelName, file.toString(), location)); + + Lexer lexer = new Lexer(inputSource, eventHandler, parsePython); + Parser parser = new Parser(lexer, eventHandler, locator, parsePython); + parser.addIncludedFiles(this.includedFiles); + list.addAll(parser.parseFileInput()); + } catch (Label.SyntaxException e) { + reportError(location, "Invalid label '" + labelName + "'"); + } catch (IOException e) { + reportError(location, "Include of '" + labelName + "' failed: " + e.getMessage()); + list.add(mocksubincludeExpression(labelName, "", location)); + } + } + + // primary ::= INTEGER + // | STRING + // | STRING '.' IDENTIFIER funcall_suffix + // | IDENTIFIER + // | IDENTIFIER funcall_suffix + // | IDENTIFIER '.' selector_suffix + // | list_expression + // | '(' ')' // a tuple with zero elements + // | '(' expr ')' // a parenthesized expression + // | '(' expr ',' expr_list ')' // a tuple with n elements + // | dict_expression + // | '-' primary_with_suffix + private Expression parsePrimary() { + int start = token.left; + switch (token.kind) { + case INT: { + IntegerLiteral literal = new IntegerLiteral((Integer) token.value); + setLocation(literal, start, token.right); + nextToken(); + return literal; + } + case STRING: { + String value = (String) token.value; + int end = token.right; + char quoteChar = lexer.charAt(start); + nextToken(); + if (token.kind == TokenKind.STRING) { + reportError(lexer.createLocation(end, token.left), + "Implicit string concatenation is forbidden, use the + operator"); + } + StringLiteral literal = new StringLiteral(value, quoteChar); + setLocation(literal, start, end); + return literal; + } + case IDENTIFIER: { + Ident ident = parseIdent(); + if (token.kind == TokenKind.LPAREN) { // it's a function application + return parseFuncallSuffix(start, null, ident); + } else { + return ident; + } + } + case LBRACKET: { // it's a list + return parseListExpression(); + } + case LBRACE: { // it's a dictionary + return parseDictExpression(); + } + case LPAREN: { + nextToken(); + // check for the empty tuple literal + if (token.kind == TokenKind.RPAREN) { + ListLiteral literal = + ListLiteral.makeTuple(Collections.<Expression>emptyList()); + setLocation(literal, start, token.right); + nextToken(); + return literal; + } + // parse the first expression + Expression expression = parseExpression(); + if (token.kind == TokenKind.COMMA) { // it's a tuple + nextToken(); + // parse the rest of the expression tuple + List<Expression> tuple = parseExprList(); + // add the first expression to the front of the tuple + tuple.add(0, expression); + expect(TokenKind.RPAREN); + return setLocation( + ListLiteral.makeTuple(tuple), start, token.right); + } + setLocation(expression, start, token.right); + if (token.kind == TokenKind.RPAREN) { + nextToken(); + return expression; + } + syntaxError(token); + int end = syncTo(EXPR_TERMINATOR_SET); + return makeErrorExpression(start, end); + } + case MINUS: { + nextToken(); + + List<Argument> args = new ArrayList<>(); + Expression expr = parsePrimaryWithSuffix(); + args.add(setLocation(new Argument(expr), start, expr)); + return makeFuncallExpression(null, new Ident("-"), args, + start, token.right); + } + default: { + syntaxError(token); + int end = syncTo(EXPR_TERMINATOR_SET); + return makeErrorExpression(start, end); + } + } + } + + // primary_with_suffix ::= primary selector_suffix* + // | primary substring_suffix + private Expression parsePrimaryWithSuffix() { + int start = token.left; + Expression receiver = parsePrimary(); + while (true) { + if (token.kind == TokenKind.DOT) { + receiver = parseSelectorSuffix(start, receiver); + } else if (token.kind == TokenKind.LBRACKET) { + receiver = parseSubstringSuffix(start, receiver); + } else { + break; + } + } + return receiver; + } + + // substring_suffix ::= '[' expression? ':' expression? ']' + private Expression parseSubstringSuffix(int start, Expression receiver) { + List<Argument> args = new ArrayList<>(); + Expression startExpr; + Expression endExpr; + + expect(TokenKind.LBRACKET); + int loc1 = token.left; + if (token.kind == TokenKind.COLON) { + startExpr = setLocation(new IntegerLiteral(0), token.left, token.right); + } else { + startExpr = parseExpression(); + } + args.add(setLocation(new Argument(startExpr), loc1, startExpr)); + // This is a dictionary access + if (token.kind == TokenKind.RBRACKET) { + expect(TokenKind.RBRACKET); + return makeFuncallExpression(receiver, new Ident("$index"), args, + start, token.right); + } + // This is a substring + expect(TokenKind.COLON); + int loc2 = token.left; + if (token.kind == TokenKind.RBRACKET) { + endExpr = setLocation(new IntegerLiteral(Integer.MAX_VALUE), token.left, token.right); + } else { + endExpr = parseExpression(); + } + expect(TokenKind.RBRACKET); + + args.add(setLocation(new Argument(endExpr), loc2, endExpr)); + return makeFuncallExpression(receiver, new Ident("$substring"), args, + start, token.right); + } + + // loop_variables ::= '(' variables ')' + // | variables + // variables ::= ident (',' ident)* + private Ident parseForLoopVariables() { + int start = token.left; + boolean hasParen = false; + if (token.kind == TokenKind.LPAREN) { + hasParen = true; + nextToken(); + } + + // TODO(bazel-team): allow multiple variables in the core Blaze language too. + Ident firstIdent = parseIdent(); + boolean multipleVariables = false; + + while (token.kind == TokenKind.COMMA) { + multipleVariables = true; + nextToken(); + parseIdent(); + } + + if (hasParen) { + expect(TokenKind.RPAREN); + } + + int end = token.right; + if (multipleVariables && !parsePython) { + reportError(lexer.createLocation(start, end), + "For loops with multiple variables are not yet supported. " + + PREPROCESSING_NEEDED); + } + return multipleVariables ? makeErrorExpression(start, end) : firstIdent; + } + + // list_expression ::= '[' ']' + // |'[' expr ']' + // |'[' expr ',' expr_list ']' + // |'[' expr ('FOR' loop_variables 'IN' expr)+ ']' + private Expression parseListExpression() { + int start = token.left; + expect(TokenKind.LBRACKET); + if (token.kind == TokenKind.RBRACKET) { // empty List + ListLiteral literal = + ListLiteral.makeList(Collections.<Expression>emptyList()); + setLocation(literal, start, token.right); + nextToken(); + return literal; + } + Expression expression = parseExpression(); + Preconditions.checkNotNull(expression, + "null element in list in AST at %s:%s", token.left, token.right); + switch (token.kind) { + case RBRACKET: { // singleton List + ListLiteral literal = + ListLiteral.makeList(Collections.singletonList(expression)); + setLocation(literal, start, token.right); + nextToken(); + return literal; + } + case FOR: { // list comprehension + ListComprehension listComprehension = + new ListComprehension(expression); + do { + nextToken(); + Ident ident = parseForLoopVariables(); + if (token.kind == TokenKind.IN) { + nextToken(); + Expression listExpression = parseExpression(); + listComprehension.add(ident, listExpression); + } else { + break; + } + if (token.kind == TokenKind.RBRACKET) { + setLocation(listComprehension, start, token.right); + nextToken(); + return listComprehension; + } + } while (token.kind == TokenKind.FOR); + + syntaxError(token); + int end = syncPast(LIST_TERMINATOR_SET); + return makeErrorExpression(start, end); + } + case COMMA: { + nextToken(); + List<Expression> list = parseExprList(); + Preconditions.checkState(!list.contains(null), + "null element in list in AST at %s:%s", token.left, token.right); + list.add(0, expression); + if (token.kind == TokenKind.RBRACKET) { + ListLiteral literal = ListLiteral.makeList(list); + setLocation(literal, start, token.right); + nextToken(); + return literal; + } + syntaxError(token); + int end = syncPast(LIST_TERMINATOR_SET); + return makeErrorExpression(start, end); + } + default: { + syntaxError(token); + int end = syncPast(LIST_TERMINATOR_SET); + return makeErrorExpression(start, end); + } + } + } + + // dict_expression ::= '{' '}' + // |'{' dict_entry_list '}' + // |'{' dict_entry 'FOR' loop_variables 'IN' expr '}' + private Expression parseDictExpression() { + int start = token.left; + expect(TokenKind.LBRACE); + if (token.kind == TokenKind.RBRACE) { // empty List + DictionaryLiteral literal = + new DictionaryLiteral(ImmutableList.<DictionaryEntryLiteral>of()); + setLocation(literal, start, token.right); + nextToken(); + return literal; + } + DictionaryEntryLiteral entry = parseDictEntry(); + if (token.kind == TokenKind.FOR) { + // Dict comprehension + nextToken(); + Ident loopVar = parseForLoopVariables(); + expect(TokenKind.IN); + Expression listExpression = parseExpression(); + expect(TokenKind.RBRACE); + return setLocation(new DictComprehension( + entry.getKey(), entry.getValue(), loopVar, listExpression), start, token.right); + } + List<DictionaryEntryLiteral> entries = new ArrayList<>(); + entries.add(entry); + if (token.kind == TokenKind.COMMA) { + expect(TokenKind.COMMA); + entries.addAll(parseDictEntryList()); + } + if (token.kind == TokenKind.RBRACE) { + DictionaryLiteral literal = new DictionaryLiteral(entries); + setLocation(literal, start, token.right); + nextToken(); + return literal; + } + syntaxError(token); + int end = syncPast(DICT_TERMINATOR_SET); + return makeErrorExpression(start, end); + } + + private Ident parseIdent() { + if (token.kind != TokenKind.IDENTIFIER) { + syntaxError(token); + return makeErrorExpression(token.left, token.right); + } + Ident ident = new Ident(((String) token.value)); + setLocation(ident, token.left, token.right); + nextToken(); + return ident; + } + + // binop_expression ::= binop_expression OP binop_expression + // | parsePrimaryWithSuffix + // This function takes care of precedence between operators (see operatorPrecedence for + // the order), and it assumes left-to-right associativity. + private Expression parseBinOpExpression(int prec) { + int start = token.left; + Expression expr = parseExpression(prec + 1); + // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is + // limited to number of different precedence levels (operatorPrecedence.size()). + for (;;) { + if (!binaryOperators.containsKey(token.kind)) { + return expr; + } + Operator operator = binaryOperators.get(token.kind); + if (!operatorPrecedence.get(prec).contains(operator)) { + return expr; + } + nextToken(); + Expression secondary = parseExpression(prec + 1); + expr = optimizeBinOpExpression(operator, expr, secondary); + setLocation(expr, start, secondary); + } + } + + // Optimize binary expressions. + // string literal + string literal can be concatenated into one string literal + // so we don't have to do the expensive string concatenation at runtime. + private Expression optimizeBinOpExpression( + Operator operator, Expression expr, Expression secondary) { + if (operator == Operator.PLUS) { + if (expr instanceof StringLiteral && secondary instanceof StringLiteral) { + StringLiteral left = (StringLiteral) expr; + StringLiteral right = (StringLiteral) secondary; + if (left.getQuoteChar() == right.getQuoteChar()) { + return new StringLiteral(left.getValue() + right.getValue(), left.getQuoteChar()); + } + } + } + return new BinaryOperatorExpression(operator, expr, secondary); + } + + private Expression parseExpression() { + return parseExpression(0); + } + + private Expression parseExpression(int prec) { + if (prec >= operatorPrecedence.size()) { + return parsePrimaryWithSuffix(); + } + if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) { + return parseNotExpression(prec); + } + return parseBinOpExpression(prec); + } + + // not_expr :== 'not' expr + private Expression parseNotExpression(int prec) { + int start = token.left; + expect(TokenKind.NOT); + Expression expression = parseExpression(prec + 1); + NotExpression notExpression = new NotExpression(expression); + return setLocation(notExpression, start, token.right); + } + + // file_input ::= ('\n' | stmt)* EOF + private List<Statement> parseFileInput() { + List<Statement> list = new ArrayList<>(); + while (token.kind != TokenKind.EOF) { + if (token.kind == TokenKind.NEWLINE) { + expect(TokenKind.NEWLINE); + } else { + parseTopLevelStatement(list); + } + } + return list; + } + + // load(STRING (COMMA STRING)*) + private void parseLoad(List<Statement> list) { + int start = token.left; + if (token.kind != TokenKind.STRING) { + expect(TokenKind.STRING); + return; + } + String path = (String) token.value; + nextToken(); + expect(TokenKind.COMMA); + + List<Ident> symbols = new ArrayList<>(); + if (token.kind == TokenKind.STRING) { + symbols.add(new Ident((String) token.value)); + } + expect(TokenKind.STRING); + while (token.kind == TokenKind.COMMA) { + expect(TokenKind.COMMA); + if (token.kind == TokenKind.STRING) { + symbols.add(new Ident((String) token.value)); + } + expect(TokenKind.STRING); + } + expect(TokenKind.RPAREN); + list.add(setLocation(new LoadStatement(path, symbols), start, token.left)); + } + + private void parseTopLevelStatement(List<Statement> list) { + // In Python grammar, there is no "top-level statement" and imports are + // considered as "small statements". We are a bit stricter than Python here. + int start = token.left; + + // Check if there is an include + if (token.kind == TokenKind.IDENTIFIER) { + Token identToken = token; + Ident ident = parseIdent(); + + if (ident.getName().equals("include") && token.kind == TokenKind.LPAREN && !skylarkMode) { + expect(TokenKind.LPAREN); + if (token.kind == TokenKind.STRING) { + include((String) token.value, list, lexer.createLocation(start, token.right)); + } + expect(TokenKind.STRING); + expect(TokenKind.RPAREN); + return; + } else if (ident.getName().equals("load") && token.kind == TokenKind.LPAREN) { + expect(TokenKind.LPAREN); + parseLoad(list); + return; + } + pushToken(identToken); // push the ident back to parse it as a statement + } + parseStatement(list, true); + } + + // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE + private void parseSimpleStatement(List<Statement> list) { + list.add(parseSmallStatement()); + + while (token.kind == TokenKind.SEMI) { + nextToken(); + if (token.kind == TokenKind.NEWLINE) { + break; + } + list.add(parseSmallStatement()); + } + expect(TokenKind.NEWLINE); + // This is a safe place to recover: There is a new line at top-level + // and the parser is at the end of a statement. + recoveryMode = false; + } + + // small_stmt ::= assign_stmt + // | expr + // | RETURN expr + // assign_stmt ::= expr ('=' | augassign) expr + // augassign ::= ('+=' ) + // Note that these are in Python, but not implemented here (at least for now): + // '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |'<<=' | '>>=' | '**=' | '//=' + // Semantic difference from Python: + // In Skylark, x += y is simple syntactic sugar for x = x + y. + // In Python, x += y is more or less equivalent to x = x + y, but if a method is defined + // on x.__iadd__(y), then it takes precedence, and in the case of lists it side-effects + // the original list (it doesn't do that on tuples); if no such method is defined it falls back + // to the x.__add__(y) method that backs x + y. In Skylark, we don't support this side-effect. + // Note also that there is a special casing to translate 'ident[key] = value' + // to 'ident = ident + {key: value}'. This is needed to support the pure version of Python-like + // dictionary assignment syntax. + private Statement parseSmallStatement() { + int start = token.left; + if (token.kind == TokenKind.RETURN) { + return parseReturnStatement(); + } + Expression expression = parseExpression(); + if (token.kind == TokenKind.EQUALS) { + nextToken(); + Expression rvalue = parseExpression(); + if (expression instanceof FuncallExpression) { + FuncallExpression func = (FuncallExpression) expression; + if (func.getFunction().getName().equals("$index") && func.getObject() instanceof Ident) { + // Special casing to translate 'ident[key] = value' to 'ident = ident + {key: value}' + // Note that the locations of these extra expressions are fake. + Preconditions.checkArgument(func.getArguments().size() == 1); + DictionaryLiteral dictRValue = setLocation(new DictionaryLiteral(ImmutableList.of( + setLocation(new DictionaryEntryLiteral(func.getArguments().get(0).getValue(), rvalue), + start, token.right))), start, token.right); + BinaryOperatorExpression binExp = setLocation(new BinaryOperatorExpression( + Operator.PLUS, func.getObject(), dictRValue), start, token.right); + return setLocation(new AssignmentStatement(func.getObject(), binExp), start, token.right); + } + } + return setLocation(new AssignmentStatement(expression, rvalue), start, rvalue); + } else if (augmentedAssignmentMethods.containsKey(token.kind)) { + Operator operator = augmentedAssignmentMethods.get(token.kind); + nextToken(); + Expression operand = parseExpression(); + int end = operand.getLocation().getEndOffset(); + return setLocation(new AssignmentStatement(expression, + setLocation(new BinaryOperatorExpression( + operator, expression, operand), start, end)), + start, end); + } else { + return setLocation(new ExpressionStatement(expression), start, expression); + } + } + + // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]? + private void parseIfStatement(List<Statement> list) { + int start = token.left; + List<ConditionalStatements> thenBlocks = new ArrayList<>(); + thenBlocks.add(parseConditionalStatements(TokenKind.IF)); + while (token.kind == TokenKind.ELIF) { + thenBlocks.add(parseConditionalStatements(TokenKind.ELIF)); + } + List<Statement> elseBlock = new ArrayList<>(); + if (token.kind == TokenKind.ELSE) { + expect(TokenKind.ELSE); + expect(TokenKind.COLON); + parseSuite(elseBlock); + } + Statement stmt = new IfStatement(thenBlocks, elseBlock); + list.add(setLocation(stmt, start, token.right)); + } + + // cond_stmts ::= [EL]IF expr ':' suite + private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) { + int start = token.left; + expect(tokenKind); + Expression expr = parseExpression(); + expect(TokenKind.COLON); + List<Statement> thenBlock = new ArrayList<>(); + parseSuite(thenBlock); + ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock); + return setLocation(stmt, start, token.right); + } + + // for_stmt ::= FOR IDENTIFIER IN expr ':' suite + private void parseForStatement(List<Statement> list) { + int start = token.left; + expect(TokenKind.FOR); + Ident ident = parseIdent(); + expect(TokenKind.IN); + Expression collection = parseExpression(); + expect(TokenKind.COLON); + List<Statement> block = new ArrayList<>(); + parseSuite(block); + Statement stmt = new ForStatement(ident, collection, block); + list.add(setLocation(stmt, start, token.right)); + } + + // def foo(bar1, bar2): + private void parseFunctionDefStatement(List<Statement> list) { + int start = token.left; + expect(TokenKind.DEF); + Ident ident = parseIdent(); + expect(TokenKind.LPAREN); + // parsing the function arguments, at this point only identifiers + // TODO(bazel-team): support proper arguments with default values and kwargs + List<Argument> args = parseFunctionDefArguments(); + expect(TokenKind.RPAREN); + expect(TokenKind.COLON); + List<Statement> block = new ArrayList<>(); + parseSuite(block); + FunctionDefStatement stmt = new FunctionDefStatement(ident, args, block); + list.add(setLocation(stmt, start, token.right)); + } + + private List<Argument> parseFunctionDefArguments() { + List<Argument> args = new ArrayList<>(); + Set<String> argNames = new HashSet<>(); + boolean onlyOptional = false; + while (token.kind != TokenKind.RPAREN) { + Argument arg = parseFunctionDefArgument(onlyOptional); + if (arg.hasValue()) { + onlyOptional = true; + } + args.add(arg); + if (argNames.contains(arg.getArgName())) { + reportError(lexer.createLocation(token.left, token.right), + "duplicate argument name in function definition"); + } + argNames.add(arg.getArgName()); + if (token.kind == TokenKind.COMMA) { + nextToken(); + } else { + break; + } + } + return args; + } + + // suite ::= simple_stmt + // | NEWLINE INDENT stmt+ OUTDENT + private void parseSuite(List<Statement> list) { + if (token.kind == TokenKind.NEWLINE) { + expect(TokenKind.NEWLINE); + if (token.kind != TokenKind.INDENT) { + reportError(lexer.createLocation(token.left, token.right), + "expected an indented block"); + return; + } + expect(TokenKind.INDENT); + while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) { + parseStatement(list, false); + } + expect(TokenKind.OUTDENT); + } else { + Statement stmt = parseSmallStatement(); + list.add(stmt); + expect(TokenKind.NEWLINE); + } + } + + // skipSuite does not check that the code is syntactically correct, it + // just skips based on indentation levels. + private void skipSuite() { + if (token.kind == TokenKind.NEWLINE) { + expect(TokenKind.NEWLINE); + if (token.kind != TokenKind.INDENT) { + reportError(lexer.createLocation(token.left, token.right), + "expected an indented block"); + return; + } + expect(TokenKind.INDENT); + + // Don't try to parse all the Python syntax, just skip the block + // until the corresponding outdent token. + int depth = 1; + while (depth > 0) { + // Because of the way the lexer works, this should never happen + Preconditions.checkState(token.kind != TokenKind.EOF); + + if (token.kind == TokenKind.INDENT) { + depth++; + } + if (token.kind == TokenKind.OUTDENT) { + depth--; + } + nextToken(); + } + + } else { + // the block ends at the newline token + // e.g. if x == 3: print "three" + syncTo(STATEMENT_TERMINATOR_SET); + } + } + + // stmt ::= simple_stmt + // | compound_stmt + private void parseStatement(List<Statement> list, boolean isTopLevel) { + if (token.kind == TokenKind.DEF && skylarkMode) { + if (!isTopLevel) { + reportError(lexer.createLocation(token.left, token.right), + "nested functions are not allowed. Move the function to top-level"); + } + parseFunctionDefStatement(list); + } else if (token.kind == TokenKind.IF && skylarkMode) { + parseIfStatement(list); + } else if (token.kind == TokenKind.FOR && skylarkMode) { + if (isTopLevel) { + reportError(lexer.createLocation(token.left, token.right), + "for loops are not allowed on top-level. Put it into a function"); + } + parseForStatement(list); + } else if (token.kind == TokenKind.IF + || token.kind == TokenKind.ELSE + || token.kind == TokenKind.FOR + || token.kind == TokenKind.CLASS + || token.kind == TokenKind.DEF + || token.kind == TokenKind.TRY) { + skipBlock(); + } else { + parseSimpleStatement(list); + } + } + + // return_stmt ::= RETURN expr + private ReturnStatement parseReturnStatement() { + int start = token.left; + expect(TokenKind.RETURN); + Expression expression = parseExpression(); + return setLocation(new ReturnStatement(expression), start, expression); + } + + // block ::= ('if' | 'for' | 'class') expr ':' suite + private void skipBlock() { + int start = token.left; + Token blockToken = token; + syncTo(EnumSet.of(TokenKind.COLON, TokenKind.EOF)); // skip over expression or name + if (!parsePython) { + reportError(lexer.createLocation(start, token.right), "syntax error at '" + + blockToken + "': This Python-style construct is not supported. " + + PREPROCESSING_NEEDED); + } + expect(TokenKind.COLON); + skipSuite(); + } + + // create a comment node + private void makeComment(Token token) { + comments.add(setLocation(new Comment((String) token.value), token.left, token.right)); + } +} |