#!/usr/bin/env python
#
# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Methods for checking EcmaScript files for indentation issues."""

__author__ = ('robbyw@google.com (Robert Walker)')

from closure_linter import ecmametadatapass
from closure_linter import errors
from closure_linter import javascripttokens
from closure_linter import tokenutil
from closure_linter.common import error
from closure_linter.common import position

import gflags as flags

flags.DEFINE_boolean('debug_indentation', False,
                     'Whether to print debugging information for indentation.')


# Shorthand
Context = ecmametadatapass.EcmaContext
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType


# The general approach:
#
# 1. Build a stack of tokens that can affect indentation.
#    For each token, we determine if it is a block or continuation token.
#    Some tokens need to be temporarily overwritten in case they are removed
#    before the end of the line.
#    Much of the work here is determining which tokens to keep on the stack
#    at each point. Operators, for example, should be removed once their
#    expression or line is gone, while parentheses must stay until the matching
#    end parentheses is found.
#
# 2. Given that stack, determine the allowable indentations.
#    Due to flexible indentation rules in JavaScript, there may be many
#    allowable indentations for each stack. We follow the general
#    "no false positives" approach of GJsLint and build the most permissive
#    set possible.


class TokenInfo(object):
  """Stores information about a token.

  Attributes:
    token: The token
    is_block: Whether the token represents a block indentation.
    is_transient: Whether the token should be automatically removed without
        finding a matching end token.
    overridden_by: TokenInfo for a token that overrides the indentation that
        this token would require.
    is_permanent_override: Whether the override on this token should persist
        even after the overriding token is removed from the stack. For
        example:
        x([
          1],
        2);
        needs this to be set so the last line is not required to be a
        continuation indent.
    line_number: The effective line number of this token. Will either be the
        actual line number or the one before it in the case of a mis-wrapped
        operator.
  """

  def __init__(self, token, is_block=False):
    """Initializes a TokenInfo object.

    Args:
      token: The token
      is_block: Whether the token represents a block indentation.
    """
    self.token = token
    self.overridden_by = None
    self.is_permanent_override = False
    self.is_block = is_block
    # Blocks and opening parens / parameter lists stay on the stack until
    # their matching end token; everything else is transient.
    self.is_transient = not is_block and token.type not in (
        Type.START_PAREN, Type.START_PARAMETERS)
    self.line_number = token.line_number

  def __repr__(self):
    """Returns a debugging representation of this stack entry."""
    result = '\n %s' % self.token
    if self.overridden_by:
      result = '%s OVERRIDDEN [by "%s"]' % (
          result, self.overridden_by.token.string)
    result += ' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient)
    return result


class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript and
  other Ecma like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    self._start_index_offset = {}

  def Finalize(self):
    """Verifies that the indentation stack is empty.

    Raises:
      Exception: If entries remain on the stack. The stack is reset before
          raising.
    """
    if self._stack:
      old_stack = self._stack
      self._stack = []
      raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state. Not used by
          this method.

    Returns:
      A list of indentation errors found for this token; empty if there are
      none. Each entry is a list of the form
      [error code, error string, error token, Position].
    """
    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      self._PopTo(Type.START_BLOCK)

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      self._Add(self._PopTo(Type.START_BLOCK))

    elif is_first and token.string == '.':
      # This token should have been on the previous line, so treat it as if it
      # was there.
      info = TokenInfo(token)
      info.line_number = token.line_number - 1
      self._Add(info)

    elif token_type == Type.SEMICOLON:
      self._PopTransient()

    not_binary_operator = (token_type != Type.OPERATOR or
                           token.metadata.IsUnaryOperator())
    not_dot = token.string != '.'
    if is_first and not_binary_operator and not_dot and token.type not in (
        Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
      if flags.FLAGS.debug_indentation:
        # Single-argument call form prints identically on Python 2 and 3.
        print('Line #%d: stack %r' % (token.line_number, stack))

      # Ignore lines that start in JsDoc since we don't check them properly yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default. Allow them
      # to outdent to the parent block.
      # NOTE(review): nesting reconstructed from whitespace-collapsed source;
      # the keyword check applies whether or not an END_BLOCK was skipped, so
      # that comments directly preceding case/default qualify - confirm.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      # A negative actual indentation means the line contains tabs and is
      # ignored here.
      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join('%d' % x for x in expected), actual),
            token,
            Position(actual, expected[0])])
        # Remember how far off this line is so hard stops computed from
        # tokens on it can be adjusted.
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      self._Add(TokenInfo(
          token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if stack and stack[-1].token.string == '?':
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if token.line_number == stack[-1].token.line_number:
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()

      elif (token.string.endswith('.')
            and token_type in (Type.IDENTIFIER, Type.NORMAL)):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
    elif token.IsAssignment():
      self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the amount.
    """
    return set(x + amount for x in original)

  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.

    Args:
      token: The token to examine.

    Returns:
      True if the token's type is in _HARD_STOP_TYPES, its string is in
      _HARD_STOP_STRINGS, or it is an assignment.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set()

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional. As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (
            token_info.overridden_by and
            self._IsHardStop(token_info.overridden_by.token))
        if not override_is_hard_stop:
          start_index = token.start_index
          # Compensate for a mis-indented line that was already reported.
          if token.line_number in self._start_index_offset:
            start_index += self._start_index_offset[token.line_number]
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            # len('return') plus one space.
            hard_stops.add(start_index + 7)

          elif token.type == Type.START_BRACKET:
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token. Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if '\t' in token.string:
        return -1
      return len(token.string)
    elif token.type == Type.PARAMETERS:
      # The leading whitespace is part of the token's own string here.
      return len(token.string) - len(token.string.lstrip())
    else:
      return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    return (token.previous and token.previous.IsFirstInLine() and
            token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Blocks and opening parens also mark earlier stack entries as overridden
    where applicable before being pushed.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    if token_info.is_block or token_info.token.type == Type.START_PAREN:
      # Walk the stack from the top down until an entry is found that the new
      # token does not override.
      index = 1
      while index <= len(self._stack):
        stack_info = self._stack[-index]
        stack_token = stack_info.token

        if stack_info.line_number == token_info.line_number:
          # In general, tokens only override each other when they are on
          # the same line.
          stack_info.overridden_by = token_info
          if (token_info.token.type == Type.START_BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
            # Multi-line blocks have lasting overrides, as in:
            # callFn({
            #   a: 10
            # },
            # 30);
            close_block = token_info.token.metadata.context.end_token
            stack_info.is_permanent_override = (
                close_block.line_number != token_info.token.line_number)
        elif (token_info.token.type == Type.START_BLOCK and
              token_info.token.metadata.context.type == Context.BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type == Type.IDENTIFIER)):
          # When starting a function block, the override can transcend lines.
          # For example
          # long.long.name = function(
          #     a) {
          # In this case the { and the = are on different lines. But the
          # override should still apply.
          stack_info.overridden_by = token_info
          stack_info.is_permanent_override = True
        else:
          break
        index += 1

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.
    """
    token_info = self._stack.pop()
    if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      token_check = token_info.token
      same_type = token_check.type
      goal_type = (Type.END_BRACKET if same_type == Type.START_BRACKET
                   else Type.END_BLOCK)
      line_number = token_info.token.line_number
      # Nesting depth of same-type openers seen so far on this line.
      count = 0
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found."""
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.
    """
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()