diff options
Diffstat (limited to 'tools/closure_linter-2.3.4/closure_linter/indentation.py')
-rwxr-xr-x | tools/closure_linter-2.3.4/closure_linter/indentation.py | 543 |
1 files changed, 543 insertions, 0 deletions
diff --git a/tools/closure_linter-2.3.4/closure_linter/indentation.py b/tools/closure_linter-2.3.4/closure_linter/indentation.py new file mode 100755 index 0000000..d740607 --- /dev/null +++ b/tools/closure_linter-2.3.4/closure_linter/indentation.py @@ -0,0 +1,543 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Methods for checking EcmaScript files for indentation issues.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +from closure_linter import ecmametadatapass +from closure_linter import errors +from closure_linter import javascripttokens +from closure_linter import tokenutil +from closure_linter.common import error +from closure_linter.common import position + +import gflags as flags + +flags.DEFINE_boolean('debug_indentation', False, + 'Whether to print debugging information for indentation.') + + +# Shorthand +Context = ecmametadatapass.EcmaContext +Error = error.Error +Position = position.Position +Type = javascripttokens.JavaScriptTokenType + + +# The general approach: +# +# 1. Build a stack of tokens that can affect indentation. +# For each token, we determine if it is a block or continuation token. +# Some tokens need to be temporarily overwritten in case they are removed +# before the end of the line. +# Much of the work here is determining which tokens to keep on the stack +# at each point. 
#      Operators, for example, should be removed once their expression or
#      line is gone, while parentheses must stay until the matching end
#      parenthesis is found.
#
#   2. Given that stack, determine the allowable indentations.
#      Due to flexible indentation rules in JavaScript, there may be many
#      allowable indentations for each stack.  We follow the general
#      "no false positives" approach of GJsLint and build the most permissive
#      set possible.


class TokenInfo(object):
  """Stores information about a token.

  Attributes:
    token: The token
    is_block: Whether the token represents a block indentation.
    is_transient: Whether the token should be automatically removed without
      finding a matching end token.
    overridden_by: TokenInfo for a token that overrides the indentation that
      this token would require.
    is_permanent_override: Whether the override on this token should persist
      even after the overriding token is removed from the stack.  For example:
      x([
        1],
      2);
      needs this to be set so the last line is not required to be a continuation
      indent.
    line_number: The effective line number of this token.  Will either be the
      actual line number or the one before it in the case of a mis-wrapped
      operator.
  """

  def __init__(self, token, is_block=False):
    """Initializes a TokenInfo object.

    Args:
      token: The token
      is_block: Whether the token represents a block indentation.
    """
    self.token = token
    self.overridden_by = None
    self.is_permanent_override = False
    self.is_block = is_block
    # Everything except blocks and opening parens / parameter lists is
    # transient, i.e. it may be popped without seeing a matching end token.
    self.is_transient = not is_block and token.type not in (
        Type.START_PAREN, Type.START_PARAMETERS)
    self.line_number = token.line_number

  def __repr__(self):
    """Returns a debugging description of this entry, used in stack dumps."""
    result = '\n %s' % self.token
    if self.overridden_by:
      result = '%s OVERRIDDEN [by "%s"]' % (
          result, self.overridden_by.token.string)
    result += ' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient)
    return result


class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript and
  other Ecma like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    self._start_index_offset = {}

  def Finalize(self):
    """Verifies that the indentation stack has been fully unwound.

    The checker state is reset whether or not the check passes.

    Raises:
      Exception: If any token info is still on the stack.  The message contains
        the repr of the leftover stack.
    """
    # The offsets are keyed by bare line number, so they are meaningless once
    # the current token stream is finished; drop them together with the stack.
    self._start_index_offset = {}

    if self._stack:
      old_stack = self._stack
      self._stack = []
      raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state.  Not used by
        this method; kept for interface compatibility.

    Returns:
      A list of errors found at this token; empty if the indentation is
      acceptable or the line is not checked.  Each error is a list of the form
      [error code, error string, error token, fix position].
    """

    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      self._PopTo(Type.START_BLOCK)

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      # Unwind to the enclosing block, then put that block back on the stack.
      self._Add(self._PopTo(Type.START_BLOCK))

    elif is_first and token.string == '.':
      # This token should have been on the previous line, so treat it as if it
      # was there.
      info = TokenInfo(token)
      info.line_number = token.line_number - 1
      self._Add(info)

    elif token_type == Type.SEMICOLON:
      self._PopTransient()

    not_binary_operator = (token_type != Type.OPERATOR or
                           token.metadata.IsUnaryOperator())
    not_dot = token.string != '.'
    if is_first and not_binary_operator and not_dot and token_type not in (
        Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
      if flags.FLAGS.debug_indentation:
        # Single-argument call form: prints the same text on Python 2 and 3.
        print('Line #%d: stack %r' % (token.line_number, stack))

      # Ignore lines that start in JsDoc since we don't check them properly yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default.  Allow them
      # to outdent to the parent block.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      # A negative actual indentation means the line contains tabs and is
      # skipped here.
      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join('%d' % x for x in expected), actual),
            token,
            Position(actual, expected[0])])
        # Remember how far off this line is so that hard stops computed from
        # tokens on it are shifted by the same amount.
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      self._Add(TokenInfo(
          token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if stack and stack[-1].token.string == '?':
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if token.line_number == stack[-1].token.line_number:
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()

      elif (token.string.endswith('.')
            and token_type in (Type.IDENTIFIER, Type.NORMAL)):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
    elif token.IsAssignment():
      # _Add ignores the token if it is already on top of the stack.
      self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the amount.
    """
    return set(x + amount for x in original)

  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.

    Args:
      token: The token to examine.

    Returns:
      A true value if the token's type or string is one of the hard stop
      types/strings, or if it is an assignment.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set()

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional.  As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          # Further continuation tokens make the extra 4 spaces optional.
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (
            token_info.overridden_by and
            self._IsHardStop(token_info.overridden_by.token))
        if not override_is_hard_stop:
          # Shift by the amount the token's own line was found to be
          # mis-indented, if any.
          start_index = (token.start_index +
                         self._start_index_offset.get(token.line_number, 0))
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            # len('return') plus one following space.
            hard_stops.add(start_index + 7)

          elif token.type == Type.START_BRACKET:
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token.  Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if '\t' in token.string:
        return -1
      return len(token.string)

    if token.type == Type.PARAMETERS:
      # A parameters token carries its own leading whitespace.
      return len(token.string) - len(token.string.lstrip())

    return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    # Otherwise the only thing allowed before it is the line's leading
    # whitespace token.
    return bool(token.previous and token.previous.IsFirstInLine() and
                token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Blocks and opening parens additionally mark the entries beneath them as
    overridden, walking down from the top of the stack until an entry is found
    that they do not override.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    new_token = token_info.token
    if token_info.is_block or new_token.type == Type.START_PAREN:
      # Walk from the top of the stack downwards; the stack itself is not
      # modified inside this loop.
      for stack_info in reversed(self._stack):
        stack_token = stack_info.token

        if stack_info.line_number == token_info.line_number:
          # In general, tokens only override each other when they are on
          # the same line.
          stack_info.overridden_by = token_info
          if (new_token.type == Type.START_BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
            # Multi-line blocks have lasting overrides, as in:
            # callFn({
            #   a: 10
            # },
            # 30);
            close_block = new_token.metadata.context.end_token
            stack_info.is_permanent_override = (
                close_block.line_number != new_token.line_number)
        elif (new_token.type == Type.START_BLOCK and
              new_token.metadata.context.type == Context.BLOCK and
              (stack_token.IsAssignment() or
               stack_token.type == Type.IDENTIFIER)):
          # When starting a function block, the override can transcend lines.
          # For example
          #   long.long.name = function(
          #       a) {
          # In this case the { and the = are on different lines.  But the
          # override should still apply.
          stack_info.overridden_by = token_info
          stack_info.is_permanent_override = True
        else:
          break

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.

    Raises:
      IndexError: If the stack is empty.
    """
    token_info = self._stack.pop()
    start_token = token_info.token
    if start_token.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      same_type = start_token.type
      if same_type == Type.START_BRACKET:
        goal_type = Type.END_BRACKET
      else:
        goal_type = Type.END_BLOCK
      line_number = start_token.line_number

      # Scan the rest of the line, tracking nesting depth, to see whether the
      # matching closer appears on the opener's line.
      count = 0
      token_check = start_token
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found.

    The implied block token itself is popped as well.
    """
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.

    Raises:
      IndexError: If the stack runs out before such a token is found.
    """
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Permanent overrides are left in place.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()