diff options
Diffstat (limited to 'tools/closure_linter-2.3.4/closure_linter/ecmametadatapass.py')
-rwxr-xr-x | tools/closure_linter-2.3.4/closure_linter/ecmametadatapass.py | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/tools/closure_linter-2.3.4/closure_linter/ecmametadatapass.py b/tools/closure_linter-2.3.4/closure_linter/ecmametadatapass.py new file mode 100755 index 0000000..2c797b3 --- /dev/null +++ b/tools/closure_linter-2.3.4/closure_linter/ecmametadatapass.py @@ -0,0 +1,521 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Metadata pass for annotating tokens in EcmaScript files.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +from closure_linter import javascripttokens +from closure_linter import tokenutil + + +TokenType = javascripttokens.JavaScriptTokenType + + +class ParseError(Exception): + """Exception indicating a parse error at the given token. + + Attributes: + token: The token where the parse error occurred. + """ + + def __init__(self, token, message=None): + """Initialize a parse error at the given token with an optional message. + + Args: + token: The token where the parse error occurred. + message: A message describing the parse error. + """ + Exception.__init__(self, message) + self.token = token + + +class EcmaContext(object): + """Context object for EcmaScript languages. + + Attributes: + type: The context type. + start_token: The token where this context starts. + end_token: The token where this context ends. + parent: The parent context. + """ + + # The root context. + ROOT = 'root' + + # A block of code. + BLOCK = 'block' + + # A pseudo-block of code for a given case or default section. + CASE_BLOCK = 'case_block' + + # Block of statements in a for loop's parentheses. + FOR_GROUP_BLOCK = 'for_block' + + # An implied block of code for 1 line if, while, and for statements + IMPLIED_BLOCK = 'implied_block' + + # An index in to an array or object. + INDEX = 'index' + + # An array literal in []. + ARRAY_LITERAL = 'array_literal' + + # An object literal in {}. + OBJECT_LITERAL = 'object_literal' + + # An individual element in an array or object literal. + LITERAL_ELEMENT = 'literal_element' + + # The portion of a ternary statement between ? and : + TERNARY_TRUE = 'ternary_true' + + # The portion of a ternary statment after : + TERNARY_FALSE = 'ternary_false' + + # The entire switch statment. This will contain a GROUP with the variable + # and a BLOCK with the code. + + # Since that BLOCK is not a normal block, it can not contain statements except + # for case and default. + SWITCH = 'switch' + + # A normal comment. + COMMENT = 'comment' + + # A JsDoc comment. + DOC = 'doc' + + # An individual statement. + STATEMENT = 'statement' + + # Code within parentheses. + GROUP = 'group' + + # Parameter names in a function declaration. + PARAMETERS = 'parameters' + + # A set of variable declarations appearing after the 'var' keyword. + VAR = 'var' + + # Context types that are blocks. + BLOCK_TYPES = frozenset([ + ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK]) + + def __init__(self, type, start_token, parent): + """Initializes the context object. + + Args: + type: The context type. + start_token: The token where this context starts. + parent: The parent context. + """ + self.type = type + self.start_token = start_token + self.end_token = None + self.parent = parent + + def __repr__(self): + """Returns a string representation of the context object.""" + stack = [] + context = self + while context: + stack.append(context.type) + context = context.parent + return 'Context(%s)' % ' > '.join(stack) + + +class EcmaMetaData(object): + """Token metadata for EcmaScript languages. + + Attributes: + last_code: The last code token to appear before this one. + context: The context this token appears in. + operator_type: The operator type, will be one of the *_OPERATOR constants + defined below. + """ + + UNARY_OPERATOR = 'unary' + + UNARY_POST_OPERATOR = 'unary_post' + + BINARY_OPERATOR = 'binary' + + TERNARY_OPERATOR = 'ternary' + + def __init__(self): + """Initializes a token metadata object.""" + self.last_code = None + self.context = None + self.operator_type = None + self.is_implied_semicolon = False + self.is_implied_block = False + self.is_implied_block_close = False + + def __repr__(self): + """Returns a string representation of the context object.""" + parts = ['%r' % self.context] + if self.operator_type: + parts.append('optype: %r' % self.operator_type) + if self.is_implied_semicolon: + parts.append('implied;') + return 'MetaData(%s)' % ', '.join(parts) + + def IsUnaryOperator(self): + return self.operator_type in (EcmaMetaData.UNARY_OPERATOR, + EcmaMetaData.UNARY_POST_OPERATOR) + + def IsUnaryPostOperator(self): + return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR + + +class EcmaMetaDataPass(object): + """A pass that iterates over all tokens and builds metadata about them.""" + + def __init__(self): + """Initialize the meta data pass object.""" + self.Reset() + + def Reset(self): + """Resets the metadata pass to prepare for the next file.""" + self._token = None + self._context = None + self._AddContext(EcmaContext.ROOT) + self._last_code = None + + def _CreateContext(self, type): + """Overridable by subclasses to create the appropriate context type.""" + return EcmaContext(type, self._token, self._context) + + def _CreateMetaData(self): + """Overridable by subclasses to create the appropriate metadata type.""" + return EcmaMetaData() + + def _AddContext(self, type): + """Adds a context of the given type to the context stack. + + Args: + type: The type of context to create + """ + self._context = self._CreateContext(type) + + def _PopContext(self): + """Moves up one level in the context stack. + + Returns: + The former context. + + Raises: + ParseError: If the root context is popped. + """ + top_context = self._context + top_context.end_token = self._token + self._context = top_context.parent + if self._context: + return top_context + else: + raise ParseError(self._token) + + def _PopContextType(self, *stop_types): + """Pops the context stack until a context of the given type is popped. + + Args: + stop_types: The types of context to pop to - stops at the first match. + + Returns: + The context object of the given type that was popped. + """ + last = None + while not last or last.type not in stop_types: + last = self._PopContext() + return last + + def _EndStatement(self): + """Process the end of a statement.""" + self._PopContextType(EcmaContext.STATEMENT) + if self._context.type == EcmaContext.IMPLIED_BLOCK: + self._token.metadata.is_implied_block_close = True + self._PopContext() + + def _ProcessContext(self): + """Process the context at the current token. + + Returns: + The context that should be assigned to the current token, or None if + the current context after this method should be used. + + Raises: + ParseError: When the token appears in an invalid context. + """ + token = self._token + token_type = token.type + + if self._context.type in EcmaContext.BLOCK_TYPES: + # Whenever we're in a block, we add a statement context. We make an + # exception for switch statements since they can only contain case: and + # default: and therefore don't directly contain statements. + # The block we add here may be immediately removed in some cases, but + # that causes no harm. + parent = self._context.parent + if not parent or parent.type != EcmaContext.SWITCH: + self._AddContext(EcmaContext.STATEMENT) + + elif self._context.type == EcmaContext.ARRAY_LITERAL: + self._AddContext(EcmaContext.LITERAL_ELEMENT) + + if token_type == TokenType.START_PAREN: + if self._last_code and self._last_code.IsKeyword('for'): + # for loops contain multiple statements in the group unlike while, + # switch, if, etc. + self._AddContext(EcmaContext.FOR_GROUP_BLOCK) + else: + self._AddContext(EcmaContext.GROUP) + + elif token_type == TokenType.END_PAREN: + result = self._PopContextType(EcmaContext.GROUP, + EcmaContext.FOR_GROUP_BLOCK) + keyword_token = result.start_token.metadata.last_code + # keyword_token will not exist if the open paren is the first line of the + # file, for example if all code is wrapped in an immediately executed + # annonymous function. + if keyword_token and keyword_token.string in ('if', 'for', 'while'): + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + if next_code.type != TokenType.START_BLOCK: + # Check for do-while. + is_do_while = False + pre_keyword_token = keyword_token.metadata.last_code + if (pre_keyword_token and + pre_keyword_token.type == TokenType.END_BLOCK): + start_block_token = pre_keyword_token.metadata.context.start_token + is_do_while = start_block_token.metadata.last_code.string == 'do' + + # If it's not do-while, it's an implied block. + if not is_do_while: + self._AddContext(EcmaContext.IMPLIED_BLOCK) + token.metadata.is_implied_block = True + + return result + + # else (not else if) with no open brace after it should be considered the + # start of an implied block, similar to the case with if, for, and while + # above. + elif (token_type == TokenType.KEYWORD and + token.string == 'else'): + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + if (next_code.type != TokenType.START_BLOCK and + (next_code.type != TokenType.KEYWORD or next_code.string != 'if')): + self._AddContext(EcmaContext.IMPLIED_BLOCK) + token.metadata.is_implied_block = True + + elif token_type == TokenType.START_PARAMETERS: + self._AddContext(EcmaContext.PARAMETERS) + + elif token_type == TokenType.END_PARAMETERS: + return self._PopContextType(EcmaContext.PARAMETERS) + + elif token_type == TokenType.START_BRACKET: + if (self._last_code and + self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES): + self._AddContext(EcmaContext.INDEX) + else: + self._AddContext(EcmaContext.ARRAY_LITERAL) + + elif token_type == TokenType.END_BRACKET: + return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL) + + elif token_type == TokenType.START_BLOCK: + if (self._last_code.type in (TokenType.END_PAREN, + TokenType.END_PARAMETERS) or + self._last_code.IsKeyword('else') or + self._last_code.IsKeyword('do') or + self._last_code.IsKeyword('try') or + self._last_code.IsKeyword('finally') or + (self._last_code.IsOperator(':') and + self._last_code.metadata.context.type == EcmaContext.CASE_BLOCK)): + # else, do, try, and finally all might have no () before {. + # Also, handle the bizzare syntax case 10: {...}. + self._AddContext(EcmaContext.BLOCK) + else: + self._AddContext(EcmaContext.OBJECT_LITERAL) + + elif token_type == TokenType.END_BLOCK: + context = self._PopContextType(EcmaContext.BLOCK, + EcmaContext.OBJECT_LITERAL) + if self._context.type == EcmaContext.SWITCH: + # The end of the block also means the end of the switch statement it + # applies to. + return self._PopContext() + return context + + elif token.IsKeyword('switch'): + self._AddContext(EcmaContext.SWITCH) + + elif (token_type == TokenType.KEYWORD and + token.string in ('case', 'default')): + # Pop up to but not including the switch block. + while self._context.parent.type != EcmaContext.SWITCH: + self._PopContext() + + elif token.IsOperator('?'): + self._AddContext(EcmaContext.TERNARY_TRUE) + + elif token.IsOperator(':'): + if self._context.type == EcmaContext.OBJECT_LITERAL: + self._AddContext(EcmaContext.LITERAL_ELEMENT) + + elif self._context.type == EcmaContext.TERNARY_TRUE: + self._PopContext() + self._AddContext(EcmaContext.TERNARY_FALSE) + + # Handle nested ternary statements like: + # foo = bar ? baz ? 1 : 2 : 3 + # When we encounter the second ":" the context is + # ternary_false > ternary_true > statement > root + elif (self._context.type == EcmaContext.TERNARY_FALSE and + self._context.parent.type == EcmaContext.TERNARY_TRUE): + self._PopContext() # Leave current ternary false context. + self._PopContext() # Leave current parent ternary true + self._AddContext(EcmaContext.TERNARY_FALSE) + + elif self._context.parent.type == EcmaContext.SWITCH: + self._AddContext(EcmaContext.CASE_BLOCK) + + elif token.IsKeyword('var'): + self._AddContext(EcmaContext.VAR) + + elif token.IsOperator(','): + while self._context.type not in (EcmaContext.VAR, + EcmaContext.ARRAY_LITERAL, + EcmaContext.OBJECT_LITERAL, + EcmaContext.STATEMENT, + EcmaContext.PARAMETERS, + EcmaContext.GROUP): + self._PopContext() + + elif token_type == TokenType.SEMICOLON: + self._EndStatement() + + def Process(self, first_token): + """Processes the token stream starting with the given token.""" + self._token = first_token + while self._token: + self._ProcessToken() + + if self._token.IsCode(): + self._last_code = self._token + + self._token = self._token.next + + try: + self._PopContextType(self, EcmaContext.ROOT) + except ParseError: + # Ignore the "popped to root" error. + pass + + def _ProcessToken(self): + """Process the given token.""" + token = self._token + token.metadata = self._CreateMetaData() + context = (self._ProcessContext() or self._context) + token.metadata.context = context + token.metadata.last_code = self._last_code + + # Determine the operator type of the token, if applicable. + if token.type == TokenType.OPERATOR: + token.metadata.operator_type = self._GetOperatorType(token) + + # Determine if there is an implied semicolon after the token. + if token.type != TokenType.SEMICOLON: + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + # A statement like if (x) does not need a semicolon after it + is_implied_block = self._context == EcmaContext.IMPLIED_BLOCK + is_last_code_in_line = token.IsCode() and ( + not next_code or next_code.line_number != token.line_number) + is_continued_identifier = (token.type == TokenType.IDENTIFIER and + token.string.endswith('.')) + is_continued_operator = (token.type == TokenType.OPERATOR and + not token.metadata.IsUnaryPostOperator()) + is_continued_dot = token.string == '.' + next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR + next_code_is_dot = next_code and next_code.string == '.' + is_end_of_block = (token.type == TokenType.END_BLOCK and + token.metadata.context.type != EcmaContext.OBJECT_LITERAL) + is_multiline_string = token.type == TokenType.STRING_TEXT + next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK + if (is_last_code_in_line and + self._StatementCouldEndInContext() and + not is_multiline_string and + not is_end_of_block and + not is_continued_identifier and + not is_continued_operator and + not is_continued_dot and + not next_code_is_dot and + not next_code_is_operator and + not is_implied_block and + not next_code_is_block): + token.metadata.is_implied_semicolon = True + self._EndStatement() + + def _StatementCouldEndInContext(self): + """Returns whether the current statement (if any) may end in this context.""" + # In the basic statement or variable declaration context, statement can + # always end in this context. + if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR): + return True + + # End of a ternary false branch inside a statement can also be the + # end of the statement, for example: + # var x = foo ? foo.bar() : null + # In this case the statement ends after the null, when the context stack + # looks like ternary_false > var > statement > root. + if (self._context.type == EcmaContext.TERNARY_FALSE and + self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)): + return True + + # In all other contexts like object and array literals, ternary true, etc. + # the statement can't yet end. + return False + + def _GetOperatorType(self, token): + """Returns the operator type of the given operator token. + + Args: + token: The token to get arity for. + + Returns: + The type of the operator. One of the *_OPERATOR constants defined in + EcmaMetaData. + """ + if token.string == '?': + return EcmaMetaData.TERNARY_OPERATOR + + if token.string in TokenType.UNARY_OPERATORS: + return EcmaMetaData.UNARY_OPERATOR + + last_code = token.metadata.last_code + if not last_code or last_code.type == TokenType.END_BLOCK: + return EcmaMetaData.UNARY_OPERATOR + + if (token.string in TokenType.UNARY_POST_OPERATORS and + last_code.type in TokenType.EXPRESSION_ENDER_TYPES): + return EcmaMetaData.UNARY_POST_OPERATOR + + if (token.string in TokenType.UNARY_OK_OPERATORS and + last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and + last_code.string not in TokenType.UNARY_POST_OPERATORS): + return EcmaMetaData.UNARY_OPERATOR + + return EcmaMetaData.BINARY_OPERATOR |