1 files changed, 367 insertions, 0 deletions
diff --git a/tools/closure_linter-2.3.4/closure_linter/javascripttokenizer.py b/tools/closure_linter-2.3.4/closure_linter/javascripttokenizer.py
new file mode 100755
index 0000000..991ff80
--- /dev/null
+++ b/tools/closure_linter-2.3.4/closure_linter/javascripttokenizer.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Regular expression based JavaScript parsing classes."""
+
+__author__ = ('robbyw@google.com (Robert Walker)',
+              'ajp@google.com (Andy Perelson)')
+
+import copy
+import re
+
+from closure_linter import javascripttokens
+from closure_linter.common import matcher
+from closure_linter.common import tokenizer
+
+# Shorthand aliases used throughout the matcher tables in this module.
+Type = javascripttokens.JavaScriptTokenType
+Matcher = matcher.Matcher
+
+
+class JavaScriptModes(object):
+  """Enumeration of the different matcher modes used for JavaScript."""
+  TEXT_MODE = 'text'  # Ordinary code, outside strings and comments.
+  SINGLE_QUOTE_STRING_MODE = 'single_quote_string'  # Entered on a '.
+  DOUBLE_QUOTE_STRING_MODE = 'double_quote_string'  # Entered on a ".
+  BLOCK_COMMENT_MODE = 'block_comment'  # Entered on /*.
+  DOC_COMMENT_MODE = 'doc_comment'  # Entered on /**.
+  DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces'  # Entered on @param.
+  LINE_COMMENT_MODE = 'line_comment'  # Entered on //.
+  PARAMETER_MODE = 'parameter'  # Entered on ( while in FUNCTION_MODE.
+  FUNCTION_MODE = 'function'  # Entered on the function keyword.
+
+
+class JavaScriptTokenizer(tokenizer.Tokenizer):
+  """JavaScript tokenizer.
+
+  Converts JavaScript code into an array of tokens.
+  """
+
+  # Patterns for JavaScript parsing.  IDENTIFIER_CHAR is a char-class body.
+  IDENTIFIER_CHAR = r'A-Za-z0-9_$.'
+
+  # Number patterns based on:
+  # http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html
+  MANTISSA = r"""
+             (\d+(?!\.)) |                # Matches '10'
+             (\d+\.(?!\d)) |              # Matches '10.'
+             (\d*\.\d+)                   # Matches '.5' or '10.5'
+             """
+  DECIMAL_LITERAL = r'(%s)([eE][-+]?\d+)?' % MANTISSA
+  HEX_LITERAL = r'0[xX][0-9a-fA-F]+'
+  NUMBER = re.compile(r"""
+                      ((%s)|(%s))
+                      """ % (HEX_LITERAL, DECIMAL_LITERAL), re.VERBOSE)
+
+  # Strings come in three parts - first we match the start of the string, then
+  # the contents, then the end.  The contents consist of any character except a
+  # backslash or end of string, or a backslash followed by any character, or a
+  # backslash followed by end of line to support correct parsing of multi-line
+  # strings.
+  SINGLE_QUOTE = re.compile(r"'")
+  SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+")
+  DOUBLE_QUOTE = re.compile(r'"')
+  DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+')
+
+  START_SINGLE_LINE_COMMENT = re.compile(r'//')
+  END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$')
+
+  START_DOC_COMMENT = re.compile(r'/\*\*')
+  START_BLOCK_COMMENT = re.compile(r'/\*')
+  END_BLOCK_COMMENT = re.compile(r'\*/')
+  BLOCK_COMMENT_TEXT = re.compile(r'([^*]|\*(?!/))+')
+
+  # Comment text is anything that we are not going to parse into another special
+  # token like (inline) flags or end comments. Complicated regex to match
+  # most normal characters, and '*', '{', '}', and '@' when we are sure that
+  # it is safe. Expression [^*{}\s]@ must come first, or the other options will
+  # match everything before @, and we won't match @'s that aren't part of flags
+  # like in email addresses in the @author tag.
+  DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+')
+  DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+')
+
+  # Match the prefix ' * ' that starts every line of jsdoc. Want to include
+  # spaces after the '*', but nothing else that occurs after a '*', and don't
+  # want to match the '*' in '*/'.
+  DOC_PREFIX = re.compile(r'\s*\*(\s+|(?!/))')
+
+  START_BLOCK = re.compile('{')
+  END_BLOCK = re.compile('}')
+
+  REGEX_CHARACTER_CLASS = r"""
+                          \[               # Opening bracket
+                          ([^\]\\]|\\.)*   # Anything but a ] or \,
+                                           # or a backslash followed by anything
+                          \]               # Closing bracket
+                          """
+  # We ensure the regex is followed by one of the tokens listed below to avoid
+  # incorrectly parsing something like x / y / z as x REGEX(/ y /) z
+  POST_REGEX_LIST = [
+      ';', ',', r'\.', r'\)', r'\]', '$', r'\/\/', r'\/\*', ':', '}']
+
+  REGEX = re.compile(r"""
+                     /                      # opening slash
+                     (?!\*)                 # not the start of a comment
+                     (\\.|[^\[\/\\]|(%s))*  # a backslash followed by anything,
+                                            # or anything but a / or [ or \,
+                                            # or a character class
+                     /                      # closing slash
+                     [gimsx]*               # optional modifiers
+                     (?=\s*(%s))
+                     """ % (REGEX_CHARACTER_CLASS, '|'.join(POST_REGEX_LIST)),
+                     re.VERBOSE)
+
+  ANYTHING = re.compile(r'.*')
+  PARAMETERS = re.compile(r'[^\)]+')
+  CLOSING_PAREN_WITH_SPACE = re.compile(r'\)\s*')
+
+  FUNCTION_DECLARATION = re.compile(r'\bfunction\b')
+
+  OPENING_PAREN = re.compile(r'\(')
+  CLOSING_PAREN = re.compile(r'\)')
+
+  OPENING_BRACKET = re.compile(r'\[')
+  CLOSING_BRACKET = re.compile(r'\]')
+
+  # We omit these JS keywords from the list:
+  #   function - covered by FUNCTION_DECLARATION.
+  #   delete, in, instanceof, new, typeof, void - included as operators.
+  #   this - included in identifiers.
+  #   null, undefined - not included, should go in some "special constant" list.
+  KEYWORD_LIST = ['break', 'case', 'catch', 'continue', 'default', 'do', 'else',
+      'finally', 'for', 'if', 'return', 'switch', 'throw', 'try', 'var',
+      'while', 'with']
+  # Match a keyword string followed by a non-identifier character in order to
+  # not match something like doSomething as do + Something.
+  KEYWORD = re.compile('(%s)((?=[^%s])|$)' % (
+      '|'.join(KEYWORD_LIST), IDENTIFIER_CHAR))
+
+  # List of regular expressions to match as operators.  Some notes: for our
+  # purposes, the comma behaves similarly enough to a normal operator that we
+  # include it here.  r'\bin\b' actually matches 'in' surrounded by boundary
+  # characters - this may not match some very esoteric uses of the in operator.
+  # Operators that are subsets of larger operators must come later in this list
+  # for proper matching, e.g., '>>' must come AFTER '>>>'.
+  OPERATOR_LIST = [',', r'\+\+', '===', '!==', '>>>=', '>>>', '==', '>=', '<=',
+                   '!=', '<<=', '>>=', '<<', '>>', '>', '<', r'\+=', r'\+',
+                   '--', r'\^=', '-=', '-', '/=', '/', r'\*=', r'\*', '%=', '%',
+                   '&&', r'\|\|', '&=', '&', r'\|=', r'\|', '=', '!', ':',
+                   r'\?', r'\bdelete\b', r'\bin\b', r'\binstanceof\b',
+                   r'\bnew\b', r'\btypeof\b', r'\bvoid\b']
+  OPERATOR = re.compile('|'.join(OPERATOR_LIST))
+
+  WHITESPACE = re.compile(r'\s+')
+  SEMICOLON = re.compile(r';')
+  # Technically JavaScript identifiers can't contain '.', but we treat a set of
+  # nested identifiers as a single identifier.
+  NESTED_IDENTIFIER = r'[a-zA-Z_$][%s.]*' % IDENTIFIER_CHAR
+  IDENTIFIER = re.compile(NESTED_IDENTIFIER)
+
+  SIMPLE_LVALUE = re.compile(r"""
+                             (?P<identifier>%s)      # a valid identifier
+                             (?=\s*                  # optional whitespace
+                             \=                      # look ahead to equal sign
+                             (?!=))                  # not follwed by equal
+                             """ % NESTED_IDENTIFIER, re.VERBOSE)
+
+  # A doc flag is an @ sign followed by letters, appearing at the start of the
+  # line or after whitespace; an inline flag (below) must follow a '{'.  The
+  # look-behind checks keep someone@google.com from being matched as a flag.
+  DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')
+  # To properly parse parameter names, we need to tokenize whitespace into a
+  # token.
+  DOC_FLAG_LEX_SPACES = re.compile(r'(^|(?<=\s))@(?P<name>%s)\b' %
+                                     '|'.join(['param']))
+
+  DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')
+
+  # Star followed by non-slash, i.e a star that does not end a comment.
+  # NOTE(review): no TYPE_GROUP appears in this file; this looks unused here.
+  SAFE_STAR = r'(\*(?!/))'
+
+  COMMON_DOC_MATCHERS = [
+      # Find the end of the comment.
+      Matcher(END_BLOCK_COMMENT, Type.END_DOC_COMMENT,
+              JavaScriptModes.TEXT_MODE),
+
+      # Tokenize inline flags (an @name right after '{'), then the @param flag.
+      Matcher(DOC_INLINE_FLAG, Type.DOC_INLINE_FLAG),
+      Matcher(DOC_FLAG_LEX_SPACES, Type.DOC_FLAG,
+              JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE),
+
+      # Encountering any other doc flag should leave lex spaces mode.
+      Matcher(DOC_FLAG, Type.DOC_FLAG, JavaScriptModes.DOC_COMMENT_MODE),
+
+      # Tokenize braces so we can find types.
+      Matcher(START_BLOCK, Type.DOC_START_BRACE),
+      Matcher(END_BLOCK, Type.DOC_END_BRACE),
+      Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]
+
+
+  # The token matcher groups work as follows: each is a list of Matcher objects.
+  # The matchers will be tried in this order, and the first to match will be
+  # returned.  Hence the order is important because the matchers that come first
+  # overrule the matchers that come later.
+  JAVASCRIPT_MATCHERS = {
+    # Matchers for basic text mode.
+    JavaScriptModes.TEXT_MODE: [
+      # Check a big group - strings, starting comments, and regexes - all
+      # of which could be intertwined.  'string with /regex/',
+      # /regex with 'string'/, /* comment with /regex/ and string */ (and so on)
+      Matcher(START_DOC_COMMENT, Type.START_DOC_COMMENT,
+              JavaScriptModes.DOC_COMMENT_MODE),
+      Matcher(START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT,
+              JavaScriptModes.BLOCK_COMMENT_MODE),
+      Matcher(END_OF_LINE_SINGLE_LINE_COMMENT,
+              Type.START_SINGLE_LINE_COMMENT),
+      Matcher(START_SINGLE_LINE_COMMENT, Type.START_SINGLE_LINE_COMMENT,
+              JavaScriptModes.LINE_COMMENT_MODE),
+      Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START,
+              JavaScriptModes.SINGLE_QUOTE_STRING_MODE),
+      Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START,
+              JavaScriptModes.DOUBLE_QUOTE_STRING_MODE),
+      Matcher(REGEX, Type.REGEX),
+
+      # Next we check for start blocks appearing outside any of the items above.
+      Matcher(START_BLOCK, Type.START_BLOCK),
+      Matcher(END_BLOCK, Type.END_BLOCK),
+
+      # Then we search for function declarations.
+      Matcher(FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION,
+              JavaScriptModes.FUNCTION_MODE),
+
+      # Next, we convert non-function related parens to tokens.
+      Matcher(OPENING_PAREN, Type.START_PAREN),
+      Matcher(CLOSING_PAREN, Type.END_PAREN),
+
+      # Next, we convert brackets to tokens.
+      Matcher(OPENING_BRACKET, Type.START_BRACKET),
+      Matcher(CLOSING_BRACKET, Type.END_BRACKET),
+
+      # Find numbers.  This has to happen before operators because scientific
+      # notation numbers can have + and - in them.
+      Matcher(NUMBER, Type.NUMBER),
+
+      # Find operators and simple assignments
+      Matcher(SIMPLE_LVALUE, Type.SIMPLE_LVALUE),
+      Matcher(OPERATOR, Type.OPERATOR),
+
+      # Find key words and whitespace
+      Matcher(KEYWORD, Type.KEYWORD),
+      Matcher(WHITESPACE, Type.WHITESPACE),
+
+      # Find identifiers
+      Matcher(IDENTIFIER, Type.IDENTIFIER),
+
+      # Finally, we convert semicolons to tokens.
+      Matcher(SEMICOLON, Type.SEMICOLON)],
+
+
+    # Matchers for single quote strings.
+    JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [
+        Matcher(SINGLE_QUOTE_TEXT, Type.STRING_TEXT),
+        Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END,
+            JavaScriptModes.TEXT_MODE)],
+
+
+    # Matchers for double quote strings.
+    JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [
+        Matcher(DOUBLE_QUOTE_TEXT, Type.STRING_TEXT),
+        Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END,
+            JavaScriptModes.TEXT_MODE)],
+
+
+    # Matchers for block comments.
+    JavaScriptModes.BLOCK_COMMENT_MODE: [
+      # First we check for exiting a block comment.
+      Matcher(END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT,
+              JavaScriptModes.TEXT_MODE),
+
+      # Match non-comment-ending text.
+      Matcher(BLOCK_COMMENT_TEXT, Type.COMMENT)],
+
+
+    # Matchers for doc comments.
+    JavaScriptModes.DOC_COMMENT_MODE: COMMON_DOC_MATCHERS + [
+      Matcher(DOC_COMMENT_TEXT, Type.COMMENT)],
+
+    JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: COMMON_DOC_MATCHERS + [
+      Matcher(WHITESPACE, Type.COMMENT),
+      Matcher(DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)],
+
+    # Matchers for single line comments.
+    JavaScriptModes.LINE_COMMENT_MODE: [
+      # We greedy match until the end of the line in line comment mode.
+      Matcher(ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)],
+
+
+    # Matchers for code after the function keyword.
+    JavaScriptModes.FUNCTION_MODE: [
+      # Must match open paren before anything else and move into parameter mode,
+      # otherwise everything inside the parameter list is parsed incorrectly.
+      Matcher(OPENING_PAREN, Type.START_PARAMETERS,
+              JavaScriptModes.PARAMETER_MODE),
+      Matcher(WHITESPACE, Type.WHITESPACE),
+      Matcher(IDENTIFIER, Type.FUNCTION_NAME)],
+
+
+    # Matchers for function parameters
+    JavaScriptModes.PARAMETER_MODE: [
+      # When in function parameter mode, a closing paren is treated specially.
+      # Everything else is treated as lines of parameters.
+      Matcher(CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS,
+              JavaScriptModes.TEXT_MODE),
+      Matcher(PARAMETERS, Type.PARAMETERS, JavaScriptModes.PARAMETER_MODE)]}
+
+
+  # When text is not matched, it is given this default type based on mode.
+  # Modes that are not listed in this map default to Type.NORMAL.
+  JAVASCRIPT_DEFAULT_TYPES = {
+    JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT,
+    JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT
+  }
+
+  def __init__(self, parse_js_doc=True):
+    """Creates a tokenizer object.
+
+    Args:
+      parse_js_doc: Whether to do detailed parsing of JavaScript doc comments,
+          or tokenize them with the block comment matchers.  Defaults to True.
+    """
+    matchers = self.JAVASCRIPT_MATCHERS
+    if not parse_js_doc:
+      # Shallow copy: only a top-level key is rebound, and copy.deepcopy cannot
+      # copy compiled regex patterns before Python 3.7.
+      matchers = dict(matchers)
+      matchers[JavaScriptModes.DOC_COMMENT_MODE] = matchers[
+          JavaScriptModes.BLOCK_COMMENT_MODE]
+    tokenizer.Tokenizer.__init__(self, JavaScriptModes.TEXT_MODE, matchers,
+                                 self.JAVASCRIPT_DEFAULT_TYPES)
+
+  def _CreateToken(self, string, token_type, line, line_number, values=None):
+    """Returns a new JavaScriptToken built from the given pieces.
+
+    Args:
+      string: The string of input that the token contains.
+      token_type: The type to assign to the token.
+      line: The text of the line the token is in.
+      line_number: The number of that line.
+      values: Optional dict of named values within the token, e.g. the
+        'name' captured from a function declaration.
+    """
+    # Arguments are forwarded positionally, in the order they were received.
+    token_args = (string, token_type, line, line_number, values)
+    return javascripttokens.JavaScriptToken(*token_args)