Diffstat (limited to 'tools/closure_linter-2.3.4/closure_linter/common/tokenizer.py')
-rwxr-xr-x  tools/closure_linter-2.3.4/closure_linter/common/tokenizer.py  184
1 file changed, 184 insertions, 0 deletions
diff --git a/tools/closure_linter-2.3.4/closure_linter/common/tokenizer.py b/tools/closure_linter-2.3.4/closure_linter/common/tokenizer.py
new file mode 100755
index 0000000..0234720
--- /dev/null
+++ b/tools/closure_linter-2.3.4/closure_linter/common/tokenizer.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Regular expression based lexer."""
+
+__author__ = ('robbyw@google.com (Robert Walker)',
+              'ajp@google.com (Andy Perelson)')
+
+from closure_linter.common import tokens
+
+# Shorthand
+Type = tokens.TokenType
+
+
+class Tokenizer(object):
+ """General purpose tokenizer.
+
+ Attributes:
+ mode: The latest mode of the tokenizer. This allows patterns to distinguish
+ if they are mid-comment, mid-parameter list, etc.
+ matchers: Dictionary of modes to sequences of matchers that define the
+ patterns to check at any given time.
+ default_types: Dictionary of modes to types, defining what type to give
+ non-matched text when in the given mode. Defaults to Type.NORMAL.
+ """
+
+  def __init__(self, starting_mode, matchers, default_types):
+    """Initialize the tokenizer.
+
+    Args:
+      starting_mode: Mode to start in.
+      matchers: Dictionary of modes to sequences of matchers that define the
+          patterns to check at any given time.
+      default_types: Dictionary of modes to types, defining what type to give
+          non-matched text when in the given mode. Defaults to Type.NORMAL.
+    """
+    self.__starting_mode = starting_mode
+    self.matchers = matchers
+    self.default_types = default_types
+
+  def TokenizeFile(self, file):
+    """Tokenizes the given file.
+
+    Args:
+      file: An iterable that yields one line of the file at a time.
+
+    Returns:
+      The first token in the file.
+    """
+    # The current mode.
+    self.mode = self.__starting_mode
+    # The first token in the stream.
+    self.__first_token = None
+    # The last token added to the token stream.
+    self.__last_token = None
+    # The current line number.
+    self.__line_number = 0
+
+    for line in file:
+      self.__line_number += 1
+      self.__TokenizeLine(line)
+
+    return self.__first_token
+
+  def _CreateToken(self, string, token_type, line, line_number, values=None):
+    """Creates a new Token object (or subclass).
+
+    Args:
+      string: The string of input the token represents.
+      token_type: The type of token.
+      line: The text of the line this token is in.
+      line_number: The line number of the token.
+      values: A dict of named values within the token. For instance, a
+        function declaration may have a value called 'name' which captures the
+        name of the function.
+
+    Returns:
+      The newly created Token object.
+    """
+    return tokens.Token(string, token_type, line, line_number, values)
+
+  def __TokenizeLine(self, line):
+    """Tokenizes the given line.
+
+    Args:
+      line: The contents of the line.
+    """
+    string = line.rstrip('\n\r\f')
+    line_number = self.__line_number
+    self.__start_index = 0
+
+    if not string:
+      self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number))
+      return
+
+    normal_token = ''
+    index = 0
+    while index < len(string):
+      for matcher in self.matchers[self.mode]:
+        if matcher.line_start and index > 0:
+          continue
+
+        match = matcher.regex.match(string, index)
+
+        if match:
+          if normal_token:
+            self.__AddToken(
+                self.__CreateNormalToken(self.mode, normal_token, line,
+                                         line_number))
+            normal_token = ''
+
+          # Add the match.
+          self.__AddToken(self._CreateToken(match.group(), matcher.type, line,
+                                            line_number, match.groupdict()))
+
+          # Change the mode to the correct one for after this match.
+          self.mode = matcher.result_mode or self.mode
+
+          # Advance the index past the matched text.
+          index = match.end()
+
+          break
+
+      else:
+        # The for loop finished without a match, so add the character at the
+        # current index to the run of consecutive non-matching characters.
+        # These runs become tokens of the mode's default type.
+        if string:
+          normal_token += string[index:index + 1]
+          index += 1
+
+    if normal_token:
+      self.__AddToken(
+          self.__CreateNormalToken(self.mode, normal_token, line, line_number))
+
+  def __CreateNormalToken(self, mode, string, line, line_number):
+    """Creates a normal token.
+
+    Args:
+      mode: The current mode.
+      string: The string to tokenize.
+      line: The line of text.
+      line_number: The line number within the file.
+
+    Returns:
+      A Token object, of the default type for the current mode.
+    """
+    token_type = Type.NORMAL
+    if mode in self.default_types:
+      token_type = self.default_types[mode]
+    return self._CreateToken(string, token_type, line, line_number)
+
+  def __AddToken(self, token):
+    """Add the given token to the token stream.
+
+    Args:
+      token: The token to add.
+    """
+    # Store the first token, or point the previous token to this one.
+    if not self.__first_token:
+      self.__first_token = token
+    else:
+      self.__last_token.next = token
+
+    # Establish the doubly linked list.
+    token.previous = self.__last_token
+    self.__last_token = token
+
+    # Compute the character indices within the line.
+    token.start_index = self.__start_index
+    self.__start_index += token.length
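
For reviewers who want to see the class in motion, here is a minimal usage
sketch (not part of this change). It assumes the closure_linter package is
importable; the SimpleMatcher class and TEXT_MODE constant are hypothetical
stand-ins for the matcher interface the real linter defines in
closure_linter/common/matcher.py, i.e. objects exposing the regex, type,
result_mode, and line_start attributes that __TokenizeLine reads above.

    import re

    from closure_linter.common import tokenizer
    from closure_linter.common import tokens


    class SimpleMatcher(object):
      """Hypothetical matcher exposing the attributes Tokenizer reads."""

      def __init__(self, regex, token_type, result_mode=None, line_start=False):
        self.regex = regex              # Compiled pattern, tried via match().
        self.type = token_type          # Token type assigned on a match.
        self.result_mode = result_mode  # Mode to switch to, or None to stay.
        self.line_start = line_start    # If True, only tried at column 0.


    TEXT_MODE = 'text'

    # A single matcher: runs of whitespace become WHITESPACE tokens. Text
    # matching no matcher falls through the for loop and is emitted as NORMAL.
    matchers = {TEXT_MODE: [SimpleMatcher(re.compile(r'\s+'),
                                          tokens.TokenType.WHITESPACE)]}

    t = tokenizer.Tokenizer(TEXT_MODE, matchers, {})
    first_token = t.TokenizeFile(['one two\n', '\n', 'three\n'])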
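
TokenizeFile returns the head of the doubly linked list that __AddToken
builds, and each token carries a start_index giving its column offset within
its own line. Continuing the sketch (the printed type strings are
illustrative; the actual TokenType values live in tokens.py):

    token = first_token
    while token:
      print('%d:%d %s %r' % (token.line_number, token.start_index,
                             token.type, token.string))
      token = token.next

    # Roughly, for the three input lines above:
    #   1:0 normal 'one'
    #   1:3 whitespace ' '
    #   1:4 normal 'two'
    #   2:0 blank line ''
    #   3:0 normal 'three'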