Diffstat (limited to 'third_party/googletest/googlemock/scripts/generator/cpp/tokenize.py')
-rwxr-xr-x third_party/googletest/googlemock/scripts/generator/cpp/tokenize.py | 287
1 file changed, 0 insertions(+), 287 deletions(-)
diff --git a/third_party/googletest/googlemock/scripts/generator/cpp/tokenize.py b/third_party/googletest/googlemock/scripts/generator/cpp/tokenize.py
deleted file mode 100755
index 359d556..0000000
--- a/third_party/googletest/googlemock/scripts/generator/cpp/tokenize.py
+++ /dev/null
@@ -1,287 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 Neal Norwitz
-# Portions Copyright 2007 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tokenize C++ source code."""
-
-__author__ = 'nnorwitz@google.com (Neal Norwitz)'
-
-
-try:
- # Python 3.x
- import builtins
-except ImportError:
- # Python 2.x
- import __builtin__ as builtins
-
-
-import sys
-
-from cpp import utils
-
-
-if not hasattr(builtins, 'set'):
- # Nominal support for Python 2.3.
- from sets import Set as set
-
-
-# Add $ as a valid identifier char since so much code uses it.
-_letters = 'abcdefghijklmnopqrstuvwxyz'
-VALID_IDENTIFIER_CHARS = set(_letters + _letters.upper() + '_0123456789$')
-HEX_DIGITS = set('0123456789abcdefABCDEF')
-INT_OR_FLOAT_DIGITS = set('0123456789eE-+')
-
-
-# C++0x string prefixes.
-_STR_PREFIXES = set(('R', 'u8', 'u8R', 'u', 'uR', 'U', 'UR', 'L', 'LR'))
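-
-# Illustrative examples (not part of the original file) of the literals
-# these prefixes are meant to match:
-#   u8"utf-8 text"   R"(raw string)"   LR"(wide raw string)"   uR"(x)"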
-
-
-# Token types.
-UNKNOWN = 'UNKNOWN'
-SYNTAX = 'SYNTAX'
-CONSTANT = 'CONSTANT'
-NAME = 'NAME'
-PREPROCESSOR = 'PREPROCESSOR'
-
-# Where the token originated from. This can be used for backtracking.
-# It is always set to WHENCE_STREAM in this code.
-WHENCE_STREAM, WHENCE_QUEUE = range(2)
-
-
-class Token(object):
- """Data container to represent a C++ token.
-
- Tokens can be identifiers, syntax char(s), constants, or
- pre-processor directives.
-
-  start contains the index of the first char of the token in the source.
-  end contains the index just past the last char of the token in the source.
- """
-
- def __init__(self, token_type, name, start, end):
- self.token_type = token_type
- self.name = name
- self.start = start
- self.end = end
- self.whence = WHENCE_STREAM
-
- def __str__(self):
- if not utils.DEBUG:
- return 'Token(%r)' % self.name
- return 'Token(%r, %s, %s)' % (self.name, self.start, self.end)
-
- __repr__ = __str__
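-
-# A minimal usage sketch (illustrative, not part of the original file):
-# a Token records a half-open [start, end) span over the source, so the
-# matched text can always be recovered by slicing:
-#
-#   tok = Token(NAME, 'foo', 4, 7)
-#   assert 'int foo;'[tok.start:tok.end] == tok.name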
-
-
-def _GetString(source, start, i):
- i = source.find('"', i+1)
- while source[i-1] == '\\':
- # Count the trailing backslashes.
- backslash_count = 1
- j = i - 2
- while source[j] == '\\':
- backslash_count += 1
- j -= 1
- # When trailing backslashes are even, they escape each other.
- if (backslash_count % 2) == 0:
- break
- i = source.find('"', i+1)
- return i + 1
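-
-# Why the parity check above matters (illustrative note, not in the
-# original): in "a\\" the two trailing backslashes escape each other,
-# so the quote after them really closes the string; in "a\" the lone
-# backslash escapes the quote and the scan must keep looking.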
-
-
-def _GetChar(source, start, i):
- # NOTE(nnorwitz): may not be quite correct, should be good enough.
- i = source.find("'", i+1)
- while source[i-1] == '\\':
- # Need to special case '\\'.
- if (i - 2) > start and source[i-2] == '\\':
- break
- i = source.find("'", i+1)
-  # Try to handle unterminated single quotes (in an #if 0 block).
- if i < 0:
- i = start
- return i + 1
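-
-# Note (illustrative, not in the original): the i < 0 fallback matters
-# for an unmatched quote inside an #if 0 block; with no closing quote
-# found, the token degenerates to the single quote character instead of
-# raising.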
-
-
-def GetTokens(source):
- """Returns a sequence of Tokens.
-
- Args:
- source: string of C++ source code.
-
- Yields:
- Token that represents the next token in the source.
- """
- # Cache various valid character sets for speed.
- valid_identifier_chars = VALID_IDENTIFIER_CHARS
- hex_digits = HEX_DIGITS
- int_or_float_digits = INT_OR_FLOAT_DIGITS
- int_or_float_digits2 = int_or_float_digits | set('.')
-
- # Only ignore errors while in a #if 0 block.
- ignore_errors = False
- count_ifs = 0
-
- i = 0
- end = len(source)
- while i < end:
- # Skip whitespace.
- while i < end and source[i].isspace():
- i += 1
- if i >= end:
- return
-
- token_type = UNKNOWN
- start = i
- c = source[i]
- if c.isalpha() or c == '_': # Find a string token.
- token_type = NAME
- while source[i] in valid_identifier_chars:
- i += 1
- # String and character constants can look like a name if
- # they are something like L"".
- if (source[i] == "'" and (i - start) == 1 and
- source[start:i] in 'uUL'):
-      # u, U, and L are valid C++0x character prefixes.
- token_type = CONSTANT
- i = _GetChar(source, start, i)
-    elif source[i] == '"' and source[start:i] in _STR_PREFIXES:
- token_type = CONSTANT
- i = _GetString(source, start, i)
- elif c == '/' and source[i+1] == '/': # Find // comments.
- i = source.find('\n', i)
- if i == -1: # Handle EOF.
- i = end
- continue
- elif c == '/' and source[i+1] == '*': # Find /* comments. */
- i = source.find('*/', i) + 2
- continue
- elif c in ':+-<>&|*=': # : or :: (plus other chars).
- token_type = SYNTAX
- i += 1
- new_ch = source[i]
- if new_ch == c and c != '>': # Treat ">>" as two tokens.
- i += 1
- elif c == '-' and new_ch == '>':
- i += 1
- elif new_ch == '=':
- i += 1
- elif c in '()[]{}~!?^%;/.,': # Handle single char tokens.
- token_type = SYNTAX
- i += 1
- if c == '.' and source[i].isdigit():
- token_type = CONSTANT
- i += 1
- while source[i] in int_or_float_digits:
- i += 1
- # Handle float suffixes.
- for suffix in ('l', 'f'):
- if suffix == source[i:i+1].lower():
- i += 1
- break
- elif c.isdigit(): # Find integer.
- token_type = CONSTANT
- if c == '0' and source[i+1] in 'xX':
- # Handle hex digits.
- i += 2
- while source[i] in hex_digits:
- i += 1
- else:
- while source[i] in int_or_float_digits2:
- i += 1
- # Handle integer (and float) suffixes.
- for suffix in ('ull', 'll', 'ul', 'l', 'f', 'u'):
- size = len(suffix)
- if suffix == source[i:i+size].lower():
- i += size
- break
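-      # E.g. (illustrative): "1.5e-3f" and "42ull" each tokenize as a
-      # single CONSTANT, suffix included.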
- elif c == '"': # Find string.
- token_type = CONSTANT
- i = _GetString(source, start, i)
- elif c == "'": # Find char.
- token_type = CONSTANT
- i = _GetChar(source, start, i)
- elif c == '#': # Find pre-processor command.
- token_type = PREPROCESSOR
- got_if = source[i:i+3] == '#if' and source[i+3:i+4].isspace()
- if got_if:
- count_ifs += 1
- elif source[i:i+6] == '#endif':
- count_ifs -= 1
- if count_ifs == 0:
- ignore_errors = False
-
- # TODO(nnorwitz): handle preprocessor statements (\ continuations).
- while 1:
- i1 = source.find('\n', i)
- i2 = source.find('//', i)
- i3 = source.find('/*', i)
- i4 = source.find('"', i)
- # NOTE(nnorwitz): doesn't handle comments in #define macros.
-      # Get the first important symbol (newline, comment, string, EOF/end).
- i = min([x for x in (i1, i2, i3, i4, end) if x != -1])
-
- # Handle #include "dir//foo.h" properly.
- if source[i] == '"':
- i = source.find('"', i+1) + 1
- assert i > 0
- continue
- # Keep going if end of the line and the line ends with \.
- if not (i == i1 and source[i-1] == '\\'):
- if got_if:
- condition = source[start+4:i].lstrip()
- if (condition.startswith('0') or
- condition.startswith('(0)')):
- ignore_errors = True
- break
- i += 1
- elif c == '\\': # Handle \ in code.
- # This is different from the pre-processor \ handling.
- i += 1
- continue
- elif ignore_errors:
- # The tokenizer seems to be in pretty good shape. This
- # raise is conditionally disabled so that bogus code
- # in an #if 0 block can be handled. Since we will ignore
-      # it anyway, this is probably fine. So disable the
- # exception and return the bogus char.
- i += 1
- else:
- sys.stderr.write('Got invalid token in %s @ %d token:%s: %r\n' %
- ('?', i, c, source[i-10:i+10]))
- raise RuntimeError('unexpected token')
-
- if i <= 0:
- print('Invalid index, exiting now.')
- return
- yield Token(token_type, source[start:i], start, i)
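-
-# A small driver sketch (illustrative, not part of the original file):
-#
-#   for token in GetTokens('int x = 0x1f;\n'):
-#     print('%s %s' % (token.token_type, token.name))
-#
-# which would print: NAME int, NAME x, SYNTAX =, CONSTANT 0x1f, SYNTAX ;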
-
-
-if __name__ == '__main__':
- def main(argv):
- """Driver mostly for testing purposes."""
- for filename in argv[1:]:
- source = utils.ReadFile(filename)
- if source is None:
- continue
-
- for token in GetTokens(source):
- print('%-12s: %s' % (token.token_type, token.name))
- # print('\r%6.2f%%' % (100.0 * index / token.end),)
- sys.stdout.write('\n')
-
-
- main(sys.argv)
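-
-  # Usage sketch (illustrative; the file names are placeholders):
-  #   $ python tokenize.py foo.cc bar.h
-  # This prints one "token_type: token text" line per token per file.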