diff options
Diffstat (limited to 'third_party/py/abseil/absl/flags/_helpers.py')
-rw-r--r-- | third_party/py/abseil/absl/flags/_helpers.py | 430 |
1 files changed, 430 insertions, 0 deletions
diff --git a/third_party/py/abseil/absl/flags/_helpers.py b/third_party/py/abseil/absl/flags/_helpers.py new file mode 100644 index 0000000000..a3734189bf --- /dev/null +++ b/third_party/py/abseil/absl/flags/_helpers.py @@ -0,0 +1,430 @@ +# Copyright 2017 The Abseil Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Internal helper functions for Abseil Python flags library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import re +import struct +import sys +import textwrap +try: + import fcntl +except ImportError: + fcntl = None +try: + # Importing termios will fail on non-unix platforms. + import termios +except ImportError: + termios = None + +import six +from six.moves import range # pylint: disable=redefined-builtin + + +_DEFAULT_HELP_WIDTH = 80 # Default width of help output. +_MIN_HELP_WIDTH = 40 # Minimal "sane" width of help output. We assume that any + # value below 40 is unreasonable. + +# Define the allowed error rate in an input string to get suggestions. +# +# We lean towards a high threshold because we tend to be matching a phrase, +# and the simple algorithm used here is geared towards correcting word +# spellings. +# +# For manual testing, consider "<command> --list" which produced a large number +# of spurious suggestions when we used "least_errors > 0.5" instead of +# "least_erros >= 0.5". 
_SUGGESTION_ERROR_RATE_THRESHOLD = 0.50

# Characters that cannot appear or are highly discouraged in an XML 1.0
# document. (See http://www.w3.org/TR/REC-xml/#charsets or
# https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0)
_ILLEGAL_XML_CHARS_REGEX = re.compile(
    u'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]')

# This is a set of module ids for the modules that disclaim key flags.
# This module is explicitly added to this set so that we never consider it to
# define key flag.
disclaim_module_ids = {id(sys.modules[__name__])}


# Define special flags here so that help may be generated for them.
# NOTE: Please do NOT use SPECIAL_FLAGS from outside flags module.
# Initialized inside flagvalues.py.
SPECIAL_FLAGS = None


# This points to the flags module, initialized in flags/__init__.py.
# This should only be used in adopt_module_key_flags to take SPECIAL_FLAGS into
# account.
FLAGS_MODULE = None


class _ModuleObjectAndName(
    collections.namedtuple('_ModuleObjectAndName', 'module module_name')):
  """Module object and name.

  Fields:
  - module: object, module object.
  - module_name: str, module name.
  """


def get_module_object_and_name(globals_dict):
  """Returns the module that defines a global environment, and its name.

  Args:
    globals_dict: A dictionary that should correspond to an environment
      providing the values of the globals.

  Returns:
    _ModuleObjectAndName - pair of module object & module name.
    Returns (None, None) if the module could not be identified.
  """
  name = globals_dict.get('__name__', None)
  module = sys.modules.get(name, None)
  # Pick a more informative name for the main module.
  return _ModuleObjectAndName(module,
                              (sys.argv[0] if name == '__main__' else name))


def get_calling_module_object_and_name():
  """Returns the module that's calling into this module.

  We generally use this function to get the name of the module calling a
  DEFINE_foo... function.

  Returns:
    _ModuleObjectAndName for the first frame up the call stack whose module
    is not listed in disclaim_module_ids and whose module name is known.

  Raises:
    AssertionError: Raised when no calling module could be identified.
  """
  for depth in range(1, sys.getrecursionlimit()):
    # sys._getframe is the right thing to use here, as it's the best
    # way to walk up the call stack.
    try:
      frame = sys._getframe(depth)  # pylint: disable=protected-access
    except ValueError:
      # sys._getframe raises ValueError once depth is deeper than the call
      # stack; stop walking so the documented AssertionError is raised.
      break
    module, module_name = get_module_object_and_name(frame.f_globals)
    if id(module) not in disclaim_module_ids and module_name is not None:
      return _ModuleObjectAndName(module, module_name)
  raise AssertionError('No module was found')


def get_calling_module():
  """Returns the name of the module that's calling into this module."""
  return get_calling_module_object_and_name().module_name


def str_or_unicode(value):
  """Converts a value to a python string.

  Behavior of this function is intentionally different in Python2/3.

  In Python2, the given value is attempted to convert to a str (byte string).
  If it contains non-ASCII characters, it is converted to a unicode instead.

  In Python3, the given value is always converted to a str (unicode string).

  This behavior reflects the (bad) practice in Python2 to try to represent
  a string as str as long as it contains ASCII characters only.

  Args:
    value: An object to be converted to a string.

  Returns:
    A string representation of the given value. See the description above
    for its type.
  """
  try:
    return str(value)
  except UnicodeEncodeError:
    return unicode(value)  # Python3 should never come here


def create_xml_dom_element(doc, name, value):
  """Returns an XML DOM element with name and text value.

  Args:
    doc: minidom.Document, the DOM document it should create nodes from.
    name: str, the tag of XML element.
    value: object, whose string representation will be used
      as the value of the XML element. Illegal or highly discouraged xml 1.0
      characters are stripped.

  Returns:
    An instance of minidom.Element.
  """
  s = str_or_unicode(value)
  if isinstance(s, bytes):
    # Only possible on Python 2, where str is bytes: get a valid unicode
    # string, silently dropping undecodable bytes.
    s = s.decode('utf-8', 'ignore')
  if isinstance(value, bool):
    # Display boolean values as the C++ flag library does: no caps.
    s = s.lower()
  # Remove illegal xml characters.
  s = _ILLEGAL_XML_CHARS_REGEX.sub(u'', s)

  e = doc.createElement(name)
  e.appendChild(doc.createTextNode(s))
  return e


def get_help_width():
  """Returns the integer width of help lines that is used in TextWrap.

  Falls back to _DEFAULT_HELP_WIDTH when stdout is not a terminal, when the
  fcntl/termios modules are unavailable, or when the width cannot be
  determined (including a COLUMNS environment value that is not an integer).
  """
  if not sys.stdout.isatty() or termios is None or fcntl is None:
    return _DEFAULT_HELP_WIDTH
  try:
    data = fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, '1234')
    columns = struct.unpack('hh', data)[1]
    # Emacs mode returns 0.
    # Here we assume that any value below 40 is unreasonable.
    if columns >= _MIN_HELP_WIDTH:
      return columns
    # Returning an int as default is fine, int(int) just return the int.
    return int(os.getenv('COLUMNS', _DEFAULT_HELP_WIDTH))

  except (TypeError, ValueError, IOError, struct.error):
    # ValueError covers a COLUMNS value that int() cannot parse.
    return _DEFAULT_HELP_WIDTH


def get_flag_suggestions(attempt, longopt_list):
  """Returns helpful similar matches for an invalid flag.

  Args:
    attempt: str, the flag name the user typed.
    longopt_list: list of str, known long options; anything from the first
      '=' onwards in an entry is ignored.

  Returns:
    A list of the option names closest to attempt (all those tied for the
    smallest edit distance, in longopt_list order), or an empty list when
    attempt is too short, there are no options, or the best match is too
    poor.
  """
  # Don't suggest on very short strings, or if no longopts are specified.
  if len(attempt) <= 2 or not longopt_list:
    return []

  option_names = [v.split('=')[0] for v in longopt_list]

  # Find close approximations in flag prefixes.
  # This also handles the case where the flag is spelled right but ambiguous.
  distances = [(_damerau_levenshtein(attempt, option[0:len(attempt)]), option)
               for option in option_names]

  least_errors = min(errors for errors, _ in distances)
  # Don't suggest excessively bad matches.
  if least_errors >= _SUGGESTION_ERROR_RATE_THRESHOLD * len(attempt):
    return []

  return [name for errors, name in distances if errors == least_errors]


def _damerau_levenshtein(a, b):
  """Returns Damerau-Levenshtein edit distance from a to b."""
  # Maps an (x, y) pair of string suffixes to their already computed distance.
  memo = {}

  def distance(x, y):
    """Recursively defined string distance with memoization."""
    if (x, y) in memo:
      return memo[x, y]
    if not x:
      d = len(y)
    elif not y:
      d = len(x)
    else:
      d = min(
          distance(x[1:], y) + 1,  # correct an insertion error
          distance(x, y[1:]) + 1,  # correct a deletion error
          distance(x[1:], y[1:]) + (x[0] != y[0]))  # correct a wrong character
      if len(x) >= 2 and len(y) >= 2 and x[0] == y[1] and x[1] == y[0]:
        # Correct a transposition.
        t = distance(x[2:], y[2:]) + 1
        if d > t:
          d = t

    memo[x, y] = d
    return d
  return distance(a, b)


def text_wrap(text, length=None, indent='', firstline_indent=None):
  """Wraps a given text to a maximum line length and returns it.

  It turns lines that only contain whitespace into empty lines, keeps new lines,
  and expands tabs using 4 spaces.

  Args:
    text: str, text to wrap.
    length: int, maximum length of a line, includes indentation.
        If this is None then use get_help_width()
    indent: str, indent for all but first line.
    firstline_indent: str, indent for first line; if None, fall back to indent.

  Returns:
    str, the wrapped text.

  Raises:
    ValueError: Raised if indent or firstline_indent not shorter than length.
  """
  # Get defaults where caller used None
  if length is None:
    length = get_help_width()
  if indent is None:
    indent = ''
  if firstline_indent is None:
    firstline_indent = indent

  if len(indent) >= length:
    raise ValueError('Length of indent exceeds length')
  if len(firstline_indent) >= length:
    raise ValueError('Length of first line indent exceeds length')

  text = text.expandtabs(4)

  result = []
  # Create one wrapper for the first paragraph and one for subsequent
  # paragraphs that does not have the initial wrapping.
  wrapper = textwrap.TextWrapper(
      width=length, initial_indent=firstline_indent, subsequent_indent=indent)
  subsequent_wrapper = textwrap.TextWrapper(
      width=length, initial_indent=indent, subsequent_indent=indent)

  # textwrap does not have any special treatment for newlines. From the docs:
  # "...newlines may appear in the middle of a line and cause strange output.
  # For this reason, text should be split into paragraphs (using
  # str.splitlines() or similar) which are wrapped separately."
  for paragraph in (p.strip() for p in text.splitlines()):
    if paragraph:
      result.extend(wrapper.wrap(paragraph))
    else:
      result.append('')  # Keep empty lines.
    # Replace initial wrapper with wrapper for subsequent paragraphs.
    wrapper = subsequent_wrapper

  return '\n'.join(result)


def flag_dict_to_args(flag_map):
  """Convert a dict of values into process call parameters.

  This method is used to convert a dictionary into a sequence of parameters
  for a binary that parses arguments using this module.

  Args:
    flag_map: dict, a mapping where the keys are flag names (strings).
        values are treated according to their type:
        * If value is None, then only the name is emitted.
        * If value is True, then only the name is emitted.
        * If value is False, then only the name prepended with 'no' is emitted.
        * If value is a string then --name=value is emitted. On Python 3 a
          bytes value is decoded with os.fsdecode first, so that its content
          rather than its repr (b'...') is emitted.
        * If value is a collection, this will emit --name=value1,value2,value3.
        * Everything else is converted to string and passed as such.
  Yields:
    sequence of string suitable for a subprocess execution.
  """
  for key, value in flag_map.items():
    if value is None:
      yield '--%s' % key
    elif isinstance(value, bool):
      if value:
        yield '--%s' % key
      else:
        yield '--no%s' % key
    elif isinstance(value, (bytes, type(u''))):
      # We don't want strings to be handled like python collections.
      if str is not bytes and isinstance(value, bytes):
        # Python 3 only: %-formatting bytes into text would produce b'...'.
        value = os.fsdecode(value)
      yield '--%s=%s' % (key, value)
    else:
      # Now we attempt to deal with collections.
      try:
        rendered = ','.join(str(item) for item in value)
      except TypeError:
        # Default case: value is not iterable.
        rendered = value
      yield '--%s=%s' % (key, rendered)


def trim_docstring(docstring):
  """Removes indentation from triple-quoted strings.

  This is the function specified in PEP 257 to handle docstrings:
  https://www.python.org/dev/peps/pep-0257/.

  Args:
    docstring: str, a python docstring.

  Returns:
    str, docstring with indentation removed.
  """
  if not docstring:
    return ''

  # If you've got a line longer than this you have other problems...
  max_indent = 1 << 29

  # Convert tabs to spaces (following the normal Python rules)
  # and split into a list of lines:
  lines = docstring.expandtabs().splitlines()

  # Determine minimum indentation (first line doesn't count):
  indent = max_indent
  for line in lines[1:]:
    stripped = line.lstrip()
    if stripped:
      indent = min(indent, len(line) - len(stripped))
  # Remove indentation (first line is special):
  trimmed = [lines[0].strip()]
  if indent < max_indent:
    for line in lines[1:]:
      trimmed.append(line[indent:].rstrip())
  # Strip off trailing and leading blank lines:
  while trimmed and not trimmed[-1]:
    trimmed.pop()
  while trimmed and not trimmed[0]:
    trimmed.pop(0)
  # Return a single string:
  return '\n'.join(trimmed)


def doc_to_help(doc):
  """Takes a __doc__ string and reformats it as help."""

  # Get rid of starting and ending white space. Using lstrip() or even
  # strip() could drop more than maximum of first line and right space
  # of last line.
  doc = doc.strip()

  # Turn lines that contain only spaces/tabs into truly empty lines.
  whitespace_only_line = re.compile('^[ \t]+$', re.M)
  doc = whitespace_only_line.sub('', doc)

  # Cut out common space at line beginnings.
  doc = trim_docstring(doc)

  # Just like this module's comment, comments tend to be aligned somehow.
  # In other words they all start with the same amount of white space.
  # 1) keep double new lines;
  # 2) keep ws after new lines if not empty line;
  # 3) all other new lines shall be changed to a space;
  # Solution: Match new lines between non white space and replace with space.
  doc = re.sub(r'(?<=\S)\n(?=\S)', ' ', doc, flags=re.M)

  return doc


def is_bytes_or_string(maybe_string):
  """Returns whether maybe_string is a byte string or a text string.

  Args:
    maybe_string: object, the value to check.

  Returns:
    bool. On Python 2 (where str is bytes) this checks against basestring;
    otherwise it checks against (str, bytes).
  """
  if str is bytes:
    return isinstance(maybe_string, basestring)
  else:
    return isinstance(maybe_string, (str, bytes))