aboutsummaryrefslogtreecommitdiffhomepage
path: root/python/google/protobuf/text_format.py
diff options
context:
space:
mode:
authorGravatar xiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2012-09-22 02:40:50 +0000
committerGravatar xiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2012-09-22 02:40:50 +0000
commitb55a20fa2c669b181f47ea9219b8e74d1263da19 (patch)
tree3936a0e7c22196587a6d8397372de41434fe2129 /python/google/protobuf/text_format.py
parent9ced30caf94bb4e7e9629c199679ff44e8ca7389 (diff)
Down-integrate from internal branch
Diffstat (limited to 'python/google/protobuf/text_format.py')
-rwxr-xr-xpython/google/protobuf/text_format.py257
1 files changed, 150 insertions, 107 deletions
diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py
index c3a1cf60..0714c39d 100755
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@@ -43,10 +43,12 @@ __all__ = [ 'MessageToString', 'PrintMessage', 'PrintField',
'PrintFieldValue', 'Merge' ]
-# Infinity and NaN are not explicitly supported by Python pre-2.6, and
-# float('inf') does not work on Windows (pre-2.6).
-_INFINITY = 1e10000 # overflows, thus will actually be infinity.
-_NAN = _INFINITY * 0
+_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
+ type_checkers.Int32ValueChecker(),
+ type_checkers.Uint64ValueChecker(),
+ type_checkers.Int64ValueChecker())
+_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
+_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
class ParseError(Exception):
@@ -120,7 +122,11 @@ def PrintFieldValue(field, value, out, indent=0,
PrintMessage(value, out, indent + 2, as_utf8, as_one_line)
out.write(' ' * indent + '}')
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
- out.write(field.enum_type.values_by_number[value].name)
+ enum_value = field.enum_type.values_by_number.get(value, None)
+ if enum_value is not None:
+ out.write(enum_value.name)
+ else:
+ out.write(str(value))
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
out.write('\"')
if type(value) is unicode:
@@ -271,24 +277,7 @@ def _MergeScalarField(tokenizer, message, field):
elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
value = tokenizer.ConsumeByteString()
elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
- # Enum can be specified by a number (the enum value), or by
- # a string literal (the enum name).
- enum_descriptor = field.enum_type
- if tokenizer.LookingAtInteger():
- number = tokenizer.ConsumeInt32()
- enum_value = enum_descriptor.values_by_number.get(number, None)
- if enum_value is None:
- raise tokenizer.ParseErrorPreviousToken(
- 'Enum type "%s" has no value with number %d.' % (
- enum_descriptor.full_name, number))
- else:
- identifier = tokenizer.ConsumeIdentifier()
- enum_value = enum_descriptor.values_by_name.get(identifier, None)
- if enum_value is None:
- raise tokenizer.ParseErrorPreviousToken(
- 'Enum type "%s" has no value named %s.' % (
- enum_descriptor.full_name, identifier))
- value = enum_value.number
+ value = tokenizer.ConsumeEnum(field)
else:
raise RuntimeError('Unknown field type %d' % field.type)
@@ -320,12 +309,6 @@ class _Tokenizer(object):
'\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|' # a double-quoted string
'\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)') # a single-quoted string
_IDENTIFIER = re.compile('\w+')
- _INTEGER_CHECKERS = [type_checkers.Uint32ValueChecker(),
- type_checkers.Int32ValueChecker(),
- type_checkers.Uint64ValueChecker(),
- type_checkers.Int64ValueChecker()]
- _FLOAT_INFINITY = re.compile('-?inf(inity)?f?', re.IGNORECASE)
- _FLOAT_NAN = re.compile("nanf?", re.IGNORECASE)
def __init__(self, text_message):
self._text_message = text_message
@@ -394,17 +377,6 @@ class _Tokenizer(object):
if not self.TryConsume(token):
raise self._ParseError('Expected "%s".' % token)
- def LookingAtInteger(self):
- """Checks if the current token is an integer.
-
- Returns:
- True iff the current token is an integer.
- """
- if not self.token:
- return False
- c = self.token[0]
- return (c >= '0' and c <= '9') or c == '-' or c == '+'
-
def ConsumeIdentifier(self):
"""Consumes protocol message field identifier.
@@ -430,9 +402,9 @@ class _Tokenizer(object):
ParseError: If a signed 32bit integer couldn't be consumed.
"""
try:
- result = self._ParseInteger(self.token, is_signed=True, is_long=False)
+ result = ParseInteger(self.token, is_signed=True, is_long=False)
except ValueError, e:
- raise self._IntegerParseError(e)
+ raise self._ParseError(str(e))
self.NextToken()
return result
@@ -446,9 +418,9 @@ class _Tokenizer(object):
ParseError: If an unsigned 32bit integer couldn't be consumed.
"""
try:
- result = self._ParseInteger(self.token, is_signed=False, is_long=False)
+ result = ParseInteger(self.token, is_signed=False, is_long=False)
except ValueError, e:
- raise self._IntegerParseError(e)
+ raise self._ParseError(str(e))
self.NextToken()
return result
@@ -462,9 +434,9 @@ class _Tokenizer(object):
ParseError: If a signed 64bit integer couldn't be consumed.
"""
try:
- result = self._ParseInteger(self.token, is_signed=True, is_long=True)
+ result = ParseInteger(self.token, is_signed=True, is_long=True)
except ValueError, e:
- raise self._IntegerParseError(e)
+ raise self._ParseError(str(e))
self.NextToken()
return result
@@ -478,9 +450,9 @@ class _Tokenizer(object):
ParseError: If an unsigned 64bit integer couldn't be consumed.
"""
try:
- result = self._ParseInteger(self.token, is_signed=False, is_long=True)
+ result = ParseInteger(self.token, is_signed=False, is_long=True)
except ValueError, e:
- raise self._IntegerParseError(e)
+ raise self._ParseError(str(e))
self.NextToken()
return result
@@ -493,21 +465,10 @@ class _Tokenizer(object):
Raises:
ParseError: If a floating point number couldn't be consumed.
"""
- text = self.token
- if self._FLOAT_INFINITY.match(text):
- self.NextToken()
- if text.startswith('-'):
- return -_INFINITY
- return _INFINITY
-
- if self._FLOAT_NAN.match(text):
- self.NextToken()
- return _NAN
-
try:
- result = float(text)
+ result = ParseFloat(self.token)
except ValueError, e:
- raise self._FloatParseError(e)
+ raise self._ParseError(str(e))
self.NextToken()
return result
@@ -520,14 +481,12 @@ class _Tokenizer(object):
Raises:
ParseError: If a boolean value couldn't be consumed.
"""
- if self.token in ('true', 't', '1'):
- self.NextToken()
- return True
- elif self.token in ('false', 'f', '0'):
- self.NextToken()
- return False
- else:
- raise self._ParseError('Expected "true" or "false".')
+ try:
+ result = ParseBool(self.token)
+ except ValueError, e:
+ raise self._ParseError(str(e))
+ self.NextToken()
+ return result
def ConsumeString(self):
"""Consumes a string value.
@@ -567,7 +526,7 @@ class _Tokenizer(object):
"""
text = self.token
if len(text) < 1 or text[0] not in ('\'', '"'):
- raise self._ParseError('Exptected string.')
+ raise self._ParseError('Expected string.')
if len(text) < 2 or text[-1] != text[0]:
raise self._ParseError('String missing ending quote.')
@@ -579,36 +538,12 @@ class _Tokenizer(object):
self.NextToken()
return result
- def _ParseInteger(self, text, is_signed=False, is_long=False):
- """Parses an integer.
-
- Args:
- text: The text to parse.
- is_signed: True if a signed integer must be parsed.
- is_long: True if a long integer must be parsed.
-
- Returns:
- The integer value.
-
- Raises:
- ValueError: Thrown Iff the text is not a valid integer.
- """
- pos = 0
- if text.startswith('-'):
- pos += 1
-
- base = 10
- if text.startswith('0x', pos) or text.startswith('0X', pos):
- base = 16
- elif text.startswith('0', pos):
- base = 8
-
- # Do the actual parsing. Exception handling is propagated to caller.
- result = int(text, base)
-
- # Check if the integer is sane. Exceptions handled by callers.
- checker = self._INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
- checker.CheckValue(result)
+ def ConsumeEnum(self, field):
+ try:
+ result = ParseEnum(field, self.token)
+ except ValueError, e:
+ raise self._ParseError(str(e))
+ self.NextToken()
return result
def ParseErrorPreviousToken(self, message):
@@ -626,13 +561,7 @@ class _Tokenizer(object):
def _ParseError(self, message):
"""Creates and *returns* a ParseError for the current token."""
return ParseError('%d:%d : %s' % (
- self._line + 1, self._column - len(self.token) + 1, message))
-
- def _IntegerParseError(self, e):
- return self._ParseError('Couldn\'t parse integer: ' + str(e))
-
- def _FloatParseError(self, e):
- return self._ParseError('Couldn\'t parse number: ' + str(e))
+ self._line + 1, self._column + 1, message))
def _StringParseError(self, e):
return self._ParseError('Couldn\'t parse string: ' + str(e))
@@ -689,3 +618,117 @@ def _CUnescape(text):
# allow single-digit hex escapes (like '\xf').
result = _CUNESCAPE_HEX.sub(ReplaceHex, text)
return result.decode('string_escape')
+
+
+def ParseInteger(text, is_signed=False, is_long=False):
+ """Parses an integer.
+
+ Args:
+ text: The text to parse.
+ is_signed: True if a signed integer must be parsed.
+ is_long: True if a long integer must be parsed.
+
+ Returns:
+ The integer value.
+
+ Raises:
+ ValueError: Thrown Iff the text is not a valid integer.
+ """
+ # Do the actual parsing. Exception handling is propagated to caller.
+ try:
+ result = int(text, 0)
+ except ValueError:
+ raise ValueError('Couldn\'t parse integer: %s' % text)
+
+ # Check if the integer is sane. Exceptions handled by callers.
+ checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
+ checker.CheckValue(result)
+ return result
+
+
+def ParseFloat(text):
+ """Parse a floating point number.
+
+ Args:
+ text: Text to parse.
+
+ Returns:
+ The number parsed.
+
+ Raises:
+ ValueError: If a floating point number couldn't be parsed.
+ """
+ try:
+ # Assume Python compatible syntax.
+ return float(text)
+ except ValueError:
+ # Check alternative spellings.
+ if _FLOAT_INFINITY.match(text):
+ if text[0] == '-':
+ return float('-inf')
+ else:
+ return float('inf')
+ elif _FLOAT_NAN.match(text):
+ return float('nan')
+ else:
+ # assume '1.0f' format
+ try:
+ return float(text.rstrip('f'))
+ except ValueError:
+ raise ValueError('Couldn\'t parse float: %s' % text)
+
+
+def ParseBool(text):
+ """Parse a boolean value.
+
+ Args:
+ text: Text to parse.
+
+ Returns:
+ Boolean values parsed
+
+ Raises:
+ ValueError: If text is not a valid boolean.
+ """
+ if text in ('true', 't', '1'):
+ return True
+ elif text in ('false', 'f', '0'):
+ return False
+ else:
+ raise ValueError('Expected "true" or "false".')
+
+
+def ParseEnum(field, value):
+ """Parse an enum value.
+
+ The value can be specified by a number (the enum value), or by
+ a string literal (the enum name).
+
+ Args:
+ field: Enum field descriptor.
+ value: String value.
+
+ Returns:
+ Enum value number.
+
+ Raises:
+ ValueError: If the enum value could not be parsed.
+ """
+ enum_descriptor = field.enum_type
+ try:
+ number = int(value, 0)
+ except ValueError:
+ # Identifier.
+ enum_value = enum_descriptor.values_by_name.get(value, None)
+ if enum_value is None:
+ raise ValueError(
+ 'Enum type "%s" has no value named %s.' % (
+ enum_descriptor.full_name, value))
+ else:
+ # Numeric value.
+ enum_value = enum_descriptor.values_by_number.get(number, None)
+ if enum_value is None:
+ raise ValueError(
+ 'Enum type "%s" has no value with number %d.' % (
+ enum_descriptor.full_name, number))
+ return enum_value.number