From f336d4b7a5c1d369ed508e513d482c885705e939 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 13 Jan 2015 14:21:29 -0500 Subject: Prepare for Python2-Python3 straddle. - Remove PY25 cruft. - Selectively apply cleanups from 'python-modernize': - New exception syntax. - Use 'six' to handle module renames. - Use 'six' to handle text / binary stuff. This PR covers most of the work from #66 which falls inside `python` (rather than the Python code generation stuff in 'src'). --- python/google/protobuf/text_format.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'python/google/protobuf/text_format.py') diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index fb54c50c..87b5c222 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -28,8 +28,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#PY25 compatible for GAE. -# # Copyright 2007 Google Inc. All Rights Reserved. """Contains routines for printing protocol messages in text format.""" @@ -39,6 +37,8 @@ __author__ = 'kenton@google.com (Kenton Varda)' import cStringIO import re +import six + from google.protobuf.internal import type_checkers from google.protobuf import descriptor from google.protobuf import text_encoding @@ -195,7 +195,7 @@ def PrintFieldValue(field, value, out, indent=0, as_utf8=False, out.write(str(value)) elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: out.write('\"') - if isinstance(value, unicode): + if isinstance(value, six.text_type): out_value = value.encode('utf-8') else: out_value = value @@ -505,7 +505,7 @@ class _Tokenizer(object): def _PopLine(self): while len(self._current_line) <= self._column: try: - self._current_line = self._lines.next() + self._current_line = next(self._lines) except StopIteration: self._current_line = '' self._more_lines = False @@ -575,7 +575,7 @@ class _Tokenizer(object): """ try: result = ParseInteger(self.token, is_signed=True, is_long=False) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -591,7 +591,7 @@ class _Tokenizer(object): """ try: result = ParseInteger(self.token, is_signed=False, is_long=False) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -607,7 +607,7 @@ class _Tokenizer(object): """ try: result = ParseInteger(self.token, is_signed=True, is_long=True) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -623,7 +623,7 @@ class _Tokenizer(object): """ try: result = ParseInteger(self.token, is_signed=False, is_long=True) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -639,7 +639,7 @@ class _Tokenizer(object): """ try: result = ParseFloat(self.token) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -655,7 +655,7 @@ class _Tokenizer(object): """ try: result = ParseBool(self.token) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -671,8 +671,8 @@ class _Tokenizer(object): """ the_bytes = self.ConsumeByteString() try: - return unicode(the_bytes, 'utf-8') - except UnicodeDecodeError, e: + return six.text_type(the_bytes, 'utf-8') + except UnicodeDecodeError as e: raise self._StringParseError(e) def ConsumeByteString(self): @@ -687,8 +687,7 @@ class _Tokenizer(object): the_list = [self._ConsumeSingleByteString()] while self.token and self.token[0] in ('\'', '"'): the_list.append(self._ConsumeSingleByteString()) - return ''.encode('latin1').join(the_list) ##PY25 -##!PY25 return b''.join(the_list) + return b''.join(the_list) def _ConsumeSingleByteString(self): """Consume one token of a string literal. @@ -706,7 +705,7 @@ class _Tokenizer(object): try: result = text_encoding.CUnescape(text[1:-1]) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -714,7 +713,7 @@ class _Tokenizer(object): def ConsumeEnum(self, field): try: result = ParseEnum(field, self.token) - except ValueError, e: + except ValueError as e: raise self._ParseError(str(e)) self.NextToken() return result @@ -779,7 +778,7 @@ def ParseInteger(text, is_signed=False, is_long=False): # alternate implementations where the distinction is more significant # (e.g. the C++ implementation) simpler. if is_long: - result = long(text, 0) + result = int(text, 0) else: result = int(text, 0) except ValueError: -- cgit v1.2.3 From 47ee4d37c17db8e97fe5b15cf918ab56ff93bb18 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 13 Jan 2015 15:04:41 -0500 Subject: Use 'io.BytesIO' rather than 'cStringIO.StringIO'. --- python/google/protobuf/internal/encoder.py | 2 +- python/google/protobuf/text_format.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'python/google/protobuf/text_format.py') diff --git a/python/google/protobuf/internal/encoder.py b/python/google/protobuf/internal/encoder.py index 21ed2ed7..fa22a9dd 100755 --- a/python/google/protobuf/internal/encoder.py +++ b/python/google/protobuf/internal/encoder.py @@ -43,7 +43,7 @@ FieldDescriptor) we construct two functions: a "sizer" and an "encoder". The sizer takes a value of this field's type and computes its byte size. The encoder takes a writer function and a value. It encodes the value into byte strings and invokes the writer function to write those strings. Typically the -writer function is the write() method of a cStringIO. +writer function is the write() method of a BytesIO. We try to do as much work as possible when constructing the writer and the sizer rather than when calling them. In particular: diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index 87b5c222..c50930ef 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -34,7 +34,7 @@ __author__ = 'kenton@google.com (Kenton Varda)' -import cStringIO +import io import re import six @@ -89,7 +89,7 @@ def MessageToString(message, as_utf8=False, as_one_line=False, Returns: A string of the text formatted protocol buffer message. """ - out = cStringIO.StringIO() + out = io.BytesIO() PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line, pointy_brackets=pointy_brackets, use_index_order=use_index_order, -- cgit v1.2.3 From fe7d9379df3ce7c951bc0652a451413cff02382a Mon Sep 17 00:00:00 2001 From: Dan O'Reilly Date: Fri, 14 Aug 2015 15:26:33 -0400 Subject: Fixing some long/int bugs Signed-off-by: Dan O'Reilly --- python/google/protobuf/internal/decoder.py | 7 +++++-- python/google/protobuf/internal/reflection_test.py | 10 +++++----- python/google/protobuf/internal/type_checkers.py | 11 +++++++---- python/google/protobuf/text_format.py | 5 ++++- 4 files changed, 21 insertions(+), 12 deletions(-) (limited to 'python/google/protobuf/text_format.py') diff --git a/python/google/protobuf/internal/decoder.py b/python/google/protobuf/internal/decoder.py index 130386f2..4fd7a864 100755 --- a/python/google/protobuf/internal/decoder.py +++ b/python/google/protobuf/internal/decoder.py @@ -86,6 +86,9 @@ import struct import six +if six.PY3: + long = int + from google.protobuf.internal import encoder from google.protobuf.internal import wire_format from google.protobuf import message @@ -157,8 +160,8 @@ def _SignedVarintDecoder(mask, result_type): # alternate implementations where the distinction is more significant # (e.g. the C++ implementation) simpler. -_DecodeVarint = _VarintDecoder((1 << 64) - 1, int) -_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1, int) +_DecodeVarint = _VarintDecoder((1 << 64) - 1, long) +_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1, long) # Use these versions for values which must be limited to 32 bits. _DecodeVarint32 = _VarintDecoder((1 << 32) - 1, int) diff --git a/python/google/protobuf/internal/reflection_test.py b/python/google/protobuf/internal/reflection_test.py index d1c18c1f..9fe3abee 100755 --- a/python/google/protobuf/internal/reflection_test.py +++ b/python/google/protobuf/internal/reflection_test.py @@ -630,17 +630,17 @@ class ReflectionTest(unittest.TestCase): TestGetAndDeserialize('optional_int32', 1, int) TestGetAndDeserialize('optional_int32', 1 << 30, int) TestGetAndDeserialize('optional_uint32', 1 << 30, int) + try: + integer_64 = long + except NameError: # Python3 + integer_64 = int if struct.calcsize('L') == 4: # Python only has signed ints, so 32-bit python can't fit an uint32 # in an int. - TestGetAndDeserialize('optional_uint32', 1 << 31, int) + TestGetAndDeserialize('optional_uint32', 1 << 31, long) else: # 64-bit python can fit uint32 inside an int TestGetAndDeserialize('optional_uint32', 1 << 31, int) - try: - integer_64 = long - except NameError: # Python3 - integer_64 = int TestGetAndDeserialize('optional_int64', 1 << 30, integer_64) TestGetAndDeserialize('optional_int64', 1 << 60, integer_64) TestGetAndDeserialize('optional_uint64', 1 << 30, integer_64) diff --git a/python/google/protobuf/internal/type_checkers.py b/python/google/protobuf/internal/type_checkers.py index 363018ed..8fa3d8c8 100755 --- a/python/google/protobuf/internal/type_checkers.py +++ b/python/google/protobuf/internal/type_checkers.py @@ -49,6 +49,9 @@ __author__ = 'robinson@google.com (Will Robinson)' import six +if six.PY3: + long = int + from google.protobuf.internal import decoder from google.protobuf.internal import encoder from google.protobuf.internal import wire_format @@ -195,13 +198,13 @@ class Uint32ValueChecker(IntValueChecker): class Int64ValueChecker(IntValueChecker): _MIN = -(1 << 63) _MAX = (1 << 63) - 1 - _TYPE = int + _TYPE = long class Uint64ValueChecker(IntValueChecker): _MIN = 0 _MAX = (1 << 64) - 1 - _TYPE = int + _TYPE = long # Type-checkers for all scalar CPPTYPEs. @@ -211,9 +214,9 @@ _VALUE_CHECKERS = { _FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(), _FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(), _FieldDescriptor.CPPTYPE_DOUBLE: TypeChecker( - float, int, int), + float, int, long), _FieldDescriptor.CPPTYPE_FLOAT: TypeChecker( - float, int, int), + float, int, long), _FieldDescriptor.CPPTYPE_BOOL: TypeChecker(bool, int), _FieldDescriptor.CPPTYPE_STRING: TypeChecker(bytes), } diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index 6dd7f551..d4c4610f 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -39,6 +39,9 @@ import re import six +if six.PY3: + long = int + from google.protobuf.internal import type_checkers from google.protobuf import descriptor from google.protobuf import text_encoding @@ -813,7 +816,7 @@ def ParseInteger(text, is_signed=False, is_long=False): # alternate implementations where the distinction is more significant # (e.g. the C++ implementation) simpler. if is_long: - result = int(text, 0) + result = long(text, 0) else: result = int(text, 0) except ValueError: -- cgit v1.2.3 From 3d5aa6aef97f7ba9394f226778fdba91a9f89d59 Mon Sep 17 00:00:00 2001 From: Dan O'Reilly Date: Fri, 14 Aug 2015 16:12:34 -0400 Subject: Fix some more Python 3 compat issues Signed-off-by: Dan O'Reilly --- python/google/protobuf/internal/message_test.py | 25 +++++++++++++--------- .../google/protobuf/internal/text_format_test.py | 2 +- python/google/protobuf/text_format.py | 12 ++++++++--- 3 files changed, 25 insertions(+), 14 deletions(-) (limited to 'python/google/protobuf/text_format.py') diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py index 4dc92752..66356c92 100755 --- a/python/google/protobuf/internal/message_test.py +++ b/python/google/protobuf/internal/message_test.py @@ -49,6 +49,11 @@ import operator import pickle import sys +import six + +if six.PY3: + long = int + import unittest from google.protobuf.internal import _parameterized from google.protobuf import map_unittest_pb2 @@ -675,7 +680,7 @@ class MessageTest(unittest.TestCase): in the value being converted to a Unicode string.""" m = message_module.TestAllTypes() m.optional_string = str('') - self.assertTrue(isinstance(m.optional_string, unicode)) + self.assertTrue(isinstance(m.optional_string, six.text_type)) # TODO(haberman): why are these tests Google-internal only? @@ -1228,7 +1233,7 @@ class Proto3Test(unittest.TestCase): self.assertTrue('abc' in msg.map_string_string) self.assertTrue(888 in msg.map_int32_enum) - self.assertTrue(isinstance(msg.map_string_string['abc'], unicode)) + self.assertTrue(isinstance(msg.map_string_string['abc'], six.text_type)) # Accessing an unset key still throws TypeError of the type of the key # is incorrect. @@ -1311,13 +1316,13 @@ class Proto3Test(unittest.TestCase): msg.map_string_string[bytes_obj] = bytes_obj - (key, value) = msg.map_string_string.items()[0] + (key, value) = list(msg.map_string_string.items())[0] self.assertEqual(key, unicode_obj) self.assertEqual(value, unicode_obj) - self.assertTrue(isinstance(key, unicode)) - self.assertTrue(isinstance(value, unicode)) + self.assertTrue(isinstance(key, six.text_type)) + self.assertTrue(isinstance(value, six.text_type)) def testMessageMap(self): msg = map_unittest_pb2.TestMap() @@ -1502,7 +1507,7 @@ class Proto3Test(unittest.TestCase): def testMapIteration(self): msg = map_unittest_pb2.TestMap() - for k, v in msg.map_int32_int32.iteritems(): + for k, v in msg.map_int32_int32.items(): # Should not be reached. self.assertTrue(False) @@ -1512,7 +1517,7 @@ class Proto3Test(unittest.TestCase): self.assertEqual(3, len(msg.map_int32_int32)) matching_dict = {2: 4, 3: 6, 4: 8} - self.assertMapIterEquals(msg.map_int32_int32.iteritems(), matching_dict) + self.assertMapIterEquals(msg.map_int32_int32.items(), matching_dict) def testMapIterationClearMessage(self): # Iterator needs to work even if message and map are deleted. @@ -1522,7 +1527,7 @@ class Proto3Test(unittest.TestCase): msg.map_int32_int32[3] = 6 msg.map_int32_int32[4] = 8 - it = msg.map_int32_int32.iteritems() + it = msg.map_int32_int32.items() del msg matching_dict = {2: 4, 3: 6, 4: 8} @@ -1550,7 +1555,7 @@ class Proto3Test(unittest.TestCase): msg.ClearField('map_int32_int32') matching_dict = {2: 4, 3: 6, 4: 8} - self.assertMapIterEquals(map.iteritems(), matching_dict) + self.assertMapIterEquals(map.items(), matching_dict) def testMapIterValidAfterFieldCleared(self): # Map iterator needs to work even if field is cleared. @@ -1562,7 +1567,7 @@ class Proto3Test(unittest.TestCase): msg.map_int32_int32[3] = 6 msg.map_int32_int32[4] = 8 - it = msg.map_int32_int32.iteritems() + it = msg.map_int32_int32.items() msg.ClearField('map_int32_int32') matching_dict = {2: 4, 3: 6, 4: 8} diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py index 55b32249..49e6332c 100755 --- a/python/google/protobuf/internal/text_format_test.py +++ b/python/google/protobuf/internal/text_format_test.py @@ -101,7 +101,7 @@ class TextFormatTest(TextFormatBase): 'repeated_string: "\\303\\274\\352\\234\\237"\n') def testPrintExoticUnicodeSubclass(self, message_module): - class UnicodeSub(unicode): + class UnicodeSub(six.text_type): pass message = message_module.TestAllTypes() message.repeated_string.append(UnicodeSub(u'\u00fc\ua71f')) diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index d4c4610f..5e4d10b1 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -92,7 +92,10 @@ def MessageToString(message, as_utf8=False, as_one_line=False, Returns: A string of the text formatted protocol buffer message. """ - out = io.BytesIO() + if as_utf8: + out = io.BytesIO() + else: + out = io.BytesIO() PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line, pointy_brackets=pointy_brackets, use_index_order=use_index_order, @@ -139,7 +142,6 @@ def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, use_index_order=use_index_order, float_format=float_format) - def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, pointy_brackets=False, use_index_order=False, float_format=None): """Print a single field name/value pair. For repeated fields, the value @@ -160,7 +162,11 @@ def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, # For groups, use the capitalized name. out.write(field.message_type.name) else: - out.write(field.name) + if isinstance(field.name, six.text_type): + name = field.name.encode('utf-8') + else: + name = field.name + out.write(name) if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE: # The colon is optional in this case, but our cross-language golden files -- cgit v1.2.3 From 7601551f7c967da66f0ae1d20fcdd23d77c46b95 Mon Sep 17 00:00:00 2001 From: Dan O'Reilly Date: Fri, 14 Aug 2015 23:22:47 -0400 Subject: Just always uses BytseIO in text_format for now Signed-off-by: Dan O'Reilly --- python/google/protobuf/text_format.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'python/google/protobuf/text_format.py') diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index 5e4d10b1..950bec16 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -92,10 +92,7 @@ def MessageToString(message, as_utf8=False, as_one_line=False, Returns: A string of the text formatted protocol buffer message. """ - if as_utf8: - out = io.BytesIO() - else: - out = io.BytesIO() + out = io.BytesIO() PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line, pointy_brackets=pointy_brackets, use_index_order=use_index_order, -- cgit v1.2.3