From f336d4b7a5c1d369ed508e513d482c885705e939 Mon Sep 17 00:00:00 2001
From: Tres Seaver <tseaver@palladion.com>
Date: Tue, 13 Jan 2015 14:21:29 -0500
Subject: Prepare for Python2-Python3 straddle.

- Remove PY25 cruft.

- Selectively apply cleanups from 'python-modernize':

  - New exception syntax.
  - Use 'six' to handle module renames.
  - Use 'six' to handle text / binary stuff.

This PR covers most of the work from #66 which falls inside `python`
(rather than the Python code generation stuff in 'src').
---
 python/google/protobuf/text_format.py | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

(limited to 'python/google/protobuf/text_format.py')

diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py
index fb54c50c..87b5c222 100755
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@@ -28,8 +28,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#PY25 compatible for GAE.
-#
 # Copyright 2007 Google Inc. All Rights Reserved.
 
 """Contains routines for printing protocol messages in text format."""
@@ -39,6 +37,8 @@ __author__ = 'kenton@google.com (Kenton Varda)'
 import cStringIO
 import re
 
+import six
+
 from google.protobuf.internal import type_checkers
 from google.protobuf import descriptor
 from google.protobuf import text_encoding
@@ -195,7 +195,7 @@ def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
       out.write(str(value))
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
     out.write('\"')
-    if isinstance(value, unicode):
+    if isinstance(value, six.text_type):
       out_value = value.encode('utf-8')
     else:
       out_value = value
@@ -505,7 +505,7 @@ class _Tokenizer(object):
   def _PopLine(self):
     while len(self._current_line) <= self._column:
       try:
-        self._current_line = self._lines.next()
+        self._current_line = next(self._lines)
       except StopIteration:
         self._current_line = ''
         self._more_lines = False
@@ -575,7 +575,7 @@ class _Tokenizer(object):
     """
     try:
       result = ParseInteger(self.token, is_signed=True, is_long=False)
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -591,7 +591,7 @@ class _Tokenizer(object):
     """
     try:
       result = ParseInteger(self.token, is_signed=False, is_long=False)
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -607,7 +607,7 @@ class _Tokenizer(object):
     """
     try:
       result = ParseInteger(self.token, is_signed=True, is_long=True)
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -623,7 +623,7 @@ class _Tokenizer(object):
     """
     try:
       result = ParseInteger(self.token, is_signed=False, is_long=True)
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -639,7 +639,7 @@ class _Tokenizer(object):
     """
     try:
       result = ParseFloat(self.token)
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -655,7 +655,7 @@ class _Tokenizer(object):
     """
     try:
       result = ParseBool(self.token)
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -671,8 +671,8 @@ class _Tokenizer(object):
     """
     the_bytes = self.ConsumeByteString()
     try:
-      return unicode(the_bytes, 'utf-8')
-    except UnicodeDecodeError, e:
+      return six.text_type(the_bytes, 'utf-8')
+    except UnicodeDecodeError as e:
       raise self._StringParseError(e)
 
   def ConsumeByteString(self):
@@ -687,8 +687,7 @@ class _Tokenizer(object):
     the_list = [self._ConsumeSingleByteString()]
     while self.token and self.token[0] in ('\'', '"'):
       the_list.append(self._ConsumeSingleByteString())
-    return ''.encode('latin1').join(the_list)  ##PY25
-##!PY25    return b''.join(the_list)
+    return b''.join(the_list)
 
   def _ConsumeSingleByteString(self):
     """Consume one token of a string literal.
@@ -706,7 +705,7 @@ class _Tokenizer(object):
 
     try:
       result = text_encoding.CUnescape(text[1:-1])
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -714,7 +713,7 @@ class _Tokenizer(object):
   def ConsumeEnum(self, field):
     try:
       result = ParseEnum(field, self.token)
-    except ValueError, e:
+    except ValueError as e:
       raise self._ParseError(str(e))
     self.NextToken()
     return result
@@ -779,7 +778,7 @@ def ParseInteger(text, is_signed=False, is_long=False):
     # alternate implementations where the distinction is more significant
     # (e.g. the C++ implementation) simpler.
     if is_long:
-      result = long(text, 0)
+      result = int(text, 0)
     else:
       result = int(text, 0)
   except ValueError:
-- 
cgit v1.2.3


From 47ee4d37c17db8e97fe5b15cf918ab56ff93bb18 Mon Sep 17 00:00:00 2001
From: Tres Seaver <tseaver@palladion.com>
Date: Tue, 13 Jan 2015 15:04:41 -0500
Subject: Use 'io.BytesIO' rather than 'cStringIO.StringIO'.

---
 python/google/protobuf/internal/encoder.py | 2 +-
 python/google/protobuf/text_format.py      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'python/google/protobuf/text_format.py')

diff --git a/python/google/protobuf/internal/encoder.py b/python/google/protobuf/internal/encoder.py
index 21ed2ed7..fa22a9dd 100755
--- a/python/google/protobuf/internal/encoder.py
+++ b/python/google/protobuf/internal/encoder.py
@@ -43,7 +43,7 @@ FieldDescriptor) we construct two functions:  a "sizer" and an "encoder".  The
 sizer takes a value of this field's type and computes its byte size.  The
 encoder takes a writer function and a value.  It encodes the value into byte
 strings and invokes the writer function to write those strings.  Typically the
-writer function is the write() method of a cStringIO.
+writer function is the write() method of a BytesIO.
 
 We try to do as much work as possible when constructing the writer and the
 sizer rather than when calling them.  In particular:
diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py
index 87b5c222..c50930ef 100755
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@@ -34,7 +34,7 @@
 
 __author__ = 'kenton@google.com (Kenton Varda)'
 
-import cStringIO
+import io
 import re
 
 import six
@@ -89,7 +89,7 @@ def MessageToString(message, as_utf8=False, as_one_line=False,
   Returns:
     A string of the text formatted protocol buffer message.
   """
-  out = cStringIO.StringIO()
+  out = io.BytesIO()
   PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
                pointy_brackets=pointy_brackets,
                use_index_order=use_index_order,
-- 
cgit v1.2.3


From fe7d9379df3ce7c951bc0652a451413cff02382a Mon Sep 17 00:00:00 2001
From: Dan O'Reilly <oreilldf@gmail.com>
Date: Fri, 14 Aug 2015 15:26:33 -0400
Subject: Fixing some long/int bugs

Signed-off-by: Dan O'Reilly <oreilldf@gmail.com>
---
 python/google/protobuf/internal/decoder.py         |  7 +++++--
 python/google/protobuf/internal/reflection_test.py | 10 +++++-----
 python/google/protobuf/internal/type_checkers.py   | 11 +++++++----
 python/google/protobuf/text_format.py              |  5 ++++-
 4 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'python/google/protobuf/text_format.py')

diff --git a/python/google/protobuf/internal/decoder.py b/python/google/protobuf/internal/decoder.py
index 130386f2..4fd7a864 100755
--- a/python/google/protobuf/internal/decoder.py
+++ b/python/google/protobuf/internal/decoder.py
@@ -86,6 +86,9 @@ import struct
 
 import six
 
+if six.PY3:
+  long = int
+
 from google.protobuf.internal import encoder
 from google.protobuf.internal import wire_format
 from google.protobuf import message
@@ -157,8 +160,8 @@ def _SignedVarintDecoder(mask, result_type):
 # alternate implementations where the distinction is more significant
 # (e.g. the C++ implementation) simpler.
 
-_DecodeVarint = _VarintDecoder((1 << 64) - 1, int)
-_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1, int)
+_DecodeVarint = _VarintDecoder((1 << 64) - 1, long)
+_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1, long)
 
 # Use these versions for values which must be limited to 32 bits.
 _DecodeVarint32 = _VarintDecoder((1 << 32) - 1, int)
diff --git a/python/google/protobuf/internal/reflection_test.py b/python/google/protobuf/internal/reflection_test.py
index d1c18c1f..9fe3abee 100755
--- a/python/google/protobuf/internal/reflection_test.py
+++ b/python/google/protobuf/internal/reflection_test.py
@@ -630,17 +630,17 @@ class ReflectionTest(unittest.TestCase):
     TestGetAndDeserialize('optional_int32', 1, int)
     TestGetAndDeserialize('optional_int32', 1 << 30, int)
     TestGetAndDeserialize('optional_uint32', 1 << 30, int)
+    try:
+      integer_64 = long
+    except NameError: # Python3
+      integer_64 = int
     if struct.calcsize('L') == 4:
       # Python only has signed ints, so 32-bit python can't fit an uint32
       # in an int.
-      TestGetAndDeserialize('optional_uint32', 1 << 31, int)
+      TestGetAndDeserialize('optional_uint32', 1 << 31, long)
     else:
       # 64-bit python can fit uint32 inside an int
       TestGetAndDeserialize('optional_uint32', 1 << 31, int)
-    try:
-      integer_64 = long
-    except NameError: # Python3
-      integer_64 = int
     TestGetAndDeserialize('optional_int64', 1 << 30, integer_64)
     TestGetAndDeserialize('optional_int64', 1 << 60, integer_64)
     TestGetAndDeserialize('optional_uint64', 1 << 30, integer_64)
diff --git a/python/google/protobuf/internal/type_checkers.py b/python/google/protobuf/internal/type_checkers.py
index 363018ed..8fa3d8c8 100755
--- a/python/google/protobuf/internal/type_checkers.py
+++ b/python/google/protobuf/internal/type_checkers.py
@@ -49,6 +49,9 @@ __author__ = 'robinson@google.com (Will Robinson)'
 
 import six
 
+if six.PY3:
+  long = int
+
 from google.protobuf.internal import decoder
 from google.protobuf.internal import encoder
 from google.protobuf.internal import wire_format
@@ -195,13 +198,13 @@ class Uint32ValueChecker(IntValueChecker):
 class Int64ValueChecker(IntValueChecker):
   _MIN = -(1 << 63)
   _MAX = (1 << 63) - 1
-  _TYPE = int
+  _TYPE = long
 
 
 class Uint64ValueChecker(IntValueChecker):
   _MIN = 0
   _MAX = (1 << 64) - 1
-  _TYPE = int
+  _TYPE = long
 
 
 # Type-checkers for all scalar CPPTYPEs.
@@ -211,9 +214,9 @@ _VALUE_CHECKERS = {
     _FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(),
     _FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(),
     _FieldDescriptor.CPPTYPE_DOUBLE: TypeChecker(
-        float, int, int),
+        float, int, long),
     _FieldDescriptor.CPPTYPE_FLOAT: TypeChecker(
-        float, int, int),
+        float, int, long),
     _FieldDescriptor.CPPTYPE_BOOL: TypeChecker(bool, int),
     _FieldDescriptor.CPPTYPE_STRING: TypeChecker(bytes),
     }
diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py
index 6dd7f551..d4c4610f 100755
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@@ -39,6 +39,9 @@ import re
 
 import six
 
+if six.PY3:
+  long = int
+
 from google.protobuf.internal import type_checkers
 from google.protobuf import descriptor
 from google.protobuf import text_encoding
@@ -813,7 +816,7 @@ def ParseInteger(text, is_signed=False, is_long=False):
     # alternate implementations where the distinction is more significant
     # (e.g. the C++ implementation) simpler.
     if is_long:
-      result = int(text, 0)
+      result = long(text, 0)
     else:
       result = int(text, 0)
   except ValueError:
-- 
cgit v1.2.3


From 3d5aa6aef97f7ba9394f226778fdba91a9f89d59 Mon Sep 17 00:00:00 2001
From: Dan O'Reilly <oreilldf@gmail.com>
Date: Fri, 14 Aug 2015 16:12:34 -0400
Subject: Fix some more Python 3 compat issues

Signed-off-by: Dan O'Reilly <oreilldf@gmail.com>
---
 python/google/protobuf/internal/message_test.py    | 25 +++++++++++++---------
 .../google/protobuf/internal/text_format_test.py   |  2 +-
 python/google/protobuf/text_format.py              | 12 ++++++++---
 3 files changed, 25 insertions(+), 14 deletions(-)

(limited to 'python/google/protobuf/text_format.py')

diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py
index 4dc92752..66356c92 100755
--- a/python/google/protobuf/internal/message_test.py
+++ b/python/google/protobuf/internal/message_test.py
@@ -49,6 +49,11 @@ import operator
 import pickle
 import sys
 
+import six
+
+if six.PY3:
+  long = int
+
 import unittest
 from google.protobuf.internal import _parameterized
 from google.protobuf import map_unittest_pb2
@@ -675,7 +680,7 @@ class MessageTest(unittest.TestCase):
     in the value being converted to a Unicode string."""
     m = message_module.TestAllTypes()
     m.optional_string = str('')
-    self.assertTrue(isinstance(m.optional_string, unicode))
+    self.assertTrue(isinstance(m.optional_string, six.text_type))
 
 # TODO(haberman): why are these tests Google-internal only?
 
@@ -1228,7 +1233,7 @@ class Proto3Test(unittest.TestCase):
     self.assertTrue('abc' in msg.map_string_string)
     self.assertTrue(888 in msg.map_int32_enum)
 
-    self.assertTrue(isinstance(msg.map_string_string['abc'], unicode))
+    self.assertTrue(isinstance(msg.map_string_string['abc'], six.text_type))
 
     # Accessing an unset key still throws TypeError of the type of the key
     # is incorrect.
@@ -1311,13 +1316,13 @@ class Proto3Test(unittest.TestCase):
 
     msg.map_string_string[bytes_obj] = bytes_obj
 
-    (key, value) = msg.map_string_string.items()[0]
+    (key, value) = list(msg.map_string_string.items())[0]
 
     self.assertEqual(key, unicode_obj)
     self.assertEqual(value, unicode_obj)
 
-    self.assertTrue(isinstance(key, unicode))
-    self.assertTrue(isinstance(value, unicode))
+    self.assertTrue(isinstance(key, six.text_type))
+    self.assertTrue(isinstance(value, six.text_type))
 
   def testMessageMap(self):
     msg = map_unittest_pb2.TestMap()
@@ -1502,7 +1507,7 @@ class Proto3Test(unittest.TestCase):
   def testMapIteration(self):
     msg = map_unittest_pb2.TestMap()
 
-    for k, v in msg.map_int32_int32.iteritems():
+    for k, v in msg.map_int32_int32.items():
       # Should not be reached.
       self.assertTrue(False)
 
@@ -1512,7 +1517,7 @@ class Proto3Test(unittest.TestCase):
     self.assertEqual(3, len(msg.map_int32_int32))
 
     matching_dict = {2: 4, 3: 6, 4: 8}
-    self.assertMapIterEquals(msg.map_int32_int32.iteritems(), matching_dict)
+    self.assertMapIterEquals(msg.map_int32_int32.items(), matching_dict)
 
   def testMapIterationClearMessage(self):
     # Iterator needs to work even if message and map are deleted.
@@ -1522,7 +1527,7 @@ class Proto3Test(unittest.TestCase):
     msg.map_int32_int32[3] = 6
     msg.map_int32_int32[4] = 8
 
-    it = msg.map_int32_int32.iteritems()
+    it = msg.map_int32_int32.items()
     del msg
 
     matching_dict = {2: 4, 3: 6, 4: 8}
@@ -1550,7 +1555,7 @@ class Proto3Test(unittest.TestCase):
 
     msg.ClearField('map_int32_int32')
     matching_dict = {2: 4, 3: 6, 4: 8}
-    self.assertMapIterEquals(map.iteritems(), matching_dict)
+    self.assertMapIterEquals(map.items(), matching_dict)
 
   def testMapIterValidAfterFieldCleared(self):
     # Map iterator needs to work even if field is cleared.
@@ -1562,7 +1567,7 @@ class Proto3Test(unittest.TestCase):
     msg.map_int32_int32[3] = 6
     msg.map_int32_int32[4] = 8
 
-    it = msg.map_int32_int32.iteritems()
+    it = msg.map_int32_int32.items()
 
     msg.ClearField('map_int32_int32')
     matching_dict = {2: 4, 3: 6, 4: 8}
diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py
index 55b32249..49e6332c 100755
--- a/python/google/protobuf/internal/text_format_test.py
+++ b/python/google/protobuf/internal/text_format_test.py
@@ -101,7 +101,7 @@ class TextFormatTest(TextFormatBase):
         'repeated_string: "\\303\\274\\352\\234\\237"\n')
 
   def testPrintExoticUnicodeSubclass(self, message_module):
-    class UnicodeSub(unicode):
+    class UnicodeSub(six.text_type):
       pass
     message = message_module.TestAllTypes()
     message.repeated_string.append(UnicodeSub(u'\u00fc\ua71f'))
diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py
index d4c4610f..5e4d10b1 100755
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@@ -92,7 +92,10 @@ def MessageToString(message, as_utf8=False, as_one_line=False,
   Returns:
     A string of the text formatted protocol buffer message.
   """
-  out = io.BytesIO()
+  if as_utf8:
+    out = io.BytesIO()
+  else:
+    out = io.BytesIO()
   PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
                pointy_brackets=pointy_brackets,
                use_index_order=use_index_order,
@@ -139,7 +142,6 @@ def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                  use_index_order=use_index_order,
                  float_format=float_format)
 
-
 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
                pointy_brackets=False, use_index_order=False, float_format=None):
   """Print a single field name/value pair.  For repeated fields, the value
@@ -160,7 +162,11 @@ def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
     # For groups, use the capitalized name.
     out.write(field.message_type.name)
   else:
-    out.write(field.name)
+    if isinstance(field.name, six.text_type):
+      name = field.name.encode('utf-8')
+    else:
+      name = field.name
+    out.write(name)
 
   if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
     # The colon is optional in this case, but our cross-language golden files
-- 
cgit v1.2.3


From 7601551f7c967da66f0ae1d20fcdd23d77c46b95 Mon Sep 17 00:00:00 2001
From: Dan O'Reilly <oreilldf@gmail.com>
Date: Fri, 14 Aug 2015 23:22:47 -0400
Subject: Just always use BytesIO in text_format for now

Signed-off-by: Dan O'Reilly <oreilldf@gmail.com>
---
 python/google/protobuf/text_format.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'python/google/protobuf/text_format.py')

diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py
index 5e4d10b1..950bec16 100755
--- a/python/google/protobuf/text_format.py
+++ b/python/google/protobuf/text_format.py
@@ -92,10 +92,7 @@ def MessageToString(message, as_utf8=False, as_one_line=False,
   Returns:
     A string of the text formatted protocol buffer message.
   """
-  if as_utf8:
-    out = io.BytesIO()
-  else:
-    out = io.BytesIO()
+  out = io.BytesIO()
   PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,
                pointy_brackets=pointy_brackets,
                use_index_order=use_index_order,
-- 
cgit v1.2.3