From 1a7a7fca804afa1cf67f8be5e71092898ba40334 Mon Sep 17 00:00:00 2001 From: Jisi Liu Date: Wed, 18 Oct 2017 12:22:18 -0700 Subject: Merge from google internal --- python/google/protobuf/internal/any_test.proto | 1 + .../google/protobuf/internal/api_implementation.py | 2 +- .../google/protobuf/internal/json_format_test.py | 30 ++++++++----- .../protobuf/internal/message_factory_test.py | 13 +++++- python/google/protobuf/internal/message_test.py | 14 +++++- .../google/protobuf/internal/text_format_test.py | 52 ++++++++++++++++------ .../protobuf/internal/unknown_fields_test.py | 1 - .../google/protobuf/internal/well_known_types.py | 34 +++++++++++++- .../protobuf/internal/well_known_types_test.py | 43 ++++++++++++++++++ python/google/protobuf/json_format.py | 5 +++ python/google/protobuf/message_factory.py | 13 +++++- python/google/protobuf/pyext/extension_dict.cc | 5 ++- python/google/protobuf/pyext/message.cc | 5 +++ python/google/protobuf/text_format.py | 15 ++++++- 14 files changed, 195 insertions(+), 38 deletions(-) (limited to 'python') diff --git a/python/google/protobuf/internal/any_test.proto b/python/google/protobuf/internal/any_test.proto index 76a7ebd6..1a563fd9 100644 --- a/python/google/protobuf/internal/any_test.proto +++ b/python/google/protobuf/internal/any_test.proto @@ -39,6 +39,7 @@ import "google/protobuf/any.proto"; message TestAny { optional google.protobuf.Any value = 1; optional int32 int_value = 2; + map map_value = 3; extensions 10 to max; } diff --git a/python/google/protobuf/internal/api_implementation.py b/python/google/protobuf/internal/api_implementation.py index bce71bb8..553fcdb6 100755 --- a/python/google/protobuf/internal/api_implementation.py +++ b/python/google/protobuf/internal/api_implementation.py @@ -159,7 +159,7 @@ if _implementation_type == 'cpp': # Unrecognized cpp implementation. Skipping the unknown fields APIs. pass else: - _python_proto3_preserve_unknowns_default = False + _python_proto3_preserve_unknowns_default = True def GetPythonProto3PreserveUnknownsDefault(): return _python_proto3_preserve_unknowns_default diff --git a/python/google/protobuf/internal/json_format_test.py b/python/google/protobuf/internal/json_format_test.py index b2cf7622..19182b7f 100644 --- a/python/google/protobuf/internal/json_format_test.py +++ b/python/google/protobuf/internal/json_format_test.py @@ -50,6 +50,7 @@ from google.protobuf import struct_pb2 from google.protobuf import timestamp_pb2 from google.protobuf import wrappers_pb2 from google.protobuf import unittest_mset_pb2 +from google.protobuf import unittest_pb2 from google.protobuf.internal import well_known_types from google.protobuf import json_format from google.protobuf.util import json_format_proto3_pb2 @@ -159,15 +160,15 @@ class JsonFormatTest(JsonFormatBase): json_format.Parse(text, parsed_message) self.assertEqual(message, parsed_message) - def testUnknownEnumToJsonError(self): + def testUnknownEnumToJsonAndBack(self): + text = '{\n "enumValue": 999\n}' message = json_format_proto3_pb2.TestMessage() message.enum_value = 999 - # TODO(jieluo): should accept numeric unknown enum for proto3. - with self.assertRaises(json_format.SerializeToJsonError) as e: - json_format.MessageToJson(message) - self.assertEqual(str(e.exception), - 'Enum field contains an integer value which can ' - 'not mapped to an enum value.') + self.assertEqual(json_format.MessageToJson(message), + text) + parsed_message = json_format_proto3_pb2.TestMessage() + json_format.Parse(text, parsed_message) + self.assertEqual(message, parsed_message) def testExtensionToJsonAndBack(self): message = unittest_mset_pb2.TestMessageSetContainer() @@ -757,11 +758,16 @@ class JsonFormatTest(JsonFormatBase): '{"enumValue": "baz"}', 'Failed to parse enumValue field: Invalid enum value baz ' 'for enum type proto3.EnumType.') - # TODO(jieluo): fix json format to accept numeric unknown enum for proto3. - self.CheckError( - '{"enumValue": 12345}', - 'Failed to parse enumValue field: Invalid enum value 12345 ' - 'for enum type proto3.EnumType.') + # Proto3 accepts numeric unknown enums. + text = '{"enumValue": 12345}' + json_format.Parse(text, message) + # Proto2 does not accept unknown enums. + message = unittest_pb2.TestAllTypes() + self.assertRaisesRegexp( + json_format.ParseError, + 'Failed to parse optionalNestedEnum field: Invalid enum value 12345 ' + 'for enum type protobuf_unittest.TestAllTypes.NestedEnum.', + json_format.Parse, '{"optionalNestedEnum": 12345}', message) def testParseBadIdentifer(self): self.CheckError('{int32Value: 1}', diff --git a/python/google/protobuf/internal/message_factory_test.py b/python/google/protobuf/internal/message_factory_test.py index a1b6bb81..6df52ed2 100644 --- a/python/google/protobuf/internal/message_factory_test.py +++ b/python/google/protobuf/internal/message_factory_test.py @@ -107,8 +107,17 @@ class MessageFactoryTest(unittest.TestCase): def testGetMessages(self): # performed twice because multiple calls with the same input must be allowed for _ in range(2): - messages = message_factory.GetMessages([self.factory_test1_fd, - self.factory_test2_fd]) + # GetMessage should work regardless of the order the FileDescriptorProto + # are provided. In particular, the function should succeed when the files + # are not in the topological order of dependencies. + + # Assuming factory_test2_fd depends on factory_test1_fd. + self.assertIn(self.factory_test1_fd.name, + self.factory_test2_fd.dependency) + # Get messages should work when a file comes before its dependencies: + # factory_test2_fd comes before factory_test1_fd. + messages = message_factory.GetMessages([self.factory_test2_fd, + self.factory_test1_fd]) self.assertTrue( set(['google.protobuf.python.internal.Factory2Message', 'google.protobuf.python.internal.Factory1Message'], diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py index 4622f10f..a303b1aa 100755 --- a/python/google/protobuf/internal/message_test.py +++ b/python/google/protobuf/internal/message_test.py @@ -51,6 +51,7 @@ import operator import pickle import six import sys +import warnings try: import unittest2 as unittest # PY26 @@ -146,13 +147,22 @@ class MessageTest(BaseTestCase): msg = message_module.TestAllTypes() self.assertRaises(TypeError, msg.FromString, 0) self.assertRaises(Exception, msg.FromString, '0') - # TODO(jieluo): Fix cpp extension to check unexpected end-group tag. + # TODO(jieluo): Fix cpp extension to raise error instead of warning. # b/27494216 + end_tag = encoder.TagBytes(1, 4) if api_implementation.Type() == 'python': - end_tag = encoder.TagBytes(1, 4) with self.assertRaises(message.DecodeError) as context: msg.FromString(end_tag) self.assertEqual('Unexpected end-group tag.', str(context.exception)) + else: + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter('always') + msg.FromString(end_tag) + assert len(w) == 1 + assert issubclass(w[-1].category, RuntimeWarning) + self.assertEqual('Unexpected end-group tag: Not all data was converted', + str(w[-1].message)) def testDeterminismParameters(self, message_module): # This message is always deterministically serialized, even if determinism diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py index 1214c3ea..a52f133f 100755 --- a/python/google/protobuf/internal/text_format_test.py +++ b/python/google/protobuf/internal/text_format_test.py @@ -1,4 +1,5 @@ #! /usr/bin/env python +# -*- coding: utf-8 -*- # # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. @@ -298,6 +299,33 @@ class TextFormatTest(TextFormatBase): if message_module is unittest_pb2: test_util.ExpectAllFieldsSet(self, message) + def testParseAndMergeUtf8(self, message_module): + message = message_module.TestAllTypes() + test_util.SetAllFields(message) + ascii_text = text_format.MessageToString(message) + ascii_text = ascii_text.encode('utf-8') + + parsed_message = message_module.TestAllTypes() + text_format.Parse(ascii_text, parsed_message) + self.assertEqual(message, parsed_message) + if message_module is unittest_pb2: + test_util.ExpectAllFieldsSet(self, message) + + parsed_message.Clear() + text_format.Merge(ascii_text, parsed_message) + self.assertEqual(message, parsed_message) + if message_module is unittest_pb2: + test_util.ExpectAllFieldsSet(self, message) + + if six.PY2: + msg2 = message_module.TestAllTypes() + text = (u'optional_string: "café"') + text_format.Merge(text, msg2) + self.assertEqual(msg2.optional_string, u'café') + msg2.Clear() + text_format.Parse(text, msg2) + self.assertEqual(msg2.optional_string, u'café') + def testParseExotic(self, message_module): message = message_module.TestAllTypes() text = ('repeated_int64: -9223372036854775808\n' @@ -399,13 +427,6 @@ class TextFormatTest(TextFormatBase): r'has no value named BARR.'), text_format.Parse, text, message) - message = message_module.TestAllTypes() - text = 'optional_nested_enum: 100' - six.assertRaisesRegex(self, text_format.ParseError, - (r'1:23 : Enum type "\w+.TestAllTypes.NestedEnum" ' - r'has no value with number 100.'), text_format.Parse, - text, message) - def testParseBadIntValue(self, message_module): message = message_module.TestAllTypes() text = 'optional_int32: bork' @@ -920,6 +941,14 @@ class Proto2Tests(TextFormatBase): '1:2 : Message type "protobuf_unittest.TestAllTypes" does not have ' 'extensions.'), text_format.Parse, text, message) + def testParseNumericUnknownEnum(self): + message = unittest_pb2.TestAllTypes() + text = 'optional_nested_enum: 100' + six.assertRaisesRegex(self, text_format.ParseError, + (r'1:23 : Enum type "\w+.TestAllTypes.NestedEnum" ' + r'has no value with number 100.'), text_format.Parse, + text, message) + def testMergeDuplicateExtensionScalars(self): message = unittest_pb2.TestAllExtensions() text = ('[protobuf_unittest.optional_int32_extension]: 42 ' @@ -1113,13 +1142,8 @@ class Proto3Tests(unittest.TestCase): message2 = unittest_proto3_arena_pb2.TestAllTypes() message.optional_nested_enum = 999 text_string = text_format.MessageToString(message) - # TODO(jieluo): proto3 should support numeric unknown enum. - with self.assertRaises(text_format.ParseError) as e: - text_format.Parse(text_string, message2) - self.assertEqual(999, message2.optional_nested_enum) - self.assertEqual(str(e.exception), - '1:23 : Enum type "proto3_arena_unittest.TestAllTypes.' - 'NestedEnum" has no value with number 999.') + text_format.Parse(text_string, message2) + self.assertEqual(999, message2.optional_nested_enum) def testMergeExpandedAny(self): message = any_test_pb2.TestAny() diff --git a/python/google/protobuf/internal/unknown_fields_test.py b/python/google/protobuf/internal/unknown_fields_test.py index 9bdb6f27..8b7de2e7 100755 --- a/python/google/protobuf/internal/unknown_fields_test.py +++ b/python/google/protobuf/internal/unknown_fields_test.py @@ -92,7 +92,6 @@ class UnknownFieldsTest(BaseTestCase): # Verify that proto3 unknown fields behavior. default_preserve = (api_implementation .GetPythonProto3PreserveUnknownsDefault()) - self.assertEqual(False, default_preserve) self.expectSerializeProto3(default_preserve) api_implementation.SetPythonProto3PreserveUnknownsDefault( not default_preserve) diff --git a/python/google/protobuf/internal/well_known_types.py b/python/google/protobuf/internal/well_known_types.py index 57b96998..3573770b 100644 --- a/python/google/protobuf/internal/well_known_types.py +++ b/python/google/protobuf/internal/well_known_types.py @@ -40,6 +40,7 @@ This files defines well known classes which need extra maintenance including: __author__ = 'jieluo@google.com (Jie Luo)' +import collections from datetime import datetime from datetime import timedelta import six @@ -67,13 +68,14 @@ class ParseError(Error): class Any(object): """Class for Any Message type.""" - def Pack(self, msg, type_url_prefix='type.googleapis.com/'): + def Pack(self, msg, type_url_prefix='type.googleapis.com/', + deterministic=None): """Packs the specified message into current Any message.""" if len(type_url_prefix) < 1 or type_url_prefix[-1] != '/': self.type_url = '%s/%s' % (type_url_prefix, msg.DESCRIPTOR.full_name) else: self.type_url = '%s%s' % (type_url_prefix, msg.DESCRIPTOR.full_name) - self.value = msg.SerializeToString() + self.value = msg.SerializeToString(deterministic=deterministic) def Unpack(self, msg): """Unpacks the current Any message into specified message.""" @@ -734,9 +736,30 @@ class Struct(object): def __getitem__(self, key): return _GetStructValue(self.fields[key]) + def __contains__(self, item): + return item in self.fields + def __setitem__(self, key, value): _SetStructValue(self.fields[key], value) + def __delitem__(self, key): + del self.fields[key] + + def __len__(self): + return len(self.fields) + + def __iter__(self): + return iter(self.fields) + + def keys(self): # pylint: disable=invalid-name + return self.fields.keys() + + def values(self): # pylint: disable=invalid-name + return [self[key] for key in self] + + def items(self): # pylint: disable=invalid-name + return [(key, self[key]) for key in self] + def get_or_create_list(self, key): """Returns a list for this key, creating if it didn't exist already.""" if not self.fields[key].HasField('list_value'): @@ -755,6 +778,8 @@ class Struct(object): for key, value in dictionary.items(): _SetStructValue(self.fields[key], value) +collections.MutableMapping.register(Struct) + class ListValue(object): """Class for ListValue message type.""" @@ -776,6 +801,9 @@ class ListValue(object): def __setitem__(self, index, value): _SetStructValue(self.values.__getitem__(index), value) + def __delitem__(self, key): + del self.values[key] + def items(self): for i in range(len(self)): yield self[i] @@ -794,6 +822,8 @@ class ListValue(object): list_value.Clear() return list_value +collections.MutableSequence.register(ListValue) + WKTBASES = { 'google.protobuf.Any': Any, diff --git a/python/google/protobuf/internal/well_known_types_test.py b/python/google/protobuf/internal/well_known_types_test.py index 70975da1..291fe4e8 100644 --- a/python/google/protobuf/internal/well_known_types_test.py +++ b/python/google/protobuf/internal/well_known_types_test.py @@ -34,6 +34,7 @@ __author__ = 'jieluo@google.com (Jie Luo)' +import collections from datetime import datetime try: @@ -667,6 +668,8 @@ class StructTest(unittest.TestCase): def testStruct(self): struct = struct_pb2.Struct() + self.assertIsInstance(struct, collections.Mapping) + self.assertEqual(0, len(struct)) struct_class = struct.__class__ struct['key1'] = 5 @@ -674,11 +677,13 @@ class StructTest(unittest.TestCase): struct['key3'] = True struct.get_or_create_struct('key4')['subkey'] = 11.0 struct_list = struct.get_or_create_list('key5') + self.assertIsInstance(struct_list, collections.Sequence) struct_list.extend([6, 'seven', True, False, None]) struct_list.add_struct()['subkey2'] = 9 struct['key6'] = {'subkey': {}} struct['key7'] = [2, False] + self.assertEqual(7, len(struct)) self.assertTrue(isinstance(struct, well_known_types.Struct)) self.assertEqual(5, struct['key1']) self.assertEqual('abc', struct['key2']) @@ -696,6 +701,20 @@ class StructTest(unittest.TestCase): struct2.ParseFromString(serialized) self.assertEqual(struct, struct2) + for key, value in struct.items(): + self.assertIn(key, struct) + self.assertIn(key, struct2) + self.assertEqual(value, struct2[key]) + + self.assertEqual(7, len(struct.keys())) + self.assertEqual(7, len(struct.values())) + for key in struct.keys(): + self.assertIn(key, struct) + self.assertIn(key, struct2) + self.assertEqual(struct[key], struct2[key]) + + item = (next(iter(struct.keys())), next(iter(struct.values()))) + self.assertEqual(item, next(iter(struct.items()))) self.assertTrue(isinstance(struct2, well_known_types.Struct)) self.assertEqual(5, struct2['key1']) @@ -756,6 +775,16 @@ class StructTest(unittest.TestCase): empty_struct = list2[1] self.assertEqual({}, dict(empty_struct.fields)) + self.assertEqual(9, len(struct)) + del struct['key3'] + del struct['key4'] + self.assertEqual(7, len(struct)) + self.assertEqual(6, len(struct['key5'])) + del struct['key5'][1] + self.assertEqual(5, len(struct['key5'])) + self.assertEqual([6, True, False, None, inner_struct], + list(struct['key5'].items())) + def testMergeFrom(self): struct = struct_pb2.Struct() struct_class = struct.__class__ @@ -863,6 +892,20 @@ class AnyTest(unittest.TestCase): self.assertTrue(msg.Unpack(unpacked_message)) self.assertEqual(submessage, unpacked_message) + def testPackDeterministic(self): + submessage = any_test_pb2.TestAny() + for i in range(10): + submessage.map_value[str(i)] = i * 2 + msg = any_pb2.Any() + msg.Pack(submessage, deterministic=True) + serialized = msg.SerializeToString(deterministic=True) + golden = (b'\n4type.googleapis.com/google.protobuf.internal.TestAny\x12F' + b'\x1a\x05\n\x010\x10\x00\x1a\x05\n\x011\x10\x02\x1a\x05\n\x01' + b'2\x10\x04\x1a\x05\n\x013\x10\x06\x1a\x05\n\x014\x10\x08\x1a' + b'\x05\n\x015\x10\n\x1a\x05\n\x016\x10\x0c\x1a\x05\n\x017\x10' + b'\x0e\x1a\x05\n\x018\x10\x10\x1a\x05\n\x019\x10\x12') + self.assertEqual(golden, serialized) + if __name__ == '__main__': unittest.main() diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py index 801eed60..878291db 100644 --- a/python/google/protobuf/json_format.py +++ b/python/google/protobuf/json_format.py @@ -251,6 +251,8 @@ class _Printer(object): if enum_value is not None: return enum_value.name else: + if field.file.syntax == 'proto3': + return value raise SerializeToJsonError('Enum field contains an integer value ' 'which can not mapped to an enum value.') elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: @@ -675,6 +677,9 @@ def _ConvertScalarFieldValue(value, field, require_str=False): raise ParseError('Invalid enum value {0} for enum type {1}.'.format( value, field.enum_type.full_name)) if enum_value is None: + if field.file.syntax == 'proto3': + # Proto3 accepts unknown enums. + return number raise ParseError('Invalid enum value {0} for enum type {1}.'.format( value, field.enum_type.full_name)) return enum_value.number diff --git a/python/google/protobuf/message_factory.py b/python/google/protobuf/message_factory.py index 15740280..e4fb065e 100644 --- a/python/google/protobuf/message_factory.py +++ b/python/google/protobuf/message_factory.py @@ -130,13 +130,22 @@ def GetMessages(file_protos): """Builds a dictionary of all the messages available in a set of files. Args: - file_protos: A sequence of file protos to build messages out of. + file_protos: Iterable of FileDescriptorProto to build messages out of. Returns: A dictionary mapping proto names to the message classes. This will include any dependent messages as well as any messages defined in the same file as a specified message. """ - for file_proto in file_protos: + # The cpp implementation of the protocol buffer library requires to add the + # message in topological order of the dependency graph. + file_by_name = {file_proto.name: file_proto for file_proto in file_protos} + def _AddFile(file_proto): + for dependency in file_proto.dependency: + if dependency in file_by_name: + # Remove from elements to be visited, in order to cut cycles. + _AddFile(file_by_name.pop(dependency)) _FACTORY.pool.Add(file_proto) + while file_by_name: + _AddFile(file_by_name.popitem()[1]) return _FACTORY.GetMessages([file_proto.name for file_proto in file_protos]) diff --git a/python/google/protobuf/pyext/extension_dict.cc b/python/google/protobuf/pyext/extension_dict.cc index 43ee5d15..6830b10d 100644 --- a/python/google/protobuf/pyext/extension_dict.cc +++ b/python/google/protobuf/pyext/extension_dict.cc @@ -32,6 +32,10 @@ // Author: tibell@google.com (Johan Tibell) #include +#include +#ifndef _SHARED_PTR_H +#include +#endif #include #include @@ -45,7 +49,6 @@ #include #include #include -#include #if PY_MAJOR_VERSION >= 3 #if PY_VERSION_HEX < 0x03030000 diff --git a/python/google/protobuf/pyext/message.cc b/python/google/protobuf/pyext/message.cc index 0f54506b..ef75acab 100644 --- a/python/google/protobuf/pyext/message.cc +++ b/python/google/protobuf/pyext/message.cc @@ -2065,6 +2065,11 @@ static PyObject* MergeFromString(CMessage* self, PyObject* arg) { input.SetExtensionRegistry(factory->pool->pool, factory->message_factory); bool success = self->message->MergePartialFromCodedStream(&input); if (success) { + if (!input.ConsumedEntireMessage()) { + // TODO(jieluo): Raise error and return NULL instead. + // b/27494216 + PyErr_Warn(NULL, "Unexpected end-group tag: Not all data was converted"); + } return PyInt_FromLong(input.CurrentPosition()); } else { PyErr_Format(DecodeError_class, "Error parsing message"); diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index aaca78ad..6b12632e 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -485,7 +485,10 @@ def Parse(text, ParseError: On text parsing problems. """ if not isinstance(text, str): - text = text.decode('utf-8') + if six.PY3: + text = text.decode('utf-8') + else: + text = text.encode('utf-8') return ParseLines(text.split('\n'), message, allow_unknown_extension, @@ -517,6 +520,11 @@ def Merge(text, Raises: ParseError: On text parsing problems. """ + if not isinstance(text, str): + if six.PY3: + text = text.decode('utf-8') + else: + text = text.encode('utf-8') return MergeLines( text.split('\n'), message, @@ -1559,6 +1567,11 @@ def ParseEnum(field, value): (enum_descriptor.full_name, value)) else: # Numeric value. + if hasattr(field.file, 'syntax'): + # Attribute is checked for compatibility. + if field.file.syntax == 'proto3': + # Proto3 accept numeric unknown enums. + return number enum_value = enum_descriptor.values_by_number.get(number, None) if enum_value is None: raise ValueError('Enum type "%s" has no value with number %d.' % -- cgit v1.2.3