diff options
author | liujisi@google.com <liujisi@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2010-11-02 13:14:58 +0000 |
---|---|---|
committer | liujisi@google.com <liujisi@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2010-11-02 13:14:58 +0000 |
commit | 33165fe0d5c265c92f2a67fc2b437b567c24e294 (patch) | |
tree | 52def0850ddd2e976da238d1a437fbda79c96e44 /python/google/protobuf/internal | |
parent | 80aa23df6c63750e8cdfdcf3996fbc37d63cac61 (diff) |
Submit recent changes from internal branch. See CHANGES.txt for more details.
Diffstat (limited to 'python/google/protobuf/internal')
-rwxr-xr-x | python/google/protobuf/internal/api_implementation.py | 64 | ||||
-rwxr-xr-x | python/google/protobuf/internal/containers.py | 27 | ||||
-rwxr-xr-x | python/google/protobuf/internal/cpp_message.py | 616 | ||||
-rwxr-xr-x | python/google/protobuf/internal/decoder.py | 77 | ||||
-rwxr-xr-x | python/google/protobuf/internal/encoder.py | 87 | ||||
-rwxr-xr-x | python/google/protobuf/internal/generator_test.py | 31 | ||||
-rwxr-xr-x | python/google/protobuf/internal/message_test.py | 254 | ||||
-rwxr-xr-x | python/google/protobuf/internal/python_message.py | 1098 | ||||
-rwxr-xr-x | python/google/protobuf/internal/reflection_test.py | 226 | ||||
-rwxr-xr-x | python/google/protobuf/internal/text_format_test.py | 158 |
10 files changed, 2571 insertions, 67 deletions
diff --git a/python/google/protobuf/internal/api_implementation.py b/python/google/protobuf/internal/api_implementation.py new file mode 100755 index 00000000..b3e412e2 --- /dev/null +++ b/python/google/protobuf/internal/api_implementation.py @@ -0,0 +1,64 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. +# http://code.google.com/p/protobuf/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This module is the central entity that determines which implementation of the +API is used. 
+""" + +__author__ = 'petar@google.com (Petar Petrov)' + +import os +# This environment variable can be used to switch to a certain implementation +# of the Python API. Right now only 'python' and 'cpp' are valid values. Any +# other value will be ignored. +_implementation_type = os.getenv('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', + 'python') + + +if _implementation_type != 'python': + # For now, by default use the pure-Python implementation. + # The code below checks if the C extension is available and + # uses it if it is available. + _implementation_type = 'cpp' + ## Determine automatically which implementation to use. + #try: + # from google.protobuf.internal import cpp_message + # _implementation_type = 'cpp' + #except ImportError, e: + # _implementation_type = 'python' + + +# Usage of this function is discouraged. Clients shouldn't care which +# implementation of the API is in use. Note that there is no guarantee +# that differences between APIs will be maintained. +# Please don't use this function if possible. +def Type(): + return _implementation_type diff --git a/python/google/protobuf/internal/containers.py b/python/google/protobuf/internal/containers.py index 5cc7d6d0..097a3c26 100755 --- a/python/google/protobuf/internal/containers.py +++ b/python/google/protobuf/internal/containers.py @@ -72,9 +72,15 @@ class BaseContainer(object): # The concrete classes should define __eq__. 
return not self == other + def __hash__(self): + raise TypeError('unhashable object') + def __repr__(self): return repr(self._values) + def sort(self, sort_function=cmp): + self._values.sort(sort_function) + class RepeatedScalarFieldContainer(BaseContainer): @@ -198,28 +204,37 @@ class RepeatedCompositeFieldContainer(BaseContainer): super(RepeatedCompositeFieldContainer, self).__init__(message_listener) self._message_descriptor = message_descriptor - def add(self): - new_element = self._message_descriptor._concrete_class() + def add(self, **kwargs): + """Adds a new element at the end of the list and returns it. Keyword + arguments may be used to initialize the element. + """ + new_element = self._message_descriptor._concrete_class(**kwargs) new_element._SetListener(self._message_listener) self._values.append(new_element) if not self._message_listener.dirty: self._message_listener.Modified() return new_element - def MergeFrom(self, other): - """Appends the contents of another repeated field of the same type to this - one, copying each individual message. + def extend(self, elem_seq): + """Extends by appending the given sequence of elements of the same type + as this one, copying each individual message. """ message_class = self._message_descriptor._concrete_class listener = self._message_listener values = self._values - for message in other._values: + for message in elem_seq: new_element = message_class() new_element._SetListener(listener) new_element.MergeFrom(message) values.append(new_element) listener.Modified() + def MergeFrom(self, other): + """Appends the contents of another repeated field of the same type to this + one, copying each individual message. 
+ """ + self.extend(other._values) + def __getslice__(self, start, stop): """Retrieves the subset of items from between the specified indices.""" return self._values[start:stop] diff --git a/python/google/protobuf/internal/cpp_message.py b/python/google/protobuf/internal/cpp_message.py new file mode 100755 index 00000000..3f426502 --- /dev/null +++ b/python/google/protobuf/internal/cpp_message.py @@ -0,0 +1,616 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. +# http://code.google.com/p/protobuf/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Contains helper functions used to create protocol message classes from +Descriptor objects at runtime backed by the protocol buffer C++ API. +""" + +__author__ = 'petar@google.com (Petar Petrov)' + +import operator +from google.protobuf.internal import _net_proto2___python +from google.protobuf import message + + +_LABEL_REPEATED = _net_proto2___python.LABEL_REPEATED +_LABEL_OPTIONAL = _net_proto2___python.LABEL_OPTIONAL +_CPPTYPE_MESSAGE = _net_proto2___python.CPPTYPE_MESSAGE +_TYPE_MESSAGE = _net_proto2___python.TYPE_MESSAGE + + +def GetDescriptorPool(): + """Creates a new DescriptorPool C++ object.""" + return _net_proto2___python.NewCDescriptorPool() + + +_pool = GetDescriptorPool() + + +def GetFieldDescriptor(full_field_name): + """Searches for a field descriptor given a full field name.""" + return _pool.FindFieldByName(full_field_name) + + +def BuildFile(content): + """Registers a new proto file in the underlying C++ descriptor pool.""" + _net_proto2___python.BuildFile(content) + + +def GetExtensionDescriptor(full_extension_name): + """Searches for extension descriptor given a full field name.""" + return _pool.FindExtensionByName(full_extension_name) + + +def NewCMessage(full_message_name): + """Creates a new C++ protocol message by its name.""" + return _net_proto2___python.NewCMessage(full_message_name) + + +def ScalarProperty(cdescriptor): + """Returns a scalar property for the given descriptor.""" + + def Getter(self): + 
return self._cmsg.GetScalar(cdescriptor) + + def Setter(self, value): + self._cmsg.SetScalar(cdescriptor, value) + + return property(Getter, Setter) + + +def CompositeProperty(cdescriptor, message_type): + """Returns a Python property the given composite field.""" + + def Getter(self): + sub_message = self._composite_fields.get(cdescriptor.name, None) + if sub_message is None: + cmessage = self._cmsg.NewSubMessage(cdescriptor) + sub_message = message_type._concrete_class(__cmessage=cmessage) + self._composite_fields[cdescriptor.name] = sub_message + return sub_message + + return property(Getter) + + +class RepeatedScalarContainer(object): + """Container for repeated scalar fields.""" + + __slots__ = ['_message', '_cfield_descriptor', '_cmsg'] + + def __init__(self, msg, cfield_descriptor): + self._message = msg + self._cmsg = msg._cmsg + self._cfield_descriptor = cfield_descriptor + + def append(self, value): + self._cmsg.AddRepeatedScalar( + self._cfield_descriptor, value) + + def extend(self, sequence): + for element in sequence: + self.append(element) + + def insert(self, key, value): + values = self[slice(None, None, None)] + values.insert(key, value) + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + def remove(self, value): + values = self[slice(None, None, None)] + values.remove(value) + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + def __setitem__(self, key, value): + values = self[slice(None, None, None)] + values[key] = value + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + def __getitem__(self, key): + return self._cmsg.GetRepeatedScalar(self._cfield_descriptor, key) + + def __delitem__(self, key): + self._cmsg.DeleteRepeatedField(self._cfield_descriptor, key) + + def __len__(self): + return len(self[slice(None, None, None)]) + + def __eq__(self, other): + if self is other: + return True + if not operator.isSequenceType(other): + raise TypeError( + 'Can only compare repeated scalar fields 
against sequences.') + # We are presumably comparing against some other sequence type. + return other == self[slice(None, None, None)] + + def __ne__(self, other): + return not self == other + + def __hash__(self): + raise TypeError('unhashable object') + + def sort(self, sort_function=cmp): + values = self[slice(None, None, None)] + values.sort(sort_function) + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + +def RepeatedScalarProperty(cdescriptor): + """Returns a Python property the given repeated scalar field.""" + + def Getter(self): + container = self._composite_fields.get(cdescriptor.name, None) + if container is None: + container = RepeatedScalarContainer(self, cdescriptor) + self._composite_fields[cdescriptor.name] = container + return container + + def Setter(self, new_value): + raise AttributeError('Assignment not allowed to repeated field ' + '"%s" in protocol message object.' % cdescriptor.name) + + doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name + return property(Getter, Setter, doc=doc) + + +class RepeatedCompositeContainer(object): + """Container for repeated composite fields.""" + + __slots__ = ['_message', '_subclass', '_cfield_descriptor', '_cmsg'] + + def __init__(self, msg, cfield_descriptor, subclass): + self._message = msg + self._cmsg = msg._cmsg + self._subclass = subclass + self._cfield_descriptor = cfield_descriptor + + def add(self, **kwargs): + cmessage = self._cmsg.AddMessage(self._cfield_descriptor) + return self._subclass(__cmessage=cmessage, __owner=self._message, **kwargs) + + def extend(self, elem_seq): + """Extends by appending the given sequence of elements of the same type + as this one, copying each individual message. 
+ """ + for message in elem_seq: + self.add().MergeFrom(message) + + def MergeFrom(self, other): + for message in other[:]: + self.add().MergeFrom(message) + + def __getitem__(self, key): + cmessages = self._cmsg.GetRepeatedMessage( + self._cfield_descriptor, key) + subclass = self._subclass + if not isinstance(cmessages, list): + return subclass(__cmessage=cmessages, __owner=self._message) + + return [subclass(__cmessage=m, __owner=self._message) for m in cmessages] + + def __delitem__(self, key): + self._cmsg.DeleteRepeatedField( + self._cfield_descriptor, key) + + def __len__(self): + return self._cmsg.FieldLength(self._cfield_descriptor) + + def __eq__(self, other): + """Compares the current instance with another one.""" + if self is other: + return True + if not isinstance(other, self.__class__): + raise TypeError('Can only compare repeated composite fields against ' + 'other repeated composite fields.') + messages = self[slice(None, None, None)] + other_messages = other[slice(None, None, None)] + return messages == other_messages + + def __hash__(self): + raise TypeError('unhashable object') + + def sort(self, sort_function=cmp): + messages = [] + for index in range(len(self)): + # messages[i][0] is where the i-th element of the new array has to come + # from. + # messages[i][1] is where the i-th element of the old array has to go. + messages.append([index, 0, self[index]]) + messages.sort(lambda x,y: sort_function(x[2], y[2])) + + # Remember which position each elements has to move to. + for i in range(len(messages)): + messages[messages[i][0]][1] = i + + # Apply the transposition. 
+ for i in range(len(messages)): + from_position = messages[i][0] + if i == from_position: + continue + self._cmsg.SwapRepeatedFieldElements( + self._cfield_descriptor, i, from_position) + messages[messages[i][1]][0] = from_position + + +def RepeatedCompositeProperty(cdescriptor, message_type): + """Returns a Python property for the given repeated composite field.""" + + def Getter(self): + container = self._composite_fields.get(cdescriptor.name, None) + if container is None: + container = RepeatedCompositeContainer( + self, cdescriptor, message_type._concrete_class) + self._composite_fields[cdescriptor.name] = container + return container + + def Setter(self, new_value): + raise AttributeError('Assignment not allowed to repeated field ' + '"%s" in protocol message object.' % cdescriptor.name) + + doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name + return property(Getter, Setter, doc=doc) + + +class ExtensionDict(object): + """Extension dictionary added to each protocol message.""" + + def __init__(self, msg): + self._message = msg + self._cmsg = msg._cmsg + self._values = {} + + def __setitem__(self, extension, value): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' % (extension,)) + cdescriptor = extension._cdescriptor + if (cdescriptor.label != _LABEL_OPTIONAL or + cdescriptor.cpp_type == _CPPTYPE_MESSAGE): + raise TypeError('Extension %r is repeated and/or a composite type.' % ( + extension.full_name,)) + self._cmsg.SetScalar(cdescriptor, value) + self._values[extension] = value + + def __getitem__(self, extension): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' 
% (extension,)) + + cdescriptor = extension._cdescriptor + if (cdescriptor.label != _LABEL_REPEATED and + cdescriptor.cpp_type != _CPPTYPE_MESSAGE): + return self._cmsg.GetScalar(cdescriptor) + + ext = self._values.get(extension, None) + if ext is not None: + return ext + + ext = self._CreateNewHandle(extension) + self._values[extension] = ext + return ext + + def ClearExtension(self, extension): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' % (extension,)) + self._cmsg.ClearFieldByDescriptor(extension._cdescriptor) + if extension in self._values: + del self._values[extension] + + def HasExtension(self, extension): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' % (extension,)) + return self._cmsg.HasFieldByDescriptor(extension._cdescriptor) + + def _FindExtensionByName(self, name): + """Tries to find a known extension with the specified name. + + Args: + name: Extension full name. + + Returns: + Extension field descriptor. + """ + return self._message._extensions_by_name.get(name, None) + + def _CreateNewHandle(self, extension): + cdescriptor = extension._cdescriptor + if (cdescriptor.label != _LABEL_REPEATED and + cdescriptor.cpp_type == _CPPTYPE_MESSAGE): + cmessage = self._cmsg.NewSubMessage(cdescriptor) + return extension.message_type._concrete_class(__cmessage=cmessage) + + if cdescriptor.label == _LABEL_REPEATED: + if cdescriptor.cpp_type == _CPPTYPE_MESSAGE: + return RepeatedCompositeContainer( + self._message, cdescriptor, extension.message_type._concrete_class) + else: + return RepeatedScalarContainer(self._message, cdescriptor) + # This shouldn't happen! 
+ assert False + return None + + +def NewMessage(message_descriptor, dictionary): + """Creates a new protocol message *class*.""" + _AddClassAttributesForNestedExtensions(message_descriptor, dictionary) + _AddEnumValues(message_descriptor, dictionary) + _AddDescriptors(message_descriptor, dictionary) + + +def InitMessage(message_descriptor, cls): + """Constructs a new message instance (called before instance's __init__).""" + cls._extensions_by_name = {} + _AddInitMethod(message_descriptor, cls) + _AddMessageMethods(message_descriptor, cls) + _AddPropertiesForExtensions(message_descriptor, cls) + + +def _AddDescriptors(message_descriptor, dictionary): + """Sets up a new protocol message class dictionary. + + Args: + message_descriptor: A Descriptor instance describing this message type. + dictionary: Class dictionary to which we'll add a '__slots__' entry. + """ + dictionary['__descriptors'] = {} + for field in message_descriptor.fields: + dictionary['__descriptors'][field.name] = GetFieldDescriptor( + field.full_name) + + dictionary['__slots__'] = list(dictionary['__descriptors'].iterkeys()) + [ + '_cmsg', '_owner', '_composite_fields', 'Extensions'] + + +def _AddEnumValues(message_descriptor, dictionary): + """Sets class-level attributes for all enum fields defined in this message. + + Args: + message_descriptor: Descriptor object for this message type. + dictionary: Class dictionary that should be populated. 
+ """ + for enum_type in message_descriptor.enum_types: + for enum_value in enum_type.values: + dictionary[enum_value.name] = enum_value.number + + +def _AddClassAttributesForNestedExtensions(message_descriptor, dictionary): + """Adds class attributes for the nested extensions.""" + extension_dict = message_descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + assert extension_name not in dictionary + dictionary[extension_name] = extension_field + + +def _AddInitMethod(message_descriptor, cls): + """Adds an __init__ method to cls.""" + + # Create and attach message field properties to the message class. + # This can be done just once per message class, since property setters and + # getters are passed the message instance. + # This makes message instantiation extremely fast, and at the same time it + # doesn't require the creation of property objects for each message instance, + # which saves a lot of memory. + for field in message_descriptor.fields: + field_cdescriptor = cls.__descriptors[field.name] + if field.label == _LABEL_REPEATED: + if field.cpp_type == _CPPTYPE_MESSAGE: + value = RepeatedCompositeProperty(field_cdescriptor, field.message_type) + else: + value = RepeatedScalarProperty(field_cdescriptor) + elif field.cpp_type == _CPPTYPE_MESSAGE: + value = CompositeProperty(field_cdescriptor, field.message_type) + else: + value = ScalarProperty(field_cdescriptor) + setattr(cls, field.name, value) + + # Attach a constant with the field number. + constant_name = field.name.upper() + '_FIELD_NUMBER' + setattr(cls, constant_name, field.number) + + def Init(self, **kwargs): + """Message constructor.""" + cmessage = kwargs.pop('__cmessage', None) + if cmessage is None: + self._cmsg = NewCMessage(message_descriptor.full_name) + else: + self._cmsg = cmessage + + # Keep a reference to the owner, as the owner keeps a reference to the + # underlying protocol buffer message. 
+ owner = kwargs.pop('__owner', None) + if owner is not None: + self._owner = owner + + self.Extensions = ExtensionDict(self) + self._composite_fields = {} + + for field_name, field_value in kwargs.iteritems(): + field_cdescriptor = self.__descriptors.get(field_name, None) + if field_cdescriptor is None: + raise ValueError('Protocol message has no "%s" field.' % field_name) + if field_cdescriptor.label == _LABEL_REPEATED: + if field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE: + for val in field_value: + getattr(self, field_name).add().MergeFrom(val) + else: + getattr(self, field_name).extend(field_value) + elif field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE: + getattr(self, field_name).MergeFrom(field_value) + else: + setattr(self, field_name, field_value) + + Init.__module__ = None + Init.__doc__ = None + cls.__init__ = Init + + +def _IsMessageSetExtension(field): + """Checks if a field is a message set extension.""" + return (field.is_extension and + field.containing_type.has_options and + field.containing_type.GetOptions().message_set_wire_format and + field.type == _TYPE_MESSAGE and + field.message_type == field.extension_scope and + field.label == _LABEL_OPTIONAL) + + +def _AddMessageMethods(message_descriptor, cls): + """Adds the methods to a protocol message class.""" + if message_descriptor.is_extendable: + + def ClearExtension(self, extension): + self.Extensions.ClearExtension(extension) + + def HasExtension(self, extension): + return self.Extensions.HasExtension(extension) + + def HasField(self, field_name): + return self._cmsg.HasField(field_name) + + def ClearField(self, field_name): + if field_name in self._composite_fields: + del self._composite_fields[field_name] + self._cmsg.ClearField(field_name) + + def Clear(self): + return self._cmsg.Clear() + + def IsInitialized(self, errors=None): + if self._cmsg.IsInitialized(): + return True + if errors is not None: + errors.extend(self.FindInitializationErrors()); + return False + + def 
SerializeToString(self): + if not self.IsInitialized(): + raise message.EncodeError( + 'Message is missing required fields: ' + + ','.join(self.FindInitializationErrors())) + return self._cmsg.SerializeToString() + + def SerializePartialToString(self): + return self._cmsg.SerializePartialToString() + + def ParseFromString(self, serialized): + self.Clear() + self.MergeFromString(serialized) + + def MergeFromString(self, serialized): + byte_size = self._cmsg.MergeFromString(serialized) + if byte_size < 0: + raise message.DecodeError('Unable to merge from string.') + return byte_size + + def MergeFrom(self, msg): + if not isinstance(msg, cls): + raise TypeError( + "Parameter to MergeFrom() must be instance of same class.") + self._cmsg.MergeFrom(msg._cmsg) + + def CopyFrom(self, msg): + self._cmsg.CopyFrom(msg._cmsg) + + def ByteSize(self): + return self._cmsg.ByteSize() + + def SetInParent(self): + return self._cmsg.SetInParent() + + def ListFields(self): + all_fields = [] + field_list = self._cmsg.ListFields() + fields_by_name = cls.DESCRIPTOR.fields_by_name + for is_extension, field_name in field_list: + if is_extension: + extension = cls._extensions_by_name[field_name] + all_fields.append((extension, self.Extensions[extension])) + else: + field_descriptor = fields_by_name[field_name] + all_fields.append( + (field_descriptor, getattr(self, field_name))) + all_fields.sort(key=lambda item: item[0].number) + return all_fields + + def FindInitializationErrors(self): + return self._cmsg.FindInitializationErrors() + + def __str__(self): + return self._cmsg.DebugString() + + def __eq__(self, other): + if self is other: + return True + if not isinstance(other, self.__class__): + return False + return self.ListFields() == other.ListFields() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + raise TypeError('unhashable object') + + def __unicode__(self): + return text_format.MessageToString(self, as_utf8=True).decode('utf-8') + + # Attach the 
local methods to the message class. + for key, value in locals().copy().iteritems(): + if key not in ('key', 'value', '__builtins__', '__name__', '__doc__'): + setattr(cls, key, value) + + # Static methods: + + def RegisterExtension(extension_handle): + extension_handle.containing_type = cls.DESCRIPTOR + cls._extensions_by_name[extension_handle.full_name] = extension_handle + + if _IsMessageSetExtension(extension_handle): + # MessageSet extension. Also register under type name. + cls._extensions_by_name[ + extension_handle.message_type.full_name] = extension_handle + cls.RegisterExtension = staticmethod(RegisterExtension) + + def FromString(string): + msg = cls() + msg.MergeFromString(string) + return msg + cls.FromString = staticmethod(FromString) + + + +def _AddPropertiesForExtensions(message_descriptor, cls): + """Adds properties for all fields in this protocol message type.""" + extension_dict = message_descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + constant_name = extension_name.upper() + '_FIELD_NUMBER' + setattr(cls, constant_name, extension_field.number) diff --git a/python/google/protobuf/internal/decoder.py b/python/google/protobuf/internal/decoder.py index 461a30c0..55f746f5 100755 --- a/python/google/protobuf/internal/decoder.py +++ b/python/google/protobuf/internal/decoder.py @@ -86,6 +86,13 @@ from google.protobuf.internal import wire_format from google.protobuf import message +# This will overflow and thus become IEEE-754 "infinity". We would use +# "float('inf')" but it doesn't work on Windows pre-Python-2.6. +_POS_INF = 1e10000 +_NEG_INF = -_POS_INF +_NAN = _POS_INF * 0 + + # This is not for optimization, but rather to avoid conflicts with local # variables named "message". _DecodeError = message.DecodeError @@ -269,6 +276,72 @@ def _StructPackDecoder(wire_type, format): return _SimpleDecoder(wire_type, InnerDecode) +def _FloatDecoder(): + """Returns a decoder for a float field. 
+ + This code works around a bug in struct.unpack for non-finite 32-bit + floating-point values. + """ + + local_unpack = struct.unpack + + def InnerDecode(buffer, pos): + # We expect a 32-bit value in little-endian byte order. Bit 1 is the sign + # bit, bits 2-9 represent the exponent, and bits 10-32 are the significand. + new_pos = pos + 4 + float_bytes = buffer[pos:new_pos] + + # If this value has all its exponent bits set, then it's non-finite. + # In Python 2.4, struct.unpack will convert it to a finite 64-bit value. + # To avoid that, we parse it specially. + if ((float_bytes[3] in '\x7F\xFF') + and (float_bytes[2] >= '\x80')): + # If at least one significand bit is set... + if float_bytes[0:3] != '\x00\x00\x80': + return (_NAN, new_pos) + # If sign bit is set... + if float_bytes[3] == '\xFF': + return (_NEG_INF, new_pos) + return (_POS_INF, new_pos) + + # Note that we expect someone up-stack to catch struct.error and convert + # it to _DecodeError -- this way we don't have to set up exception- + # handling blocks every time we parse one value. + result = local_unpack('<f', float_bytes)[0] + return (result, new_pos) + return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, InnerDecode) + + +def _DoubleDecoder(): + """Returns a decoder for a double field. + + This code works around a bug in struct.unpack for not-a-number. + """ + + local_unpack = struct.unpack + + def InnerDecode(buffer, pos): + # We expect a 64-bit value in little-endian byte order. Bit 1 is the sign + # bit, bits 2-12 represent the exponent, and bits 13-64 are the significand. + new_pos = pos + 8 + double_bytes = buffer[pos:new_pos] + + # If this value has all its exponent bits set and at least one significand + # bit set, it's not a number. In Python 2.4, struct.unpack will treat it + # as inf or -inf. To avoid that, we treat it specially. 
+ if ((double_bytes[7] in '\x7F\xFF') + and (double_bytes[6] >= '\xF0') + and (double_bytes[0:7] != '\x00\x00\x00\x00\x00\x00\xF0')): + return (_NAN, new_pos) + + # Note that we expect someone up-stack to catch struct.error and convert + # it to _DecodeError -- this way we don't have to set up exception- + # handling blocks every time we parse one value. + result = local_unpack('<d', double_bytes)[0] + return (result, new_pos) + return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, InnerDecode) + + # -------------------------------------------------------------------- @@ -294,8 +367,8 @@ Fixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I') Fixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q') SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i') SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q') -FloatDecoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<f') -DoubleDecoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<d') +FloatDecoder = _FloatDecoder() +DoubleDecoder = _DoubleDecoder() BoolDecoder = _ModifiedDecoder( wire_format.WIRETYPE_VARINT, _DecodeVarint, bool) diff --git a/python/google/protobuf/internal/encoder.py b/python/google/protobuf/internal/encoder.py index aa05d5b3..777975e8 100755 --- a/python/google/protobuf/internal/encoder.py +++ b/python/google/protobuf/internal/encoder.py @@ -70,6 +70,12 @@ import struct from google.protobuf.internal import wire_format +# This will overflow and thus become IEEE-754 "infinity". We would use +# "float('inf')" but it doesn't work on Windows pre-Python-2.6. +_POS_INF = 1e10000 +_NEG_INF = -_POS_INF + + def _VarintSize(value): """Compute the size of a varint value.""" if value <= 0x7f: return 1 @@ -502,6 +508,83 @@ def _StructPackEncoder(wire_type, format): return SpecificEncoder +def _FloatingPointEncoder(wire_type, format): + """Return a constructor for an encoder for float fields. 
+ + This is like StructPackEncoder, but catches errors that may be due to + passing non-finite floating-point values to struct.pack, and makes a + second attempt to encode those values. + + Args: + wire_type: The field's wire type, for encoding tags. + format: The format string to pass to struct.pack(). + """ + + value_size = struct.calcsize(format) + if value_size == 4: + def EncodeNonFiniteOrRaise(write, value): + # Remember that the serialized form uses little-endian byte order. + if value == _POS_INF: + write('\x00\x00\x80\x7F') + elif value == _NEG_INF: + write('\x00\x00\x80\xFF') + elif value != value: # NaN + write('\x00\x00\xC0\x7F') + else: + raise + elif value_size == 8: + def EncodeNonFiniteOrRaise(write, value): + if value == _POS_INF: + write('\x00\x00\x00\x00\x00\x00\xF0\x7F') + elif value == _NEG_INF: + write('\x00\x00\x00\x00\x00\x00\xF0\xFF') + elif value != value: # NaN + write('\x00\x00\x00\x00\x00\x00\xF8\x7F') + else: + raise + else: + raise ValueError('Can\'t encode floating-point values that are ' + '%d bytes long (only 4 or 8)' % value_size) + + def SpecificEncoder(field_number, is_repeated, is_packed): + local_struct_pack = struct.pack + if is_packed: + tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) + local_EncodeVarint = _EncodeVarint + def EncodePackedField(write, value): + write(tag_bytes) + local_EncodeVarint(write, len(value) * value_size) + for element in value: + # This try/except block is going to be faster than any code that + # we could write to check whether element is finite. 
+ try: + write(local_struct_pack(format, element)) + except SystemError: + EncodeNonFiniteOrRaise(write, element) + return EncodePackedField + elif is_repeated: + tag_bytes = TagBytes(field_number, wire_type) + def EncodeRepeatedField(write, value): + for element in value: + write(tag_bytes) + try: + write(local_struct_pack(format, element)) + except SystemError: + EncodeNonFiniteOrRaise(write, element) + return EncodeRepeatedField + else: + tag_bytes = TagBytes(field_number, wire_type) + def EncodeField(write, value): + write(tag_bytes) + try: + write(local_struct_pack(format, value)) + except SystemError: + EncodeNonFiniteOrRaise(write, value) + return EncodeField + + return SpecificEncoder + + # ==================================================================== # Here we declare an encoder constructor for each field type. These work # very similarly to sizer constructors, described earlier. @@ -525,8 +608,8 @@ Fixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<I') Fixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<Q') SFixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<i') SFixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<q') -FloatEncoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<f') -DoubleEncoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<d') +FloatEncoder = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED32, '<f') +DoubleEncoder = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED64, '<d') def BoolEncoder(field_number, is_repeated, is_packed): diff --git a/python/google/protobuf/internal/generator_test.py b/python/google/protobuf/internal/generator_test.py index 78360b53..e4387c85 100755 --- a/python/google/protobuf/internal/generator_test.py +++ b/python/google/protobuf/internal/generator_test.py @@ -42,11 +42,12 @@ further ensures that we can use Python protocol message objects as we expect. 
def testMessageWithCustomOptions(self):
  """Checks that the options of a nested enum type are accessible.

  Looks up the 'AnEnum' enum type on TestMessageWithCustomOptions and only
  verifies that GetOptions() returns something other than None.
  """
  proto = unittest_custom_options_pb2.TestMessageWithCustomOptions()
  enum_options = proto.DESCRIPTOR.enum_types_by_name['AnEnum'].GetOptions()
  self.assertTrue(enum_options is not None)
  # TODO(gps): We really should test for the presence of the enum_opt1
  # extension and for its value to be set to -789.
# Python pre-2.6 does not have isinf() or isnan() functions, so we have
# to provide our own.
def isnan(val):
  """Returns True if val is not-a-number.

  NaN is the only value that compares unequal to itself.
  """
  differs_from_itself = (val != val)
  return differs_from_itself


def isinf(val):
  """Returns True if val is positive or negative infinity."""
  if isnan(val):
    return False
  # Infinity times zero equals NaN, whereas a finite value times zero does not.
  return isnan(val * 0)


def IsPosInf(val):
  """Returns True if val is positive infinity."""
  if not isinf(val):
    return False
  return val > 0


def IsNegInf(val):
  """Returns True if val is negative infinity."""
  if not isinf(val):
    return False
  return val < 0
def testNotANumber(self):
  """Parses NaN float/double fields and checks they reserialize unchanged."""
  wire_bytes = ('\x5D\x00\x00\xC0\x7F'
                '\x61\x00\x00\x00\x00\x00\x00\xF8\x7F'
                '\xCD\x02\x00\x00\xC0\x7F'
                '\xD1\x02\x00\x00\x00\x00\x00\x00\xF8\x7F')
  parsed = unittest_pb2.TestAllTypes()
  parsed.ParseFromString(wire_bytes)

  # Each field is read lazily, immediately before its own assertion, so the
  # checks run in the same order as if they were written out one by one.
  field_readers = (
      lambda: parsed.optional_float,
      lambda: parsed.optional_double,
      lambda: parsed.repeated_float[0],
      lambda: parsed.repeated_double[0],
  )
  for read_field in field_readers:
    self.assertTrue(isnan(read_field()))

  reserialized = parsed.SerializeToString()
  self.assertTrue(reserialized == wire_bytes)
def testNotANumberPacked(self):
  """Parses NaN values in packed float/double fields and reserializes them."""
  wire_bytes = ('\xA2\x06\x04\x00\x00\xC0\x7F'
                '\xAA\x06\x08\x00\x00\x00\x00\x00\x00\xF8\x7F')
  parsed = unittest_pb2.TestPackedTypes()
  parsed.ParseFromString(wire_bytes)

  first_float = parsed.packed_float[0]
  self.assertTrue(isnan(first_float))
  first_double = parsed.packed_double[0]
  self.assertTrue(isnan(first_double))

  # The round trip must reproduce the input byte for byte.
  reserialized = parsed.SerializeToString()
  self.assertTrue(reserialized == wire_bytes)
def testExtremeDoubleValues(self):
  """Round-trips extreme finite values through optional_double.

  This method used to share the name testExtremeFloatValues with the
  optional_float test defined just before it.  Because a later definition in
  a class body replaces an earlier one of the same name, the float test was
  silently dropped.  Giving this method its own name lets both tests run.
  """
  message = unittest_pb2.TestAllTypes()

  # Largest exponent a finite double can have, with no significand bits set
  # and with one significand bit set.
  kMostPosExponentNoSigBits = math.pow(2, 1023)
  kMostPosExponentOneSigBit = 1.5 * math.pow(2, 1023)

  # Very small magnitudes: 2**-1023 and 1.5 * 2**-1023.
  kMostNegExponentNoSigBits = math.pow(2, -1023)
  kMostNegExponentOneSigBit = 1.5 * math.pow(2, -1023)

  # Each magnitude is checked with both signs.
  extreme_values = [
      kMostPosExponentNoSigBits,
      kMostPosExponentOneSigBit,
      -kMostPosExponentNoSigBits,
      -kMostPosExponentOneSigBit,
      kMostNegExponentNoSigBits,
      kMostNegExponentOneSigBit,
      -kMostNegExponentNoSigBits,
      -kMostNegExponentOneSigBit,
  ]
  for value in extreme_values:
    message.optional_double = value
    # Serialize and immediately re-parse; the value must survive unchanged.
    message.ParseFromString(message.SerializeToString())
    self.assertEqual(value, message.optional_double)
def testSortingRepeatedScalarFieldsCustomComparator(self):
  """Sorts repeated scalar fields with caller-supplied comparison functions."""

  def CompareByMagnitude(left, right):
    return cmp(abs(left), abs(right))

  def CompareByLength(left, right):
    return cmp(len(left), len(right))

  message = unittest_pb2.TestAllTypes()

  # Integers ordered by absolute value.
  for number in (-3, -2, -1):
    message.repeated_int32.append(number)
  message.repeated_int32.sort(CompareByMagnitude)
  for index, expected in enumerate((-1, -2, -3)):
    self.assertEqual(message.repeated_int32[index], expected)

  # Strings ordered by length.
  for text in ('aaa', 'bb', 'c'):
    message.repeated_string.append(text)
  message.repeated_string.sort(CompareByLength)
  for index, expected in enumerate(('c', 'bb', 'aaa')):
    self.assertEqual(message.repeated_string[index], expected)
nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This code is meant to work on Python 2.4 and above only. +# +# TODO(robinson): Helpers for verbose, common checks like seeing if a +# descriptor's cpp_type is CPPTYPE_MESSAGE. + +"""Contains a metaclass and helper functions used to create +protocol message classes from Descriptor objects at runtime. + +Recall that a metaclass is the "type" of a class. +(A class is to a metaclass what an instance is to a class.) + +In this case, we use the GeneratedProtocolMessageType metaclass +to inject all the useful functionality into the classes +output by the protocol compiler at compile-time. + +The upshot of all this is that the real implementation +details for ALL pure-Python protocol buffers are *here in +this file*. +""" + +__author__ = 'robinson@google.com (Will Robinson)' + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO +import struct +import weakref + +# We use "as" to avoid name collisions with variables. 
def InitMessage(descriptor, cls):
  """Populates a freshly created message class from its descriptor.

  Creates the class-level lookup tables, attaches per-field helpers to every
  FieldDescriptor, and then installs the generated attributes and methods on
  cls by calling the _Add* helpers in sequence.

  Args:
    descriptor: Descriptor object for this message type.
    cls: Class we're constructing for this message type.
  """
  # These tables must exist before the per-field loop below:
  # _AttachFieldHelpers registers a decoder in cls._decoders_by_tag for each
  # field.
  cls._decoders_by_tag = {}
  cls._extensions_by_name = {}
  cls._extensions_by_number = {}
  if (descriptor.has_options and
      descriptor.GetOptions().message_set_wire_format):
    # The item decoder is handed the very same _extensions_by_number dict
    # object that is stored on the class.
    cls._decoders_by_tag[decoder.MESSAGE_SET_ITEM_TAG] = (
        decoder.MessageSetItemDecoder(cls._extensions_by_number))

  # Attach stuff to each FieldDescriptor for quick lookup later on.
  for field in descriptor.fields:
    _AttachFieldHelpers(cls, field)

  _AddEnumValues(descriptor, cls)
  _AddInitMethod(descriptor, cls)
  _AddPropertiesForFields(descriptor, cls)
  _AddPropertiesForExtensions(descriptor, cls)
  _AddStaticMethods(cls)
  _AddMessageMethods(descriptor, cls)
  _AddPrivateHelperMethods(cls)
def _VerifyExtensionHandle(message, extension_handle):
  """Checks that extension_handle is a usable extension of message.

  Args:
    message: The message the extension is about to be used with.
    extension_handle: The object supplied by the caller as the extension.

  Raises:
    KeyError: If extension_handle is not a FieldDescriptor, is not an
      extension, or extends a message type other than that of message.
  """
  handle = extension_handle

  if not isinstance(handle, _FieldDescriptor):
    problem = 'HasExtension() expects an extension handle, got: %s' % handle
  elif not handle.is_extension:
    problem = '"%s" is not an extension.' % handle.full_name
  elif handle.containing_type is not message.DESCRIPTOR:
    # The descriptors are compared by identity, not by equality.
    problem = ('Extension "%s" extends message type "%s", but this '
               'message is of type "%s".' %
               (handle.full_name,
                handle.containing_type.full_name,
                message.DESCRIPTOR.full_name))
  else:
    return

  raise KeyError(problem)
+ """ + dictionary['__slots__'] = ['_cached_byte_size', + '_cached_byte_size_dirty', + '_fields', + '_is_present_in_parent', + '_listener', + '_listener_for_children', + '__weakref__'] + + +def _IsMessageSetExtension(field): + return (field.is_extension and + field.containing_type.has_options and + field.containing_type.GetOptions().message_set_wire_format and + field.type == _FieldDescriptor.TYPE_MESSAGE and + field.message_type == field.extension_scope and + field.label == _FieldDescriptor.LABEL_OPTIONAL) + + +def _AttachFieldHelpers(cls, field_descriptor): + is_repeated = (field_descriptor.label == _FieldDescriptor.LABEL_REPEATED) + is_packed = (field_descriptor.has_options and + field_descriptor.GetOptions().packed) + + if _IsMessageSetExtension(field_descriptor): + field_encoder = encoder.MessageSetItemEncoder(field_descriptor.number) + sizer = encoder.MessageSetItemSizer(field_descriptor.number) + else: + field_encoder = type_checkers.TYPE_TO_ENCODER[field_descriptor.type]( + field_descriptor.number, is_repeated, is_packed) + sizer = type_checkers.TYPE_TO_SIZER[field_descriptor.type]( + field_descriptor.number, is_repeated, is_packed) + + field_descriptor._encoder = field_encoder + field_descriptor._sizer = sizer + field_descriptor._default_constructor = _DefaultValueConstructorForField( + field_descriptor) + + def AddDecoder(wiretype, is_packed): + tag_bytes = encoder.TagBytes(field_descriptor.number, wiretype) + cls._decoders_by_tag[tag_bytes] = ( + type_checkers.TYPE_TO_DECODER[field_descriptor.type]( + field_descriptor.number, is_repeated, is_packed, + field_descriptor, field_descriptor._default_constructor)) + + AddDecoder(type_checkers.FIELD_TYPE_TO_WIRE_TYPE[field_descriptor.type], + False) + + if is_repeated and wire_format.IsTypePackable(field_descriptor.type): + # To support wire compatibility of adding packed = true, add a decoder for + # packed values regardless of the field's options. 
+ AddDecoder(wire_format.WIRETYPE_LENGTH_DELIMITED, True) + + +def _AddClassAttributesForNestedExtensions(descriptor, dictionary): + extension_dict = descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + assert extension_name not in dictionary + dictionary[extension_name] = extension_field + + +def _AddEnumValues(descriptor, cls): + """Sets class-level attributes for all enum fields defined in this message. + + Args: + descriptor: Descriptor object for this message type. + cls: Class we're constructing for this message type. + """ + for enum_type in descriptor.enum_types: + for enum_value in enum_type.values: + setattr(cls, enum_value.name, enum_value.number) + + +def _DefaultValueConstructorForField(field): + """Returns a function which returns a default value for a field. + + Args: + field: FieldDescriptor object for this field. + + The returned function has one argument: + message: Message instance containing this field, or a weakref proxy + of same. + + That function in turn returns a default value for this field. The default + value may refer back to |message| via a weak reference. + """ + + if field.label == _FieldDescriptor.LABEL_REPEATED: + if field.default_value != []: + raise ValueError('Repeated field default value not empty list: %s' % ( + field.default_value)) + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + # We can't look at _concrete_class yet since it might not have + # been set. (Depends on order in which we initialize the classes). 
+ message_type = field.message_type + def MakeRepeatedMessageDefault(message): + return containers.RepeatedCompositeFieldContainer( + message._listener_for_children, field.message_type) + return MakeRepeatedMessageDefault + else: + type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type) + def MakeRepeatedScalarDefault(message): + return containers.RepeatedScalarFieldContainer( + message._listener_for_children, type_checker) + return MakeRepeatedScalarDefault + + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + # _concrete_class may not yet be initialized. + message_type = field.message_type + def MakeSubMessageDefault(message): + result = message_type._concrete_class() + result._SetListener(message._listener_for_children) + return result + return MakeSubMessageDefault + + def MakeScalarDefault(message): + return field.default_value + return MakeScalarDefault + + +def _AddInitMethod(message_descriptor, cls): + """Adds an __init__ method to cls.""" + fields = message_descriptor.fields + def init(self, **kwargs): + self._cached_byte_size = 0 + self._cached_byte_size_dirty = len(kwargs) > 0 + self._fields = {} + self._is_present_in_parent = False + self._listener = message_listener_mod.NullMessageListener() + self._listener_for_children = _Listener(self) + for field_name, field_value in kwargs.iteritems(): + field = _GetFieldByName(message_descriptor, field_name) + if field is None: + raise TypeError("%s() got an unexpected keyword argument '%s'" % + (message_descriptor.name, field_name)) + if field.label == _FieldDescriptor.LABEL_REPEATED: + copy = field._default_constructor(self) + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: # Composite + for val in field_value: + copy.add().MergeFrom(val) + else: # Scalar + copy.extend(field_value) + self._fields[field] = copy + elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + copy = field._default_constructor(self) + copy.MergeFrom(field_value) + self._fields[field] = copy + else: + 
setattr(self, field_name, field_value) + + init.__module__ = None + init.__doc__ = None + cls.__init__ = init + + +def _GetFieldByName(message_descriptor, field_name): + """Returns a field descriptor by field name. + + Args: + message_descriptor: A Descriptor describing all fields in message. + field_name: The name of the field to retrieve. + Returns: + The field descriptor associated with the field name. + """ + try: + return message_descriptor.fields_by_name[field_name] + except KeyError: + raise ValueError('Protocol message has no "%s" field.' % field_name) + + +def _AddPropertiesForFields(descriptor, cls): + """Adds properties for all fields in this protocol message type.""" + for field in descriptor.fields: + _AddPropertiesForField(field, cls) + + if descriptor.is_extendable: + # _ExtensionDict is just an adaptor with no state so we allocate a new one + # every time it is accessed. + cls.Extensions = property(lambda self: _ExtensionDict(self)) + + +def _AddPropertiesForField(field, cls): + """Adds a public property for a protocol message field. + Clients can use this property to get and (in the case + of non-repeated scalar fields) directly set the value + of a protocol message field. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + # Catch it if we add other types that we should + # handle specially here. + assert _FieldDescriptor.MAX_CPPTYPE == 10 + + constant_name = field.name.upper() + "_FIELD_NUMBER" + setattr(cls, constant_name, field.number) + + if field.label == _FieldDescriptor.LABEL_REPEATED: + _AddPropertiesForRepeatedField(field, cls) + elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + _AddPropertiesForNonRepeatedCompositeField(field, cls) + else: + _AddPropertiesForNonRepeatedScalarField(field, cls) + + +def _AddPropertiesForRepeatedField(field, cls): + """Adds a public property for a "repeated" protocol message field. 
Clients + can use this property to get the value of the field, which will be either a + _RepeatedScalarFieldContainer or _RepeatedCompositeFieldContainer (see + below). + + Note that when clients add values to these containers, we perform + type-checking in the case of repeated scalar fields, and we also set any + necessary "has" bits as a side-effect. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + proto_field_name = field.name + property_name = _PropertyName(proto_field_name) + + def getter(self): + field_value = self._fields.get(field) + if field_value is None: + # Construct a new object to represent this field. + field_value = field._default_constructor(self) + + # Atomically check if another thread has preempted us and, if not, swap + # in the new object we just created. If someone has preempted us, we + # take that object and discard ours. + # WARNING: We are relying on setdefault() being atomic. This is true + # in CPython but we haven't investigated others. This warning appears + # in several other locations in this file. + field_value = self._fields.setdefault(field, field_value) + return field_value + getter.__module__ = None + getter.__doc__ = 'Getter for %s.' % proto_field_name + + # We define a setter just so we can throw an exception with a more + # helpful error message. + def setter(self, new_value): + raise AttributeError('Assignment not allowed to repeated field ' + '"%s" in protocol message object.' % proto_field_name) + + doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name + setattr(cls, property_name, property(getter, setter, doc=doc)) + + +def _AddPropertiesForNonRepeatedScalarField(field, cls): + """Adds a public property for a nonrepeated, scalar protocol message field. + Clients can use this property to get and directly set the value of the field. 
+ Note that when the client sets the value of a field by using this property, + all necessary "has" bits are set as a side-effect, and we also perform + type-checking. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + proto_field_name = field.name + property_name = _PropertyName(proto_field_name) + type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type) + default_value = field.default_value + valid_values = set() + + def getter(self): + return self._fields.get(field, default_value) + getter.__module__ = None + getter.__doc__ = 'Getter for %s.' % proto_field_name + def setter(self, new_value): + type_checker.CheckValue(new_value) + self._fields[field] = new_value + # Check _cached_byte_size_dirty inline to improve performance, since scalar + # setters are called frequently. + if not self._cached_byte_size_dirty: + self._Modified() + + setter.__module__ = None + setter.__doc__ = 'Setter for %s.' % proto_field_name + + # Add a property to encapsulate the getter/setter. + doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name + setattr(cls, property_name, property(getter, setter, doc=doc)) + + +def _AddPropertiesForNonRepeatedCompositeField(field, cls): + """Adds a public property for a nonrepeated, composite protocol message field. + A composite field is a "group" or "message" field. + + Clients can use this property to get the value of the field, but cannot + assign to the property directly. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + # TODO(robinson): Remove duplication with similar method + # for non-repeated scalars. + proto_field_name = field.name + property_name = _PropertyName(proto_field_name) + message_type = field.message_type + + def getter(self): + field_value = self._fields.get(field) + if field_value is None: + # Construct a new object to represent this field. 
+ field_value = message_type._concrete_class() + field_value._SetListener(self._listener_for_children) + + # Atomically check if another thread has preempted us and, if not, swap + # in the new object we just created. If someone has preempted us, we + # take that object and discard ours. + # WARNING: We are relying on setdefault() being atomic. This is true + # in CPython but we haven't investigated others. This warning appears + # in several other locations in this file. + field_value = self._fields.setdefault(field, field_value) + return field_value + getter.__module__ = None + getter.__doc__ = 'Getter for %s.' % proto_field_name + + # We define a setter just so we can throw an exception with a more + # helpful error message. + def setter(self, new_value): + raise AttributeError('Assignment not allowed to composite field ' + '"%s" in protocol message object.' % proto_field_name) + + # Add a property to encapsulate the getter. + doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name + setattr(cls, property_name, property(getter, setter, doc=doc)) + + +def _AddPropertiesForExtensions(descriptor, cls): + """Adds properties for all fields in this protocol message type.""" + extension_dict = descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + constant_name = extension_name.upper() + "_FIELD_NUMBER" + setattr(cls, constant_name, extension_field.number) + + +def _AddStaticMethods(cls): + # TODO(robinson): This probably needs to be thread-safe(?) + def RegisterExtension(extension_handle): + extension_handle.containing_type = cls.DESCRIPTOR + _AttachFieldHelpers(cls, extension_handle) + + # Try to insert our extension, failing if an extension with the same number + # already exists. 
+ actual_handle = cls._extensions_by_number.setdefault( + extension_handle.number, extension_handle) + if actual_handle is not extension_handle: + raise AssertionError( + 'Extensions "%s" and "%s" both try to extend message type "%s" with ' + 'field number %d.' % + (extension_handle.full_name, actual_handle.full_name, + cls.DESCRIPTOR.full_name, extension_handle.number)) + + cls._extensions_by_name[extension_handle.full_name] = extension_handle + + handle = extension_handle # avoid line wrapping + if _IsMessageSetExtension(handle): + # MessageSet extension. Also register under type name. + cls._extensions_by_name[ + extension_handle.message_type.full_name] = extension_handle + + cls.RegisterExtension = staticmethod(RegisterExtension) + + def FromString(s): + message = cls() + message.MergeFromString(s) + return message + cls.FromString = staticmethod(FromString) + + +def _IsPresent(item): + """Given a (FieldDescriptor, value) tuple from _fields, return true if the + value should be included in the list returned by ListFields().""" + + if item[0].label == _FieldDescriptor.LABEL_REPEATED: + return bool(item[1]) + elif item[0].cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + return item[1]._is_present_in_parent + else: + return True + + +def _AddListFieldsMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def ListFields(self): + all_fields = [item for item in self._fields.iteritems() if _IsPresent(item)] + all_fields.sort(key = lambda item: item[0].number) + return all_fields + + cls.ListFields = ListFields + + +def _AddHasFieldMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + singular_fields = {} + for field in message_descriptor.fields: + if field.label != _FieldDescriptor.LABEL_REPEATED: + singular_fields[field.name] = field + + def HasField(self, field_name): + try: + field = singular_fields[field_name] + except KeyError: + raise ValueError( + 'Protocol message has no singular "%s" field.' 
% field_name) + + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + value = self._fields.get(field) + return value is not None and value._is_present_in_parent + else: + return field in self._fields + cls.HasField = HasField + + +def _AddClearFieldMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def ClearField(self, field_name): + try: + field = message_descriptor.fields_by_name[field_name] + except KeyError: + raise ValueError('Protocol message has no "%s" field.' % field_name) + + if field in self._fields: + # Note: If the field is a sub-message, its listener will still point + # at us. That's fine, because the worst than can happen is that it + # will call _Modified() and invalidate our byte size. Big deal. + del self._fields[field] + + # Always call _Modified() -- even if nothing was changed, this is + # a mutating method, and thus calling it should cause the field to become + # present in the parent message. + self._Modified() + + cls.ClearField = ClearField + + +def _AddClearExtensionMethod(cls): + """Helper for _AddMessageMethods().""" + def ClearExtension(self, extension_handle): + _VerifyExtensionHandle(self, extension_handle) + + # Similar to ClearField(), above. + if extension_handle in self._fields: + del self._fields[extension_handle] + self._Modified() + cls.ClearExtension = ClearExtension + + +def _AddClearMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def Clear(self): + # Clear fields. + self._fields = {} + self._Modified() + cls.Clear = Clear + + +def _AddHasExtensionMethod(cls): + """Helper for _AddMessageMethods().""" + def HasExtension(self, extension_handle): + _VerifyExtensionHandle(self, extension_handle) + if extension_handle.label == _FieldDescriptor.LABEL_REPEATED: + raise KeyError('"%s" is repeated.' 
% extension_handle.full_name) + + if extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + value = self._fields.get(extension_handle) + return value is not None and value._is_present_in_parent + else: + return extension_handle in self._fields + cls.HasExtension = HasExtension + + +def _AddEqualsMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def __eq__(self, other): + if (not isinstance(other, message_mod.Message) or + other.DESCRIPTOR != self.DESCRIPTOR): + return False + + if self is other: + return True + + return self.ListFields() == other.ListFields() + + cls.__eq__ = __eq__ + + +def _AddStrMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def __str__(self): + return text_format.MessageToString(self) + cls.__str__ = __str__ + + +def _AddUnicodeMethod(unused_message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def __unicode__(self): + return text_format.MessageToString(self, as_utf8=True).decode('utf-8') + cls.__unicode__ = __unicode__ + + +def _AddSetListenerMethod(cls): + """Helper for _AddMessageMethods().""" + def SetListener(self, listener): + if listener is None: + self._listener = message_listener_mod.NullMessageListener() + else: + self._listener = listener + cls._SetListener = SetListener + + +def _BytesForNonRepeatedElement(value, field_number, field_type): + """Returns the number of bytes needed to serialize a non-repeated element. + The returned byte count includes space for tag information and any + other additional space associated with serializing value. + + Args: + value: Value we're serializing. + field_number: Field number of this value. (Since the field number + is stored as part of a varint-encoded tag, this has an impact + on the total bytes required to serialize the value). + field_type: The type of the field. One of the TYPE_* constants + within FieldDescriptor. 
+ """ + try: + fn = type_checkers.TYPE_TO_BYTE_SIZE_FN[field_type] + return fn(field_number, value) + except KeyError: + raise message_mod.EncodeError('Unrecognized field type: %d' % field_type) + + +def _AddByteSizeMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def ByteSize(self): + if not self._cached_byte_size_dirty: + return self._cached_byte_size + + size = 0 + for field_descriptor, field_value in self.ListFields(): + size += field_descriptor._sizer(field_value) + + self._cached_byte_size = size + self._cached_byte_size_dirty = False + self._listener_for_children.dirty = False + return size + + cls.ByteSize = ByteSize + + +def _AddSerializeToStringMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def SerializeToString(self): + # Check if the message has all of its required fields set. + errors = [] + if not self.IsInitialized(): + raise message_mod.EncodeError( + 'Message is missing required fields: ' + + ','.join(self.FindInitializationErrors())) + return self.SerializePartialToString() + cls.SerializeToString = SerializeToString + + +def _AddSerializePartialToStringMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def SerializePartialToString(self): + out = StringIO() + self._InternalSerialize(out.write) + return out.getvalue() + cls.SerializePartialToString = SerializePartialToString + + def InternalSerialize(self, write_bytes): + for field_descriptor, field_value in self.ListFields(): + field_descriptor._encoder(write_bytes, field_value) + cls._InternalSerialize = InternalSerialize + + +def _AddMergeFromStringMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def MergeFromString(self, serialized): + length = len(serialized) + try: + if self._InternalParse(serialized, 0, length) != length: + # The only reason _InternalParse would return early is if it + # encountered an end-group tag. 
+ raise message_mod.DecodeError('Unexpected end-group tag.') + except IndexError: + raise message_mod.DecodeError('Truncated message.') + except struct.error, e: + raise message_mod.DecodeError(e) + return length # Return this for legacy reasons. + cls.MergeFromString = MergeFromString + + local_ReadTag = decoder.ReadTag + local_SkipField = decoder.SkipField + decoders_by_tag = cls._decoders_by_tag + + def InternalParse(self, buffer, pos, end): + self._Modified() + field_dict = self._fields + while pos != end: + (tag_bytes, new_pos) = local_ReadTag(buffer, pos) + field_decoder = decoders_by_tag.get(tag_bytes) + if field_decoder is None: + new_pos = local_SkipField(buffer, new_pos, end, tag_bytes) + if new_pos == -1: + return pos + pos = new_pos + else: + pos = field_decoder(buffer, new_pos, end, self, field_dict) + return pos + cls._InternalParse = InternalParse + + +def _AddIsInitializedMethod(message_descriptor, cls): + """Adds the IsInitialized and FindInitializationError methods to the + protocol message class.""" + + required_fields = [field for field in message_descriptor.fields + if field.label == _FieldDescriptor.LABEL_REQUIRED] + + def IsInitialized(self, errors=None): + """Checks if all required fields of a message are set. + + Args: + errors: A list which, if provided, will be populated with the field + paths of all missing required fields. + + Returns: + True iff the specified message has all required fields set. + """ + + # Performance is critical so we avoid HasField() and ListFields(). 
+ + for field in required_fields: + if (field not in self._fields or + (field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE and + not self._fields[field]._is_present_in_parent)): + if errors is not None: + errors.extend(self.FindInitializationErrors()) + return False + + for field, value in self._fields.iteritems(): + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + if field.label == _FieldDescriptor.LABEL_REPEATED: + for element in value: + if not element.IsInitialized(): + if errors is not None: + errors.extend(self.FindInitializationErrors()) + return False + elif value._is_present_in_parent and not value.IsInitialized(): + if errors is not None: + errors.extend(self.FindInitializationErrors()) + return False + + return True + + cls.IsInitialized = IsInitialized + + def FindInitializationErrors(self): + """Finds required fields which are not initialized. + + Returns: + A list of strings. Each string is a path to an uninitialized field from + the top-level message, e.g. "foo.bar[5].baz". + """ + + errors = [] # simplify things + + for field in required_fields: + if not self.HasField(field.name): + errors.append(field.name) + + for field, value in self.ListFields(): + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + if field.is_extension: + name = "(%s)" % field.full_name + else: + name = field.name + + if field.label == _FieldDescriptor.LABEL_REPEATED: + for i in xrange(len(value)): + element = value[i] + prefix = "%s[%d]." % (name, i) + sub_errors = element.FindInitializationErrors() + errors += [ prefix + error for error in sub_errors ] + else: + prefix = name + "." 
+ sub_errors = value.FindInitializationErrors() + errors += [ prefix + error for error in sub_errors ] + + return errors + + cls.FindInitializationErrors = FindInitializationErrors + + +def _AddMergeFromMethod(cls): + LABEL_REPEATED = _FieldDescriptor.LABEL_REPEATED + CPPTYPE_MESSAGE = _FieldDescriptor.CPPTYPE_MESSAGE + + def MergeFrom(self, msg): + if not isinstance(msg, cls): + raise TypeError( + "Parameter to MergeFrom() must be instance of same class.") + + assert msg is not self + self._Modified() + + fields = self._fields + + for field, value in msg._fields.iteritems(): + if field.label == LABEL_REPEATED: + field_value = fields.get(field) + if field_value is None: + # Construct a new object to represent this field. + field_value = field._default_constructor(self) + fields[field] = field_value + field_value.MergeFrom(value) + elif field.cpp_type == CPPTYPE_MESSAGE: + if value._is_present_in_parent: + field_value = fields.get(field) + if field_value is None: + # Construct a new object to represent this field. 
+ field_value = field._default_constructor(self) + fields[field] = field_value + field_value.MergeFrom(value) + else: + self._fields[field] = value + cls.MergeFrom = MergeFrom + + +def _AddMessageMethods(message_descriptor, cls): + """Adds implementations of all Message methods to cls.""" + _AddListFieldsMethod(message_descriptor, cls) + _AddHasFieldMethod(message_descriptor, cls) + _AddClearFieldMethod(message_descriptor, cls) + if message_descriptor.is_extendable: + _AddClearExtensionMethod(cls) + _AddHasExtensionMethod(cls) + _AddClearMethod(message_descriptor, cls) + _AddEqualsMethod(message_descriptor, cls) + _AddStrMethod(message_descriptor, cls) + _AddUnicodeMethod(message_descriptor, cls) + _AddSetListenerMethod(cls) + _AddByteSizeMethod(message_descriptor, cls) + _AddSerializeToStringMethod(message_descriptor, cls) + _AddSerializePartialToStringMethod(message_descriptor, cls) + _AddMergeFromStringMethod(message_descriptor, cls) + _AddIsInitializedMethod(message_descriptor, cls) + _AddMergeFromMethod(cls) + + +def _AddPrivateHelperMethods(cls): + """Adds implementation of private helper methods to cls.""" + + def Modified(self): + """Sets the _cached_byte_size_dirty bit to true, + and propagates this to our listener iff this was a state change. + """ + + # Note: Some callers check _cached_byte_size_dirty before calling + # _Modified() as an extra optimization. So, if this method is ever + # changed such that it does stuff even when _cached_byte_size_dirty is + # already true, the callers need to be updated. + if not self._cached_byte_size_dirty: + self._cached_byte_size_dirty = True + self._listener_for_children.dirty = True + self._is_present_in_parent = True + self._listener.Modified() + + cls._Modified = Modified + cls.SetInParent = Modified + + +class _Listener(object): + + """MessageListener implementation that a parent message registers with its + child message. 
+ + In order to support semantics like: + + foo.bar.baz.qux = 23 + assert foo.HasField('bar') + + ...child objects must have back references to their parents. + This helper class is at the heart of this support. + """ + + def __init__(self, parent_message): + """Args: + parent_message: The message whose _Modified() method we should call when + we receive Modified() messages. + """ + # This listener establishes a back reference from a child (contained) object + # to its parent (containing) object. We make this a weak reference to avoid + # creating cyclic garbage when the client finishes with the 'parent' object + # in the tree. + if isinstance(parent_message, weakref.ProxyType): + self._parent_message_weakref = parent_message + else: + self._parent_message_weakref = weakref.proxy(parent_message) + + # As an optimization, we also indicate directly on the listener whether + # or not the parent message is dirty. This way we can avoid traversing + # up the tree in the common case. + self.dirty = False + + def Modified(self): + if self.dirty: + return + try: + # Propagate the signal to our parents iff this is the first field set. + self._parent_message_weakref._Modified() + except ReferenceError: + # We can get here if a client has kept a reference to a child object, + # and is now setting a field on it, but the child's parent has been + # garbage-collected. This is not an error. + pass + + +# TODO(robinson): Move elsewhere? This file is getting pretty ridiculous... +# TODO(robinson): Unify error handling of "unknown extension" crap. +# TODO(robinson): Support iteritems()-style iteration over all +# extensions with the "has" bits turned on? +class _ExtensionDict(object): + + """Dict-like container for supporting an indexable "Extensions" + field on proto instances. + + Note that in all cases we expect extension handles to be + FieldDescriptors. + """ + + def __init__(self, extended_message): + """extended_message: Message instance for which we are the Extensions dict. 
+ """ + + self._extended_message = extended_message + + def __getitem__(self, extension_handle): + """Returns the current value of the given extension handle.""" + + _VerifyExtensionHandle(self._extended_message, extension_handle) + + result = self._extended_message._fields.get(extension_handle) + if result is not None: + return result + + if extension_handle.label == _FieldDescriptor.LABEL_REPEATED: + result = extension_handle._default_constructor(self._extended_message) + elif extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + result = extension_handle.message_type._concrete_class() + try: + result._SetListener(self._extended_message._listener_for_children) + except ReferenceError: + pass + else: + # Singular scalar -- just return the default without inserting into the + # dict. + return extension_handle.default_value + + # Atomically check if another thread has preempted us and, if not, swap + # in the new object we just created. If someone has preempted us, we + # take that object and discard ours. + # WARNING: We are relying on setdefault() being atomic. This is true + # in CPython but we haven't investigated others. This warning appears + # in several other locations in this file. + result = self._extended_message._fields.setdefault( + extension_handle, result) + + return result + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + + my_fields = self._extended_message.ListFields() + other_fields = other._extended_message.ListFields() + + # Get rid of non-extension fields. + my_fields = [ field for field in my_fields if field.is_extension ] + other_fields = [ field for field in other_fields if field.is_extension ] + + return my_fields == other_fields + + def __ne__(self, other): + return not self == other + + def __hash__(self): + raise TypeError('unhashable object') + + # Note that this is only meaningful for non-repeated, scalar extension + # fields. 
Note also that we may have to call _Modified() when we do + # successfully set a field this way, to set any necssary "has" bits in the + # ancestors of the extended message. + def __setitem__(self, extension_handle, value): + """If extension_handle specifies a non-repeated, scalar extension + field, sets the value of that field. + """ + + _VerifyExtensionHandle(self._extended_message, extension_handle) + + if (extension_handle.label == _FieldDescriptor.LABEL_REPEATED or + extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE): + raise TypeError( + 'Cannot assign to extension "%s" because it is a repeated or ' + 'composite type.' % extension_handle.full_name) + + # It's slightly wasteful to lookup the type checker each time, + # but we expect this to be a vanishingly uncommon case anyway. + type_checker = type_checkers.GetTypeChecker( + extension_handle.cpp_type, extension_handle.type) + type_checker.CheckValue(value) + self._extended_message._fields[extension_handle] = value + self._extended_message._Modified() + + def _FindExtensionByName(self, name): + """Tries to find a known extension with the specified name. + + Args: + name: Extension full name. + + Returns: + Extension field descriptor. + """ + return self._extended_message._extensions_by_name.get(name, None) diff --git a/python/google/protobuf/internal/reflection_test.py b/python/google/protobuf/internal/reflection_test.py index 54eeebe6..7b9d3398 100755 --- a/python/google/protobuf/internal/reflection_test.py +++ b/python/google/protobuf/internal/reflection_test.py @@ -41,8 +41,6 @@ import operator import struct import unittest -# TODO(robinson): When we split this test in two, only some of these imports -# will be necessary in each test. 
from google.protobuf import unittest_import_pb2 from google.protobuf import unittest_mset_pb2 from google.protobuf import unittest_pb2 @@ -50,6 +48,7 @@ from google.protobuf import descriptor_pb2 from google.protobuf import descriptor from google.protobuf import message from google.protobuf import reflection +from google.protobuf.internal import api_implementation from google.protobuf.internal import more_extensions_pb2 from google.protobuf.internal import more_messages_pb2 from google.protobuf.internal import wire_format @@ -104,10 +103,10 @@ class _MiniDecoder(object): class ReflectionTest(unittest.TestCase): - def assertIs(self, values, others): + def assertListsEqual(self, values, others): self.assertEqual(len(values), len(others)) for i in range(len(values)): - self.assertTrue(values[i] is others[i]) + self.assertEqual(values[i], others[i]) def testScalarConstructor(self): # Constructor with only scalar types should succeed. @@ -201,15 +200,24 @@ class ReflectionTest(unittest.TestCase): list(proto.repeated_foreign_message)) def testConstructorTypeError(self): - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_int32="foo") - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_string=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_nested_message=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_int32=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_int32=["foo"]) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_string=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_string=[1234]) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=[1234]) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, optional_int32="foo") + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, optional_string=1234) + 
self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, optional_nested_message=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_int32=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_int32=["foo"]) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_string=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_string=[1234]) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=[1234]) def testConstructorInvalidatesCachedByteSize(self): message = unittest_pb2.TestAllTypes(optional_int32 = 12) @@ -311,11 +319,14 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(0, getattr(composite_field, scalar_field_name)) # Finally, ensure that modifications to the old composite field object - # don't have any effect on the parent. + # don't have any effect on the parent. Possible only with the pure-python + # implementation of the API. # # (NOTE that when we clear the composite field in the parent, we actually # don't recursively clear down the tree. Instead, we just disconnect the # cleared composite from the tree.) + if api_implementation.Type() != 'python': + return self.assertTrue(old_composite_field is not composite_field) setattr(old_composite_field, scalar_field_name, new_val) self.assertTrue(not composite_field.HasField(scalar_field_name)) @@ -337,6 +348,8 @@ class ReflectionTest(unittest.TestCase): nested.bb = 23 def testDisconnectingNestedMessageBeforeSettingField(self): + if api_implementation.Type() != 'python': + return proto = unittest_pb2.TestAllTypes() nested = proto.optional_nested_message proto.ClearField('optional_nested_message') # Should disconnect from parent @@ -526,7 +539,6 @@ class ReflectionTest(unittest.TestCase): # proto.nonexistent_field = 23 should fail as well. 
self.assertRaises(AttributeError, setattr, proto, 'nonexistent_field', 23) - # TODO(robinson): Add type-safety check for enums. def testSingleScalarTypeSafety(self): proto = unittest_pb2.TestAllTypes() self.assertRaises(TypeError, setattr, proto, 'optional_int32', 1.1) @@ -538,7 +550,9 @@ class ReflectionTest(unittest.TestCase): def TestMinAndMaxIntegers(field_name, expected_min, expected_max): pb = unittest_pb2.TestAllTypes() setattr(pb, field_name, expected_min) + self.assertEqual(expected_min, getattr(pb, field_name)) setattr(pb, field_name, expected_max) + self.assertEqual(expected_max, getattr(pb, field_name)) self.assertRaises(ValueError, setattr, pb, field_name, expected_min - 1) self.assertRaises(ValueError, setattr, pb, field_name, expected_max + 1) @@ -546,7 +560,33 @@ class ReflectionTest(unittest.TestCase): TestMinAndMaxIntegers('optional_uint32', 0, 0xffffffff) TestMinAndMaxIntegers('optional_int64', -(1 << 63), (1 << 63) - 1) TestMinAndMaxIntegers('optional_uint64', 0, 0xffffffffffffffff) - TestMinAndMaxIntegers('optional_nested_enum', -(1 << 31), (1 << 31) - 1) + + pb = unittest_pb2.TestAllTypes() + pb.optional_nested_enum = 1 + self.assertEqual(1, pb.optional_nested_enum) + + # Invalid enum values. + pb.optional_nested_enum = 0 + self.assertEqual(0, pb.optional_nested_enum) + + bytes_size_before = pb.ByteSize() + + pb.optional_nested_enum = 4 + self.assertEqual(4, pb.optional_nested_enum) + + pb.optional_nested_enum = 0 + self.assertEqual(0, pb.optional_nested_enum) + + # Make sure that setting the same enum field doesn't just add unknown + # fields (but overwrites them). + self.assertEqual(bytes_size_before, pb.ByteSize()) + + # Is the invalid value preserved after serialization? 
+ serialized = pb.SerializeToString() + pb2 = unittest_pb2.TestAllTypes() + pb2.ParseFromString(serialized) + self.assertEqual(0, pb2.optional_nested_enum) + self.assertEqual(pb, pb2) def testRepeatedScalarTypeSafety(self): proto = unittest_pb2.TestAllTypes() @@ -560,11 +600,19 @@ class ReflectionTest(unittest.TestCase): self.assertRaises(IndexError, proto.repeated_int32.__setitem__, 500, 23) self.assertRaises(TypeError, proto.repeated_int32.__setitem__, 0, 'abc') + # Repeated enums tests. + #proto.repeated_nested_enum.append(0) + def testSingleScalarGettersAndSetters(self): proto = unittest_pb2.TestAllTypes() self.assertEqual(0, proto.optional_int32) proto.optional_int32 = 1 self.assertEqual(1, proto.optional_int32) + + proto.optional_uint64 = 0xffffffffffff + self.assertEqual(0xffffffffffff, proto.optional_uint64) + proto.optional_uint64 = 0xffffffffffffffff + self.assertEqual(0xffffffffffffffff, proto.optional_uint64) # TODO(robinson): Test all other scalar field types. def testSingleScalarClearField(self): @@ -645,11 +693,38 @@ class ReflectionTest(unittest.TestCase): del proto.repeated_int32[2:] self.assertEqual([5, 35], proto.repeated_int32) + # Test extending. + proto.repeated_int32.extend([3, 13]) + self.assertEqual([5, 35, 3, 13], proto.repeated_int32) + # Test clearing. proto.ClearField('repeated_int32') self.assertTrue(not proto.repeated_int32) self.assertEqual(0, len(proto.repeated_int32)) + proto.repeated_int32.append(1) + self.assertEqual(1, proto.repeated_int32[-1]) + # Test assignment to a negative index. + proto.repeated_int32[-1] = 2 + self.assertEqual(2, proto.repeated_int32[-1]) + + # Test deletion at negative indices. 
+ proto.repeated_int32[:] = [0, 1, 2, 3] + del proto.repeated_int32[-1] + self.assertEqual([0, 1, 2], proto.repeated_int32) + + del proto.repeated_int32[-2] + self.assertEqual([0, 2], proto.repeated_int32) + + self.assertRaises(IndexError, proto.repeated_int32.__delitem__, -3) + self.assertRaises(IndexError, proto.repeated_int32.__delitem__, 300) + + del proto.repeated_int32[-2:-1] + self.assertEqual([2], proto.repeated_int32) + + del proto.repeated_int32[100:10000] + self.assertEqual([2], proto.repeated_int32) + def testRepeatedScalarsRemove(self): proto = unittest_pb2.TestAllTypes() @@ -687,7 +762,7 @@ class ReflectionTest(unittest.TestCase): m1 = proto.repeated_nested_message.add() self.assertTrue(proto.repeated_nested_message) self.assertEqual(2, len(proto.repeated_nested_message)) - self.assertIs([m0, m1], proto.repeated_nested_message) + self.assertListsEqual([m0, m1], proto.repeated_nested_message) self.assertTrue(isinstance(m0, unittest_pb2.TestAllTypes.NestedMessage)) # Test out-of-bounds indices. @@ -706,32 +781,57 @@ class ReflectionTest(unittest.TestCase): m2 = proto.repeated_nested_message.add() m3 = proto.repeated_nested_message.add() m4 = proto.repeated_nested_message.add() - self.assertIs([m1, m2, m3], proto.repeated_nested_message[1:4]) - self.assertIs([m0, m1, m2, m3, m4], proto.repeated_nested_message[:]) + self.assertListsEqual( + [m1, m2, m3], proto.repeated_nested_message[1:4]) + self.assertListsEqual( + [m0, m1, m2, m3, m4], proto.repeated_nested_message[:]) + self.assertListsEqual( + [m0, m1], proto.repeated_nested_message[:2]) + self.assertListsEqual( + [m2, m3, m4], proto.repeated_nested_message[2:]) + self.assertEqual( + m0, proto.repeated_nested_message[0]) + self.assertListsEqual( + [m0], proto.repeated_nested_message[:1]) # Test that we can use the field as an iterator. 
result = [] for i in proto.repeated_nested_message: result.append(i) - self.assertIs([m0, m1, m2, m3, m4], result) + self.assertListsEqual([m0, m1, m2, m3, m4], result) # Test single deletion. del proto.repeated_nested_message[2] - self.assertIs([m0, m1, m3, m4], proto.repeated_nested_message) + self.assertListsEqual([m0, m1, m3, m4], proto.repeated_nested_message) # Test slice deletion. del proto.repeated_nested_message[2:] - self.assertIs([m0, m1], proto.repeated_nested_message) + self.assertListsEqual([m0, m1], proto.repeated_nested_message) + + # Test extending. + n1 = unittest_pb2.TestAllTypes.NestedMessage(bb=1) + n2 = unittest_pb2.TestAllTypes.NestedMessage(bb=2) + proto.repeated_nested_message.extend([n1,n2]) + self.assertEqual(4, len(proto.repeated_nested_message)) + self.assertEqual(n1, proto.repeated_nested_message[2]) + self.assertEqual(n2, proto.repeated_nested_message[3]) # Test clearing. proto.ClearField('repeated_nested_message') self.assertTrue(not proto.repeated_nested_message) self.assertEqual(0, len(proto.repeated_nested_message)) + # Test constructing an element while adding it. + proto.repeated_nested_message.add(bb=23) + self.assertEqual(1, len(proto.repeated_nested_message)) + self.assertEqual(23, proto.repeated_nested_message[0].bb) + def testHandWrittenReflection(self): - # TODO(robinson): We probably need a better way to specify - # protocol types by hand. But then again, this isn't something - # we expect many people to do. Hmm. + # Hand written extensions are only supported by the pure-Python + # implementation of the API. 
+ if api_implementation.Type() != 'python': + return + FieldDescriptor = descriptor.FieldDescriptor foo_field_descriptor = FieldDescriptor( name='foo_field', full_name='MyProto.foo_field', @@ -894,7 +994,7 @@ class ReflectionTest(unittest.TestCase): self.assertTrue(not toplevel.HasField('submessage')) foreign = toplevel.submessage.Extensions[ more_extensions_pb2.repeated_message_extension].add() - self.assertTrue(foreign is toplevel.submessage.Extensions[ + self.assertEqual(foreign, toplevel.submessage.Extensions[ more_extensions_pb2.repeated_message_extension][0]) self.assertTrue(toplevel.HasField('submessage')) @@ -997,6 +1097,12 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(123, proto2.repeated_nested_message[1].bb) self.assertEqual(321, proto2.repeated_nested_message[2].bb) + proto3 = unittest_pb2.TestAllTypes() + proto3.repeated_nested_message.MergeFrom(proto2.repeated_nested_message) + self.assertEqual(999, proto3.repeated_nested_message[0].bb) + self.assertEqual(123, proto3.repeated_nested_message[1].bb) + self.assertEqual(321, proto3.repeated_nested_message[2].bb) + def testMergeFromAllFields(self): # With all fields set. proto1 = unittest_pb2.TestAllTypes() @@ -1126,6 +1232,15 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(2, proto1.optional_int32) self.assertEqual('important-text', proto1.optional_string) + def testCopyFromBadType(self): + # The python implementation doesn't raise an exception in this + # case. In theory it should. 
+ if api_implementation.Type() == 'python': + return + proto1 = unittest_pb2.TestAllTypes() + proto2 = unittest_pb2.TestAllExtensions() + self.assertRaises(TypeError, proto1.CopyFrom, proto2) + def testClear(self): proto = unittest_pb2.TestAllTypes() test_util.SetAllFields(proto) @@ -1231,9 +1346,10 @@ class ReflectionTest(unittest.TestCase): proto.optional_string = str('Testing') self.assertEqual(proto.optional_string, unicode('Testing')) - # Values of type 'str' are also accepted as long as they can be encoded in - # UTF-8. - self.assertEqual(type(proto.optional_string), str) + if api_implementation.Type() == 'python': + # Values of type 'str' are also accepted as long as they can be + # encoded in UTF-8. + self.assertEqual(type(proto.optional_string), str) # Try to assign a 'str' value which contains bytes that aren't 7-bit ASCII. self.assertRaises(ValueError, @@ -1271,7 +1387,7 @@ class ReflectionTest(unittest.TestCase): # Check that the type_id is the same as the tag ID in the .proto file. self.assertEqual(raw.item[0].type_id, 1547769) - # Check the actually bytes on the wire. + # Check the actual bytes on the wire. self.assertTrue( raw.item[0].message.endswith(test_utf8_bytes)) message2.MergeFromString(raw.item[0].message) @@ -1279,10 +1395,23 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(type(message2.str), unicode) self.assertEqual(message2.str, test_utf8) - # How about if the bytes on the wire aren't a valid UTF-8 encoded string. + # The pure Python API throws an exception on MergeFromString(), + # if any of the string fields of the message can't be UTF-8 decoded. + # The C++ implementation of the API has no way to check that on + # MergeFromString and thus has no way to throw the exception. + # + # The pure Python API always returns objects of type 'unicode' (UTF-8 + # encoded), or 'str' (in 7 bit ASCII). 
bytes = raw.item[0].message.replace( test_utf8_bytes, len(test_utf8_bytes) * '\xff') - self.assertRaises(UnicodeDecodeError, message2.MergeFromString, bytes) + + unicode_decode_failed = False + try: + message2.MergeFromString(bytes) + except UnicodeDecodeError, e: + unicode_decode_failed = True + string_field = message2.str + self.assertTrue(unicode_decode_failed or type(string_field) == str) def testEmptyNestedMessage(self): proto = unittest_pb2.TestAllTypes() @@ -1325,6 +1454,9 @@ class TestAllTypesEqualityTest(unittest.TestCase): self.first_proto = unittest_pb2.TestAllTypes() self.second_proto = unittest_pb2.TestAllTypes() + def testNotHashable(self): + self.assertRaises(TypeError, hash, self.first_proto) + def testSelfEquality(self): self.assertEqual(self.first_proto, self.first_proto) @@ -1342,6 +1474,9 @@ class FullProtosEqualityTest(unittest.TestCase): test_util.SetAllFields(self.first_proto) test_util.SetAllFields(self.second_proto) + def testNotHashable(self): + self.assertRaises(TypeError, hash, self.first_proto) + def testNoneNotEqual(self): self.assertNotEqual(self.first_proto, None) self.assertNotEqual(None, self.second_proto) @@ -1410,9 +1545,6 @@ class FullProtosEqualityTest(unittest.TestCase): self.first_proto.ClearField('optional_nested_message') self.second_proto.optional_nested_message.ClearField('bb') self.assertNotEqual(self.first_proto, self.second_proto) - # TODO(robinson): Replace next two lines with method - # to set the "has" bit without changing the value, - # if/when such a method exists. self.first_proto.optional_nested_message.bb = 0 self.first_proto.optional_nested_message.ClearField('bb') self.assertEqual(self.first_proto, self.second_proto) @@ -1477,6 +1609,14 @@ class ByteSizeTest(unittest.TestCase): def testEmptyMessage(self): self.assertEqual(0, self.proto.ByteSize()) + def testSizedOnKwargs(self): + # Use a separate message to ensure testing right after creation. 
+ proto = unittest_pb2.TestAllTypes() + self.assertEqual(0, proto.ByteSize()) + proto_kwargs = unittest_pb2.TestAllTypes(optional_int64 = 1) + # One byte for the tag, one to encode varint 1. + self.assertEqual(2, proto_kwargs.ByteSize()) + def testVarints(self): def Test(i, expected_varint_size): self.proto.Clear() @@ -1668,10 +1808,13 @@ class ByteSizeTest(unittest.TestCase): self.assertEqual(3, self.proto.ByteSize()) self.proto.ClearField('optional_foreign_message') self.assertEqual(0, self.proto.ByteSize()) - child = self.proto.optional_foreign_message - self.proto.ClearField('optional_foreign_message') - child.c = 128 - self.assertEqual(0, self.proto.ByteSize()) + + if api_implementation.Type() == 'python': + # This is only possible in pure-Python implementation of the API. + child = self.proto.optional_foreign_message + self.proto.ClearField('optional_foreign_message') + child.c = 128 + self.assertEqual(0, self.proto.ByteSize()) # Test within extension. extension = more_extensions_pb2.optional_message_extension @@ -1737,7 +1880,6 @@ class ByteSizeTest(unittest.TestCase): self.assertEqual(19, self.packed_extended_proto.ByteSize()) -# TODO(robinson): We need cross-language serialization consistency tests. # Issues to be sure to cover include: # * Handling of unrecognized tags ("uninterpreted_bytes"). # * Handling of MessageSets. @@ -1792,6 +1934,10 @@ class SerializationTest(unittest.TestCase): self.assertEqual(first_proto, second_proto) def testParseTruncated(self): + # This test is only applicable for the Python implementation of the API. 
+ if api_implementation.Type() != 'python': + return + first_proto = unittest_pb2.TestAllTypes() test_util.SetAllFields(first_proto) serialized = first_proto.SerializeToString() diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py index e0991cb1..a3547782 100755 --- a/python/google/protobuf/internal/text_format_test.py +++ b/python/google/protobuf/internal/text_format_test.py @@ -35,6 +35,7 @@ __author__ = 'kenton@google.com (Kenton Varda)' import difflib +import re import unittest from google.protobuf import text_format @@ -95,12 +96,13 @@ class TextFormatTest(unittest.TestCase): def testPrintExotic(self): message = unittest_pb2.TestAllTypes() - message.repeated_int64.append(-9223372036854775808); - message.repeated_uint64.append(18446744073709551615); - message.repeated_double.append(123.456); - message.repeated_double.append(1.23e22); - message.repeated_double.append(1.23e-18); - message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'\"'); + message.repeated_int64.append(-9223372036854775808) + message.repeated_uint64.append(18446744073709551615) + message.repeated_double.append(123.456) + message.repeated_double.append(1.23e22) + message.repeated_double.append(1.23e-18) + message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"') + message.repeated_string.append(u'\u00fc\ua71f') self.CompareToGoldenText( self.RemoveRedundantZeros(text_format.MessageToString(message)), 'repeated_int64: -9223372036854775808\n' @@ -109,7 +111,95 @@ class TextFormatTest(unittest.TestCase): 'repeated_double: 1.23e+22\n' 'repeated_double: 1.23e-18\n' 'repeated_string: ' - '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n') + '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""\n' + 'repeated_string: "\\303\\274\\352\\234\\237"\n') + + def testPrintNestedMessageAsOneLine(self): + message = unittest_pb2.TestAllTypes() + msg = message.repeated_nested_message.add() + msg.bb = 42; + self.CompareToGoldenText( 
+ text_format.MessageToString(message, as_one_line=True), + 'repeated_nested_message { bb: 42 }') + + def testPrintRepeatedFieldsAsOneLine(self): + message = unittest_pb2.TestAllTypes() + message.repeated_int32.append(1) + message.repeated_int32.append(1) + message.repeated_int32.append(3) + message.repeated_string.append("Google") + message.repeated_string.append("Zurich") + self.CompareToGoldenText( + text_format.MessageToString(message, as_one_line=True), + 'repeated_int32: 1 repeated_int32: 1 repeated_int32: 3 ' + 'repeated_string: "Google" repeated_string: "Zurich"') + + def testPrintNestedNewLineInStringAsOneLine(self): + message = unittest_pb2.TestAllTypes() + message.optional_string = "a\nnew\nline" + self.CompareToGoldenText( + text_format.MessageToString(message, as_one_line=True), + 'optional_string: "a\\nnew\\nline"') + + def testPrintMessageSetAsOneLine(self): + message = unittest_mset_pb2.TestMessageSetContainer() + ext1 = unittest_mset_pb2.TestMessageSetExtension1.message_set_extension + ext2 = unittest_mset_pb2.TestMessageSetExtension2.message_set_extension + message.message_set.Extensions[ext1].i = 23 + message.message_set.Extensions[ext2].str = 'foo' + self.CompareToGoldenText( + text_format.MessageToString(message, as_one_line=True), + 'message_set {' + ' [protobuf_unittest.TestMessageSetExtension1] {' + ' i: 23' + ' }' + ' [protobuf_unittest.TestMessageSetExtension2] {' + ' str: \"foo\"' + ' }' + ' }') + + def testPrintExoticAsOneLine(self): + message = unittest_pb2.TestAllTypes() + message.repeated_int64.append(-9223372036854775808) + message.repeated_uint64.append(18446744073709551615) + message.repeated_double.append(123.456) + message.repeated_double.append(1.23e22) + message.repeated_double.append(1.23e-18) + message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"') + message.repeated_string.append(u'\u00fc\ua71f') + self.CompareToGoldenText( + self.RemoveRedundantZeros( + text_format.MessageToString(message, as_one_line=True)), + 
'repeated_int64: -9223372036854775808' + ' repeated_uint64: 18446744073709551615' + ' repeated_double: 123.456' + ' repeated_double: 1.23e+22' + ' repeated_double: 1.23e-18' + ' repeated_string: ' + '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""' + ' repeated_string: "\\303\\274\\352\\234\\237"') + + def testRoundTripExoticAsOneLine(self): + message = unittest_pb2.TestAllTypes() + message.repeated_int64.append(-9223372036854775808) + message.repeated_uint64.append(18446744073709551615) + message.repeated_double.append(123.456) + message.repeated_double.append(1.23e22) + message.repeated_double.append(1.23e-18) + message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"') + message.repeated_string.append(u'\u00fc\ua71f') + + wire_text = text_format.MessageToString(message, as_one_line=True) + parsed_message = unittest_pb2.TestAllTypes() + text_format.Merge(wire_text, parsed_message) + self.assertEquals(message, parsed_message) + + def testPrintRawUtf8String(self): + message = unittest_pb2.TestAllTypes() + message.repeated_string.append(u'\u00fc\ua71f') + self.CompareToGoldenText( + text_format.MessageToString(message, as_utf8 = True), + 'repeated_string: "\303\274\352\234\237"\n') def testMessageToString(self): message = unittest_pb2.ForeignMessage() @@ -119,8 +209,12 @@ class TextFormatTest(unittest.TestCase): def RemoveRedundantZeros(self, text): # Some platforms print 1e+5 as 1e+005. This is fine, but we need to remove # these zeros in order to match the golden file. - return text.replace('e+0','e+').replace('e+0','e+') \ + text = text.replace('e+0','e+').replace('e+0','e+') \ .replace('e-0','e-').replace('e-0','e-') + # Floating point fields are printed with .0 suffix even if they are + # actualy integer numbers. 
+ text = re.compile('\.0$', re.MULTILINE).sub('', text) + return text def testMergeGolden(self): golden_text = '\n'.join(self.ReadGolden('text_format_unittest_data.txt')) @@ -191,8 +285,11 @@ class TextFormatTest(unittest.TestCase): 'repeated_double: 1.23e+22\n' 'repeated_double: 1.23e-18\n' 'repeated_string: \n' - '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n' - 'repeated_string: "foo" \'corge\' "grault"') + '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""\n' + 'repeated_string: "foo" \'corge\' "grault"\n' + 'repeated_string: "\\303\\274\\352\\234\\237"\n' + 'repeated_string: "\\xc3\\xbc"\n' + 'repeated_string: "\xc3\xbc"\n') text_format.Merge(text, message) self.assertEqual(-9223372036854775808, message.repeated_int64[0]) @@ -201,8 +298,30 @@ class TextFormatTest(unittest.TestCase): self.assertEqual(1.23e22, message.repeated_double[1]) self.assertEqual(1.23e-18, message.repeated_double[2]) self.assertEqual( - '\000\001\a\b\f\n\r\t\v\\\'\"', message.repeated_string[0]) + '\000\001\a\b\f\n\r\t\v\\\'"', message.repeated_string[0]) self.assertEqual('foocorgegrault', message.repeated_string[1]) + self.assertEqual(u'\u00fc\ua71f', message.repeated_string[2]) + self.assertEqual(u'\u00fc', message.repeated_string[3]) + + def testMergeEmptyText(self): + message = unittest_pb2.TestAllTypes() + text = '' + text_format.Merge(text, message) + self.assertEquals(unittest_pb2.TestAllTypes(), message) + + def testMergeInvalidUtf8(self): + message = unittest_pb2.TestAllTypes() + text = 'repeated_string: "\\xc3\\xc3"' + self.assertRaises(text_format.ParseError, text_format.Merge, text, message) + + def testMergeSingleWord(self): + message = unittest_pb2.TestAllTypes() + text = 'foo' + self.assertRaisesWithMessage( + text_format.ParseError, + ('1:1 : Message type "protobuf_unittest.TestAllTypes" has no field named ' + '"foo".'), + text_format.Merge, text, message) def testMergeUnknownField(self): message = unittest_pb2.TestAllTypes() @@ -297,7 +416,8 @@ class 
TokenizerTest(unittest.TestCase): 'identifiER_4 : 1.1e+2 ID5:-0.23 ID6:\'aaaa\\\'bbbb\'\n' 'ID7 : "aa\\"bb"\n\n\n\n ID8: {A:inf B:-inf C:true D:false}\n' 'ID9: 22 ID10: -111111111111111111 ID11: -22\n' - 'ID12: 2222222222222222222') + 'ID12: 2222222222222222222 ' + 'false_bool: 0 true_BOOL:t \n true_bool1: 1 false_BOOL1:f ' ) tokenizer = text_format._Tokenizer(text) methods = [(tokenizer.ConsumeIdentifier, 'identifier1'), ':', @@ -347,7 +467,19 @@ class TokenizerTest(unittest.TestCase): (tokenizer.ConsumeInt32, -22), (tokenizer.ConsumeIdentifier, 'ID12'), ':', - (tokenizer.ConsumeUint64, 2222222222222222222)] + (tokenizer.ConsumeUint64, 2222222222222222222), + (tokenizer.ConsumeIdentifier, 'false_bool'), + ':', + (tokenizer.ConsumeBool, False), + (tokenizer.ConsumeIdentifier, 'true_BOOL'), + ':', + (tokenizer.ConsumeBool, True), + (tokenizer.ConsumeIdentifier, 'true_bool1'), + ':', + (tokenizer.ConsumeBool, True), + (tokenizer.ConsumeIdentifier, 'false_BOOL1'), + ':', + (tokenizer.ConsumeBool, False)] i = 0 while not tokenizer.AtEnd(): |