author | liujisi@google.com <liujisi@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2010-11-02 13:14:58 +0000
committer | liujisi@google.com <liujisi@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2010-11-02 13:14:58 +0000
commit | 33165fe0d5c265c92f2a67fc2b437b567c24e294 (patch)
tree | 52def0850ddd2e976da238d1a437fbda79c96e44 /python
parent | 80aa23df6c63750e8cdfdcf3996fbc37d63cac61 (diff)
Submit recent changes from internal branch. See CHANGES.txt for more details.
Diffstat (limited to 'python')
20 files changed, 4891 insertions, 1152 deletions
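The first new file in the diff below, google/protobuf/internal/api_implementation.py, selects between the pure-Python and C++-backed implementations based on the PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION environment variable. A minimal sketch of how that switch is exercised, assuming the package and the generated unittest_pb2 test module are importable (the patch itself discourages calling Type() directly):

```python
# Illustrative sketch only -- not part of the patch.
import os

# Must be set before google.protobuf is first imported; only 'python' and
# 'cpp' are meaningful values in this revision.
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

from google.protobuf.internal import api_implementation
from google.protobuf import unittest_pb2  # generated test proto (assumed available)

# Discouraged by the module's own comments, but handy when verifying which
# backend was actually picked up.
assert api_implementation.Type() in ('python', 'cpp')

msg = unittest_pb2.TestAllTypes()
msg.optional_int32 = 42
data = msg.SerializeToString()
```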
diff --git a/python/google/protobuf/descriptor.py b/python/google/protobuf/descriptor.py index aa4ab969..cf609bee 100755 --- a/python/google/protobuf/descriptor.py +++ b/python/google/protobuf/descriptor.py @@ -28,16 +28,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# TODO(robinson): We probably need to provide deep-copy methods for -# descriptor types. When a FieldDescriptor is passed into -# Descriptor.__init__(), we should make a deep copy and then set -# containing_type on it. Alternatively, we could just get -# rid of containing_type (iit's not needed for reflection.py, at least). -# -# TODO(robinson): Print method? -# -# TODO(robinson): Useful __repr__? - """Descriptors essentially contain exactly the information found in a .proto file, in types that make this information accessible in Python. """ @@ -45,6 +35,13 @@ file, in types that make this information accessible in Python. __author__ = 'robinson@google.com (Will Robinson)' +from google.protobuf.internal import api_implementation + + +if api_implementation.Type() == 'cpp': + from google.protobuf.internal import cpp_message + + class Error(Exception): """Base error for this module.""" @@ -396,6 +393,13 @@ class FieldDescriptor(DescriptorBase): self.enum_type = enum_type self.is_extension = is_extension self.extension_scope = extension_scope + if api_implementation.Type() == 'cpp': + if is_extension: + self._cdescriptor = cpp_message.GetExtensionDescriptor(full_name) + else: + self._cdescriptor = cpp_message.GetFieldDescriptor(full_name) + else: + self._cdescriptor = None class EnumDescriptor(_NestedDescriptorBase): @@ -567,9 +571,13 @@ class FileDescriptor(DescriptorBase): """Constructor.""" super(FileDescriptor, self).__init__(options, 'FileOptions') + self.message_types_by_name = {} self.name = name self.package = package self.serialized_pb = serialized_pb + if (api_implementation.Type() == 'cpp' and + self.serialized_pb is not None): + cpp_message.BuildFile(self.serialized_pb) def CopyToProto(self, proto): """Copies this to a descriptor_pb2.FileDescriptorProto. diff --git a/python/google/protobuf/internal/api_implementation.py b/python/google/protobuf/internal/api_implementation.py new file mode 100755 index 00000000..b3e412e2 --- /dev/null +++ b/python/google/protobuf/internal/api_implementation.py @@ -0,0 +1,64 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. +# http://code.google.com/p/protobuf/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This module is the central entity that determines which implementation of the +API is used. +""" + +__author__ = 'petar@google.com (Petar Petrov)' + +import os +# This environment variable can be used to switch to a certain implementation +# of the Python API. Right now only 'python' and 'cpp' are valid values. Any +# other value will be ignored. +_implementation_type = os.getenv('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', + 'python') + + +if _implementation_type != 'python': + # For now, by default use the pure-Python implementation. + # The code below checks if the C extension is available and + # uses it if it is available. + _implementation_type = 'cpp' + ## Determine automatically which implementation to use. + #try: + # from google.protobuf.internal import cpp_message + # _implementation_type = 'cpp' + #except ImportError, e: + # _implementation_type = 'python' + + +# Usage of this function is discouraged. Clients shouldn't care which +# implementation of the API is in use. Note that there is no guarantee +# that differences between APIs will be maintained. +# Please don't use this function if possible. +def Type(): + return _implementation_type diff --git a/python/google/protobuf/internal/containers.py b/python/google/protobuf/internal/containers.py index 5cc7d6d0..097a3c26 100755 --- a/python/google/protobuf/internal/containers.py +++ b/python/google/protobuf/internal/containers.py @@ -72,9 +72,15 @@ class BaseContainer(object): # The concrete classes should define __eq__. return not self == other + def __hash__(self): + raise TypeError('unhashable object') + def __repr__(self): return repr(self._values) + def sort(self, sort_function=cmp): + self._values.sort(sort_function) + class RepeatedScalarFieldContainer(BaseContainer): @@ -198,28 +204,37 @@ class RepeatedCompositeFieldContainer(BaseContainer): super(RepeatedCompositeFieldContainer, self).__init__(message_listener) self._message_descriptor = message_descriptor - def add(self): - new_element = self._message_descriptor._concrete_class() + def add(self, **kwargs): + """Adds a new element at the end of the list and returns it. Keyword + arguments may be used to initialize the element. + """ + new_element = self._message_descriptor._concrete_class(**kwargs) new_element._SetListener(self._message_listener) self._values.append(new_element) if not self._message_listener.dirty: self._message_listener.Modified() return new_element - def MergeFrom(self, other): - """Appends the contents of another repeated field of the same type to this - one, copying each individual message. + def extend(self, elem_seq): + """Extends by appending the given sequence of elements of the same type + as this one, copying each individual message. 
""" message_class = self._message_descriptor._concrete_class listener = self._message_listener values = self._values - for message in other._values: + for message in elem_seq: new_element = message_class() new_element._SetListener(listener) new_element.MergeFrom(message) values.append(new_element) listener.Modified() + def MergeFrom(self, other): + """Appends the contents of another repeated field of the same type to this + one, copying each individual message. + """ + self.extend(other._values) + def __getslice__(self, start, stop): """Retrieves the subset of items from between the specified indices.""" return self._values[start:stop] diff --git a/python/google/protobuf/internal/cpp_message.py b/python/google/protobuf/internal/cpp_message.py new file mode 100755 index 00000000..3f426502 --- /dev/null +++ b/python/google/protobuf/internal/cpp_message.py @@ -0,0 +1,616 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. +# http://code.google.com/p/protobuf/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Contains helper functions used to create protocol message classes from +Descriptor objects at runtime backed by the protocol buffer C++ API. 
+""" + +__author__ = 'petar@google.com (Petar Petrov)' + +import operator +from google.protobuf.internal import _net_proto2___python +from google.protobuf import message + + +_LABEL_REPEATED = _net_proto2___python.LABEL_REPEATED +_LABEL_OPTIONAL = _net_proto2___python.LABEL_OPTIONAL +_CPPTYPE_MESSAGE = _net_proto2___python.CPPTYPE_MESSAGE +_TYPE_MESSAGE = _net_proto2___python.TYPE_MESSAGE + + +def GetDescriptorPool(): + """Creates a new DescriptorPool C++ object.""" + return _net_proto2___python.NewCDescriptorPool() + + +_pool = GetDescriptorPool() + + +def GetFieldDescriptor(full_field_name): + """Searches for a field descriptor given a full field name.""" + return _pool.FindFieldByName(full_field_name) + + +def BuildFile(content): + """Registers a new proto file in the underlying C++ descriptor pool.""" + _net_proto2___python.BuildFile(content) + + +def GetExtensionDescriptor(full_extension_name): + """Searches for extension descriptor given a full field name.""" + return _pool.FindExtensionByName(full_extension_name) + + +def NewCMessage(full_message_name): + """Creates a new C++ protocol message by its name.""" + return _net_proto2___python.NewCMessage(full_message_name) + + +def ScalarProperty(cdescriptor): + """Returns a scalar property for the given descriptor.""" + + def Getter(self): + return self._cmsg.GetScalar(cdescriptor) + + def Setter(self, value): + self._cmsg.SetScalar(cdescriptor, value) + + return property(Getter, Setter) + + +def CompositeProperty(cdescriptor, message_type): + """Returns a Python property the given composite field.""" + + def Getter(self): + sub_message = self._composite_fields.get(cdescriptor.name, None) + if sub_message is None: + cmessage = self._cmsg.NewSubMessage(cdescriptor) + sub_message = message_type._concrete_class(__cmessage=cmessage) + self._composite_fields[cdescriptor.name] = sub_message + return sub_message + + return property(Getter) + + +class RepeatedScalarContainer(object): + """Container for repeated scalar fields.""" + + __slots__ = ['_message', '_cfield_descriptor', '_cmsg'] + + def __init__(self, msg, cfield_descriptor): + self._message = msg + self._cmsg = msg._cmsg + self._cfield_descriptor = cfield_descriptor + + def append(self, value): + self._cmsg.AddRepeatedScalar( + self._cfield_descriptor, value) + + def extend(self, sequence): + for element in sequence: + self.append(element) + + def insert(self, key, value): + values = self[slice(None, None, None)] + values.insert(key, value) + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + def remove(self, value): + values = self[slice(None, None, None)] + values.remove(value) + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + def __setitem__(self, key, value): + values = self[slice(None, None, None)] + values[key] = value + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + def __getitem__(self, key): + return self._cmsg.GetRepeatedScalar(self._cfield_descriptor, key) + + def __delitem__(self, key): + self._cmsg.DeleteRepeatedField(self._cfield_descriptor, key) + + def __len__(self): + return len(self[slice(None, None, None)]) + + def __eq__(self, other): + if self is other: + return True + if not operator.isSequenceType(other): + raise TypeError( + 'Can only compare repeated scalar fields against sequences.') + # We are presumably comparing against some other sequence type. 
+ return other == self[slice(None, None, None)] + + def __ne__(self, other): + return not self == other + + def __hash__(self): + raise TypeError('unhashable object') + + def sort(self, sort_function=cmp): + values = self[slice(None, None, None)] + values.sort(sort_function) + self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values) + + +def RepeatedScalarProperty(cdescriptor): + """Returns a Python property the given repeated scalar field.""" + + def Getter(self): + container = self._composite_fields.get(cdescriptor.name, None) + if container is None: + container = RepeatedScalarContainer(self, cdescriptor) + self._composite_fields[cdescriptor.name] = container + return container + + def Setter(self, new_value): + raise AttributeError('Assignment not allowed to repeated field ' + '"%s" in protocol message object.' % cdescriptor.name) + + doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name + return property(Getter, Setter, doc=doc) + + +class RepeatedCompositeContainer(object): + """Container for repeated composite fields.""" + + __slots__ = ['_message', '_subclass', '_cfield_descriptor', '_cmsg'] + + def __init__(self, msg, cfield_descriptor, subclass): + self._message = msg + self._cmsg = msg._cmsg + self._subclass = subclass + self._cfield_descriptor = cfield_descriptor + + def add(self, **kwargs): + cmessage = self._cmsg.AddMessage(self._cfield_descriptor) + return self._subclass(__cmessage=cmessage, __owner=self._message, **kwargs) + + def extend(self, elem_seq): + """Extends by appending the given sequence of elements of the same type + as this one, copying each individual message. + """ + for message in elem_seq: + self.add().MergeFrom(message) + + def MergeFrom(self, other): + for message in other[:]: + self.add().MergeFrom(message) + + def __getitem__(self, key): + cmessages = self._cmsg.GetRepeatedMessage( + self._cfield_descriptor, key) + subclass = self._subclass + if not isinstance(cmessages, list): + return subclass(__cmessage=cmessages, __owner=self._message) + + return [subclass(__cmessage=m, __owner=self._message) for m in cmessages] + + def __delitem__(self, key): + self._cmsg.DeleteRepeatedField( + self._cfield_descriptor, key) + + def __len__(self): + return self._cmsg.FieldLength(self._cfield_descriptor) + + def __eq__(self, other): + """Compares the current instance with another one.""" + if self is other: + return True + if not isinstance(other, self.__class__): + raise TypeError('Can only compare repeated composite fields against ' + 'other repeated composite fields.') + messages = self[slice(None, None, None)] + other_messages = other[slice(None, None, None)] + return messages == other_messages + + def __hash__(self): + raise TypeError('unhashable object') + + def sort(self, sort_function=cmp): + messages = [] + for index in range(len(self)): + # messages[i][0] is where the i-th element of the new array has to come + # from. + # messages[i][1] is where the i-th element of the old array has to go. + messages.append([index, 0, self[index]]) + messages.sort(lambda x,y: sort_function(x[2], y[2])) + + # Remember which position each elements has to move to. + for i in range(len(messages)): + messages[messages[i][0]][1] = i + + # Apply the transposition. 
+ for i in range(len(messages)): + from_position = messages[i][0] + if i == from_position: + continue + self._cmsg.SwapRepeatedFieldElements( + self._cfield_descriptor, i, from_position) + messages[messages[i][1]][0] = from_position + + +def RepeatedCompositeProperty(cdescriptor, message_type): + """Returns a Python property for the given repeated composite field.""" + + def Getter(self): + container = self._composite_fields.get(cdescriptor.name, None) + if container is None: + container = RepeatedCompositeContainer( + self, cdescriptor, message_type._concrete_class) + self._composite_fields[cdescriptor.name] = container + return container + + def Setter(self, new_value): + raise AttributeError('Assignment not allowed to repeated field ' + '"%s" in protocol message object.' % cdescriptor.name) + + doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name + return property(Getter, Setter, doc=doc) + + +class ExtensionDict(object): + """Extension dictionary added to each protocol message.""" + + def __init__(self, msg): + self._message = msg + self._cmsg = msg._cmsg + self._values = {} + + def __setitem__(self, extension, value): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' % (extension,)) + cdescriptor = extension._cdescriptor + if (cdescriptor.label != _LABEL_OPTIONAL or + cdescriptor.cpp_type == _CPPTYPE_MESSAGE): + raise TypeError('Extension %r is repeated and/or a composite type.' % ( + extension.full_name,)) + self._cmsg.SetScalar(cdescriptor, value) + self._values[extension] = value + + def __getitem__(self, extension): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' % (extension,)) + + cdescriptor = extension._cdescriptor + if (cdescriptor.label != _LABEL_REPEATED and + cdescriptor.cpp_type != _CPPTYPE_MESSAGE): + return self._cmsg.GetScalar(cdescriptor) + + ext = self._values.get(extension, None) + if ext is not None: + return ext + + ext = self._CreateNewHandle(extension) + self._values[extension] = ext + return ext + + def ClearExtension(self, extension): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' % (extension,)) + self._cmsg.ClearFieldByDescriptor(extension._cdescriptor) + if extension in self._values: + del self._values[extension] + + def HasExtension(self, extension): + from google.protobuf import descriptor + if not isinstance(extension, descriptor.FieldDescriptor): + raise KeyError('Bad extension %r.' % (extension,)) + return self._cmsg.HasFieldByDescriptor(extension._cdescriptor) + + def _FindExtensionByName(self, name): + """Tries to find a known extension with the specified name. + + Args: + name: Extension full name. + + Returns: + Extension field descriptor. 
+ """ + return self._message._extensions_by_name.get(name, None) + + def _CreateNewHandle(self, extension): + cdescriptor = extension._cdescriptor + if (cdescriptor.label != _LABEL_REPEATED and + cdescriptor.cpp_type == _CPPTYPE_MESSAGE): + cmessage = self._cmsg.NewSubMessage(cdescriptor) + return extension.message_type._concrete_class(__cmessage=cmessage) + + if cdescriptor.label == _LABEL_REPEATED: + if cdescriptor.cpp_type == _CPPTYPE_MESSAGE: + return RepeatedCompositeContainer( + self._message, cdescriptor, extension.message_type._concrete_class) + else: + return RepeatedScalarContainer(self._message, cdescriptor) + # This shouldn't happen! + assert False + return None + + +def NewMessage(message_descriptor, dictionary): + """Creates a new protocol message *class*.""" + _AddClassAttributesForNestedExtensions(message_descriptor, dictionary) + _AddEnumValues(message_descriptor, dictionary) + _AddDescriptors(message_descriptor, dictionary) + + +def InitMessage(message_descriptor, cls): + """Constructs a new message instance (called before instance's __init__).""" + cls._extensions_by_name = {} + _AddInitMethod(message_descriptor, cls) + _AddMessageMethods(message_descriptor, cls) + _AddPropertiesForExtensions(message_descriptor, cls) + + +def _AddDescriptors(message_descriptor, dictionary): + """Sets up a new protocol message class dictionary. + + Args: + message_descriptor: A Descriptor instance describing this message type. + dictionary: Class dictionary to which we'll add a '__slots__' entry. + """ + dictionary['__descriptors'] = {} + for field in message_descriptor.fields: + dictionary['__descriptors'][field.name] = GetFieldDescriptor( + field.full_name) + + dictionary['__slots__'] = list(dictionary['__descriptors'].iterkeys()) + [ + '_cmsg', '_owner', '_composite_fields', 'Extensions'] + + +def _AddEnumValues(message_descriptor, dictionary): + """Sets class-level attributes for all enum fields defined in this message. + + Args: + message_descriptor: Descriptor object for this message type. + dictionary: Class dictionary that should be populated. + """ + for enum_type in message_descriptor.enum_types: + for enum_value in enum_type.values: + dictionary[enum_value.name] = enum_value.number + + +def _AddClassAttributesForNestedExtensions(message_descriptor, dictionary): + """Adds class attributes for the nested extensions.""" + extension_dict = message_descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + assert extension_name not in dictionary + dictionary[extension_name] = extension_field + + +def _AddInitMethod(message_descriptor, cls): + """Adds an __init__ method to cls.""" + + # Create and attach message field properties to the message class. + # This can be done just once per message class, since property setters and + # getters are passed the message instance. + # This makes message instantiation extremely fast, and at the same time it + # doesn't require the creation of property objects for each message instance, + # which saves a lot of memory. 
+ for field in message_descriptor.fields: + field_cdescriptor = cls.__descriptors[field.name] + if field.label == _LABEL_REPEATED: + if field.cpp_type == _CPPTYPE_MESSAGE: + value = RepeatedCompositeProperty(field_cdescriptor, field.message_type) + else: + value = RepeatedScalarProperty(field_cdescriptor) + elif field.cpp_type == _CPPTYPE_MESSAGE: + value = CompositeProperty(field_cdescriptor, field.message_type) + else: + value = ScalarProperty(field_cdescriptor) + setattr(cls, field.name, value) + + # Attach a constant with the field number. + constant_name = field.name.upper() + '_FIELD_NUMBER' + setattr(cls, constant_name, field.number) + + def Init(self, **kwargs): + """Message constructor.""" + cmessage = kwargs.pop('__cmessage', None) + if cmessage is None: + self._cmsg = NewCMessage(message_descriptor.full_name) + else: + self._cmsg = cmessage + + # Keep a reference to the owner, as the owner keeps a reference to the + # underlying protocol buffer message. + owner = kwargs.pop('__owner', None) + if owner is not None: + self._owner = owner + + self.Extensions = ExtensionDict(self) + self._composite_fields = {} + + for field_name, field_value in kwargs.iteritems(): + field_cdescriptor = self.__descriptors.get(field_name, None) + if field_cdescriptor is None: + raise ValueError('Protocol message has no "%s" field.' % field_name) + if field_cdescriptor.label == _LABEL_REPEATED: + if field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE: + for val in field_value: + getattr(self, field_name).add().MergeFrom(val) + else: + getattr(self, field_name).extend(field_value) + elif field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE: + getattr(self, field_name).MergeFrom(field_value) + else: + setattr(self, field_name, field_value) + + Init.__module__ = None + Init.__doc__ = None + cls.__init__ = Init + + +def _IsMessageSetExtension(field): + """Checks if a field is a message set extension.""" + return (field.is_extension and + field.containing_type.has_options and + field.containing_type.GetOptions().message_set_wire_format and + field.type == _TYPE_MESSAGE and + field.message_type == field.extension_scope and + field.label == _LABEL_OPTIONAL) + + +def _AddMessageMethods(message_descriptor, cls): + """Adds the methods to a protocol message class.""" + if message_descriptor.is_extendable: + + def ClearExtension(self, extension): + self.Extensions.ClearExtension(extension) + + def HasExtension(self, extension): + return self.Extensions.HasExtension(extension) + + def HasField(self, field_name): + return self._cmsg.HasField(field_name) + + def ClearField(self, field_name): + if field_name in self._composite_fields: + del self._composite_fields[field_name] + self._cmsg.ClearField(field_name) + + def Clear(self): + return self._cmsg.Clear() + + def IsInitialized(self, errors=None): + if self._cmsg.IsInitialized(): + return True + if errors is not None: + errors.extend(self.FindInitializationErrors()); + return False + + def SerializeToString(self): + if not self.IsInitialized(): + raise message.EncodeError( + 'Message is missing required fields: ' + + ','.join(self.FindInitializationErrors())) + return self._cmsg.SerializeToString() + + def SerializePartialToString(self): + return self._cmsg.SerializePartialToString() + + def ParseFromString(self, serialized): + self.Clear() + self.MergeFromString(serialized) + + def MergeFromString(self, serialized): + byte_size = self._cmsg.MergeFromString(serialized) + if byte_size < 0: + raise message.DecodeError('Unable to merge from string.') + return byte_size + + def 
MergeFrom(self, msg): + if not isinstance(msg, cls): + raise TypeError( + "Parameter to MergeFrom() must be instance of same class.") + self._cmsg.MergeFrom(msg._cmsg) + + def CopyFrom(self, msg): + self._cmsg.CopyFrom(msg._cmsg) + + def ByteSize(self): + return self._cmsg.ByteSize() + + def SetInParent(self): + return self._cmsg.SetInParent() + + def ListFields(self): + all_fields = [] + field_list = self._cmsg.ListFields() + fields_by_name = cls.DESCRIPTOR.fields_by_name + for is_extension, field_name in field_list: + if is_extension: + extension = cls._extensions_by_name[field_name] + all_fields.append((extension, self.Extensions[extension])) + else: + field_descriptor = fields_by_name[field_name] + all_fields.append( + (field_descriptor, getattr(self, field_name))) + all_fields.sort(key=lambda item: item[0].number) + return all_fields + + def FindInitializationErrors(self): + return self._cmsg.FindInitializationErrors() + + def __str__(self): + return self._cmsg.DebugString() + + def __eq__(self, other): + if self is other: + return True + if not isinstance(other, self.__class__): + return False + return self.ListFields() == other.ListFields() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + raise TypeError('unhashable object') + + def __unicode__(self): + return text_format.MessageToString(self, as_utf8=True).decode('utf-8') + + # Attach the local methods to the message class. + for key, value in locals().copy().iteritems(): + if key not in ('key', 'value', '__builtins__', '__name__', '__doc__'): + setattr(cls, key, value) + + # Static methods: + + def RegisterExtension(extension_handle): + extension_handle.containing_type = cls.DESCRIPTOR + cls._extensions_by_name[extension_handle.full_name] = extension_handle + + if _IsMessageSetExtension(extension_handle): + # MessageSet extension. Also register under type name. + cls._extensions_by_name[ + extension_handle.message_type.full_name] = extension_handle + cls.RegisterExtension = staticmethod(RegisterExtension) + + def FromString(string): + msg = cls() + msg.MergeFromString(string) + return msg + cls.FromString = staticmethod(FromString) + + + +def _AddPropertiesForExtensions(message_descriptor, cls): + """Adds properties for all fields in this protocol message type.""" + extension_dict = message_descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + constant_name = extension_name.upper() + '_FIELD_NUMBER' + setattr(cls, constant_name, extension_field.number) diff --git a/python/google/protobuf/internal/decoder.py b/python/google/protobuf/internal/decoder.py index 461a30c0..55f746f5 100755 --- a/python/google/protobuf/internal/decoder.py +++ b/python/google/protobuf/internal/decoder.py @@ -86,6 +86,13 @@ from google.protobuf.internal import wire_format from google.protobuf import message +# This will overflow and thus become IEEE-754 "infinity". We would use +# "float('inf')" but it doesn't work on Windows pre-Python-2.6. +_POS_INF = 1e10000 +_NEG_INF = -_POS_INF +_NAN = _POS_INF * 0 + + # This is not for optimization, but rather to avoid conflicts with local # variables named "message". _DecodeError = message.DecodeError @@ -269,6 +276,72 @@ def _StructPackDecoder(wire_type, format): return _SimpleDecoder(wire_type, InnerDecode) +def _FloatDecoder(): + """Returns a decoder for a float field. + + This code works around a bug in struct.unpack for non-finite 32-bit + floating-point values. 
+ """ + + local_unpack = struct.unpack + + def InnerDecode(buffer, pos): + # We expect a 32-bit value in little-endian byte order. Bit 1 is the sign + # bit, bits 2-9 represent the exponent, and bits 10-32 are the significand. + new_pos = pos + 4 + float_bytes = buffer[pos:new_pos] + + # If this value has all its exponent bits set, then it's non-finite. + # In Python 2.4, struct.unpack will convert it to a finite 64-bit value. + # To avoid that, we parse it specially. + if ((float_bytes[3] in '\x7F\xFF') + and (float_bytes[2] >= '\x80')): + # If at least one significand bit is set... + if float_bytes[0:3] != '\x00\x00\x80': + return (_NAN, new_pos) + # If sign bit is set... + if float_bytes[3] == '\xFF': + return (_NEG_INF, new_pos) + return (_POS_INF, new_pos) + + # Note that we expect someone up-stack to catch struct.error and convert + # it to _DecodeError -- this way we don't have to set up exception- + # handling blocks every time we parse one value. + result = local_unpack('<f', float_bytes)[0] + return (result, new_pos) + return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, InnerDecode) + + +def _DoubleDecoder(): + """Returns a decoder for a double field. + + This code works around a bug in struct.unpack for not-a-number. + """ + + local_unpack = struct.unpack + + def InnerDecode(buffer, pos): + # We expect a 64-bit value in little-endian byte order. Bit 1 is the sign + # bit, bits 2-12 represent the exponent, and bits 13-64 are the significand. + new_pos = pos + 8 + double_bytes = buffer[pos:new_pos] + + # If this value has all its exponent bits set and at least one significand + # bit set, it's not a number. In Python 2.4, struct.unpack will treat it + # as inf or -inf. To avoid that, we treat it specially. + if ((double_bytes[7] in '\x7F\xFF') + and (double_bytes[6] >= '\xF0') + and (double_bytes[0:7] != '\x00\x00\x00\x00\x00\x00\xF0')): + return (_NAN, new_pos) + + # Note that we expect someone up-stack to catch struct.error and convert + # it to _DecodeError -- this way we don't have to set up exception- + # handling blocks every time we parse one value. + result = local_unpack('<d', double_bytes)[0] + return (result, new_pos) + return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, InnerDecode) + + # -------------------------------------------------------------------- @@ -294,8 +367,8 @@ Fixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I') Fixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q') SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i') SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q') -FloatDecoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<f') -DoubleDecoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<d') +FloatDecoder = _FloatDecoder() +DoubleDecoder = _DoubleDecoder() BoolDecoder = _ModifiedDecoder( wire_format.WIRETYPE_VARINT, _DecodeVarint, bool) diff --git a/python/google/protobuf/internal/encoder.py b/python/google/protobuf/internal/encoder.py index aa05d5b3..777975e8 100755 --- a/python/google/protobuf/internal/encoder.py +++ b/python/google/protobuf/internal/encoder.py @@ -70,6 +70,12 @@ import struct from google.protobuf.internal import wire_format +# This will overflow and thus become IEEE-754 "infinity". We would use +# "float('inf')" but it doesn't work on Windows pre-Python-2.6. 
+_POS_INF = 1e10000 +_NEG_INF = -_POS_INF + + def _VarintSize(value): """Compute the size of a varint value.""" if value <= 0x7f: return 1 @@ -502,6 +508,83 @@ def _StructPackEncoder(wire_type, format): return SpecificEncoder +def _FloatingPointEncoder(wire_type, format): + """Return a constructor for an encoder for float fields. + + This is like StructPackEncoder, but catches errors that may be due to + passing non-finite floating-point values to struct.pack, and makes a + second attempt to encode those values. + + Args: + wire_type: The field's wire type, for encoding tags. + format: The format string to pass to struct.pack(). + """ + + value_size = struct.calcsize(format) + if value_size == 4: + def EncodeNonFiniteOrRaise(write, value): + # Remember that the serialized form uses little-endian byte order. + if value == _POS_INF: + write('\x00\x00\x80\x7F') + elif value == _NEG_INF: + write('\x00\x00\x80\xFF') + elif value != value: # NaN + write('\x00\x00\xC0\x7F') + else: + raise + elif value_size == 8: + def EncodeNonFiniteOrRaise(write, value): + if value == _POS_INF: + write('\x00\x00\x00\x00\x00\x00\xF0\x7F') + elif value == _NEG_INF: + write('\x00\x00\x00\x00\x00\x00\xF0\xFF') + elif value != value: # NaN + write('\x00\x00\x00\x00\x00\x00\xF8\x7F') + else: + raise + else: + raise ValueError('Can\'t encode floating-point values that are ' + '%d bytes long (only 4 or 8)' % value_size) + + def SpecificEncoder(field_number, is_repeated, is_packed): + local_struct_pack = struct.pack + if is_packed: + tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED) + local_EncodeVarint = _EncodeVarint + def EncodePackedField(write, value): + write(tag_bytes) + local_EncodeVarint(write, len(value) * value_size) + for element in value: + # This try/except block is going to be faster than any code that + # we could write to check whether element is finite. + try: + write(local_struct_pack(format, element)) + except SystemError: + EncodeNonFiniteOrRaise(write, element) + return EncodePackedField + elif is_repeated: + tag_bytes = TagBytes(field_number, wire_type) + def EncodeRepeatedField(write, value): + for element in value: + write(tag_bytes) + try: + write(local_struct_pack(format, element)) + except SystemError: + EncodeNonFiniteOrRaise(write, element) + return EncodeRepeatedField + else: + tag_bytes = TagBytes(field_number, wire_type) + def EncodeField(write, value): + write(tag_bytes) + try: + write(local_struct_pack(format, value)) + except SystemError: + EncodeNonFiniteOrRaise(write, value) + return EncodeField + + return SpecificEncoder + + # ==================================================================== # Here we declare an encoder constructor for each field type. These work # very similarly to sizer constructors, described earlier. 
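The _FloatDecoder/_DoubleDecoder and _FloatingPointEncoder changes above stop relying on struct.pack/struct.unpack for non-finite values and instead write and recognize the IEEE-754 bit patterns directly. A short sketch of the constants involved, restating only values that appear in the patch (Python 2 string semantics assumed):

```python
# Illustrative sketch only -- not part of the patch.
_POS_INF = 1e10000   # overflows to IEEE-754 +infinity even where float('inf') is unavailable
_NEG_INF = -_POS_INF
_NAN = _POS_INF * 0  # infinity times zero yields NaN

# Little-endian byte strings the encoder writes when struct.pack raises
# SystemError on a non-finite value:
FLOAT_POS_INF  = '\x00\x00\x80\x7F'
FLOAT_NEG_INF  = '\x00\x00\x80\xFF'
FLOAT_NAN      = '\x00\x00\xC0\x7F'
DOUBLE_POS_INF = '\x00\x00\x00\x00\x00\x00\xF0\x7F'
DOUBLE_NEG_INF = '\x00\x00\x00\x00\x00\x00\xF0\xFF'
DOUBLE_NAN     = '\x00\x00\x00\x00\x00\x00\xF8\x7F'

# NaN never compares equal to itself -- the same "value != value" check the
# encoder and the new message_test helpers use.
assert _NAN != _NAN
assert _POS_INF > 0 > _NEG_INF
```

The decoder side applies the mirror-image check: when the exponent bits of an incoming 4- or 8-byte value are all set, it returns _POS_INF, _NEG_INF, or _NAN itself rather than trusting struct.unpack.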
@@ -525,8 +608,8 @@ Fixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<I') Fixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<Q') SFixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<i') SFixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<q') -FloatEncoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<f') -DoubleEncoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<d') +FloatEncoder = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED32, '<f') +DoubleEncoder = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED64, '<d') def BoolEncoder(field_number, is_repeated, is_packed): diff --git a/python/google/protobuf/internal/generator_test.py b/python/google/protobuf/internal/generator_test.py index 78360b53..e4387c85 100755 --- a/python/google/protobuf/internal/generator_test.py +++ b/python/google/protobuf/internal/generator_test.py @@ -42,11 +42,12 @@ further ensures that we can use Python protocol message objects as we expect. __author__ = 'robinson@google.com (Will Robinson)' import unittest +from google.protobuf import unittest_custom_options_pb2 from google.protobuf import unittest_import_pb2 from google.protobuf import unittest_mset_pb2 from google.protobuf import unittest_pb2 from google.protobuf import unittest_no_generic_services_pb2 - +from google.protobuf import service MAX_EXTENSION = 536870912 @@ -140,6 +141,13 @@ class GeneratorTest(unittest.TestCase): proto = unittest_mset_pb2.TestMessageSet() self.assertTrue(proto.DESCRIPTOR.GetOptions().message_set_wire_format) + def testMessageWithCustomOptions(self): + proto = unittest_custom_options_pb2.TestMessageWithCustomOptions() + enum_options = proto.DESCRIPTOR.enum_types_by_name['AnEnum'].GetOptions() + self.assertTrue(enum_options is not None) + # TODO(gps): We really should test for the presense of the enum_opt1 + # extension and for its value to be set to -789. + def testNestedTypes(self): self.assertEquals( set(unittest_pb2.TestAllTypes.DESCRIPTOR.nested_types), @@ -208,12 +216,27 @@ class GeneratorTest(unittest.TestCase): self.assertFalse(unittest_pb2.DESCRIPTOR.serialized_pb is None) def testNoGenericServices(self): - # unittest_no_generic_services.proto should contain defs for everything - # except services. self.assertTrue(hasattr(unittest_no_generic_services_pb2, "TestMessage")) self.assertTrue(hasattr(unittest_no_generic_services_pb2, "FOO")) self.assertTrue(hasattr(unittest_no_generic_services_pb2, "test_extension")) - self.assertFalse(hasattr(unittest_no_generic_services_pb2, "TestService")) + + # Make sure unittest_no_generic_services_pb2 has no services subclassing + # Proto2 Service class. + if hasattr(unittest_no_generic_services_pb2, "TestService"): + self.assertFalse(issubclass(unittest_no_generic_services_pb2.TestService, + service.Service)) + + def testMessageTypesByName(self): + file_type = unittest_pb2.DESCRIPTOR + self.assertEqual( + unittest_pb2._TESTALLTYPES, + file_type.message_types_by_name[unittest_pb2._TESTALLTYPES.name]) + + # Nested messages shouldn't be included in the message_types_by_name + # dictionary (like in the C++ API). 
+ self.assertFalse( + unittest_pb2._TESTALLTYPES_NESTEDMESSAGE.name in + file_type.message_types_by_name) if __name__ == '__main__': diff --git a/python/google/protobuf/internal/message_test.py b/python/google/protobuf/internal/message_test.py index 73a9a3a3..65174373 100755 --- a/python/google/protobuf/internal/message_test.py +++ b/python/google/protobuf/internal/message_test.py @@ -43,11 +43,25 @@ abstract interface. __author__ = 'gps@google.com (Gregory P. Smith)' +import copy +import math import unittest from google.protobuf import unittest_import_pb2 from google.protobuf import unittest_pb2 from google.protobuf.internal import test_util +# Python pre-2.6 does not have isinf() or isnan() functions, so we have +# to provide our own. +def isnan(val): + # NaN is never equal to itself. + return val != val +def isinf(val): + # Infinity times zero equals NaN. + return not isnan(val) and isnan(val * 0) +def IsPosInf(val): + return isinf(val) and (val > 0) +def IsNegInf(val): + return isinf(val) and (val < 0) class MessageTest(unittest.TestCase): @@ -57,6 +71,8 @@ class MessageTest(unittest.TestCase): golden_message.ParseFromString(golden_data) test_util.ExpectAllFieldsSet(self, golden_message) self.assertTrue(golden_message.SerializeToString() == golden_data) + golden_copy = copy.deepcopy(golden_message) + self.assertTrue(golden_copy.SerializeToString() == golden_data) def testGoldenExtensions(self): golden_data = test_util.GoldenFile('golden_message').read() @@ -66,6 +82,8 @@ class MessageTest(unittest.TestCase): test_util.SetAllExtensions(all_set) self.assertEquals(all_set, golden_message) self.assertTrue(golden_message.SerializeToString() == golden_data) + golden_copy = copy.deepcopy(golden_message) + self.assertTrue(golden_copy.SerializeToString() == golden_data) def testGoldenPackedMessage(self): golden_data = test_util.GoldenFile('golden_packed_fields_message').read() @@ -75,6 +93,8 @@ class MessageTest(unittest.TestCase): test_util.SetAllPackedFields(all_set) self.assertEquals(all_set, golden_message) self.assertTrue(all_set.SerializeToString() == golden_data) + golden_copy = copy.deepcopy(golden_message) + self.assertTrue(golden_copy.SerializeToString() == golden_data) def testGoldenPackedExtensions(self): golden_data = test_util.GoldenFile('golden_packed_fields_message').read() @@ -84,6 +104,240 @@ class MessageTest(unittest.TestCase): test_util.SetAllPackedExtensions(all_set) self.assertEquals(all_set, golden_message) self.assertTrue(all_set.SerializeToString() == golden_data) + golden_copy = copy.deepcopy(golden_message) + self.assertTrue(golden_copy.SerializeToString() == golden_data) + + def testPositiveInfinity(self): + golden_data = ('\x5D\x00\x00\x80\x7F' + '\x61\x00\x00\x00\x00\x00\x00\xF0\x7F' + '\xCD\x02\x00\x00\x80\x7F' + '\xD1\x02\x00\x00\x00\x00\x00\x00\xF0\x7F') + golden_message = unittest_pb2.TestAllTypes() + golden_message.ParseFromString(golden_data) + self.assertTrue(IsPosInf(golden_message.optional_float)) + self.assertTrue(IsPosInf(golden_message.optional_double)) + self.assertTrue(IsPosInf(golden_message.repeated_float[0])) + self.assertTrue(IsPosInf(golden_message.repeated_double[0])) + self.assertTrue(golden_message.SerializeToString() == golden_data) + + def testNegativeInfinity(self): + golden_data = ('\x5D\x00\x00\x80\xFF' + '\x61\x00\x00\x00\x00\x00\x00\xF0\xFF' + '\xCD\x02\x00\x00\x80\xFF' + '\xD1\x02\x00\x00\x00\x00\x00\x00\xF0\xFF') + golden_message = unittest_pb2.TestAllTypes() + golden_message.ParseFromString(golden_data) + 
self.assertTrue(IsNegInf(golden_message.optional_float)) + self.assertTrue(IsNegInf(golden_message.optional_double)) + self.assertTrue(IsNegInf(golden_message.repeated_float[0])) + self.assertTrue(IsNegInf(golden_message.repeated_double[0])) + self.assertTrue(golden_message.SerializeToString() == golden_data) + + def testNotANumber(self): + golden_data = ('\x5D\x00\x00\xC0\x7F' + '\x61\x00\x00\x00\x00\x00\x00\xF8\x7F' + '\xCD\x02\x00\x00\xC0\x7F' + '\xD1\x02\x00\x00\x00\x00\x00\x00\xF8\x7F') + golden_message = unittest_pb2.TestAllTypes() + golden_message.ParseFromString(golden_data) + self.assertTrue(isnan(golden_message.optional_float)) + self.assertTrue(isnan(golden_message.optional_double)) + self.assertTrue(isnan(golden_message.repeated_float[0])) + self.assertTrue(isnan(golden_message.repeated_double[0])) + self.assertTrue(golden_message.SerializeToString() == golden_data) + + def testPositiveInfinityPacked(self): + golden_data = ('\xA2\x06\x04\x00\x00\x80\x7F' + '\xAA\x06\x08\x00\x00\x00\x00\x00\x00\xF0\x7F') + golden_message = unittest_pb2.TestPackedTypes() + golden_message.ParseFromString(golden_data) + self.assertTrue(IsPosInf(golden_message.packed_float[0])) + self.assertTrue(IsPosInf(golden_message.packed_double[0])) + self.assertTrue(golden_message.SerializeToString() == golden_data) + + def testNegativeInfinityPacked(self): + golden_data = ('\xA2\x06\x04\x00\x00\x80\xFF' + '\xAA\x06\x08\x00\x00\x00\x00\x00\x00\xF0\xFF') + golden_message = unittest_pb2.TestPackedTypes() + golden_message.ParseFromString(golden_data) + self.assertTrue(IsNegInf(golden_message.packed_float[0])) + self.assertTrue(IsNegInf(golden_message.packed_double[0])) + self.assertTrue(golden_message.SerializeToString() == golden_data) + + def testNotANumberPacked(self): + golden_data = ('\xA2\x06\x04\x00\x00\xC0\x7F' + '\xAA\x06\x08\x00\x00\x00\x00\x00\x00\xF8\x7F') + golden_message = unittest_pb2.TestPackedTypes() + golden_message.ParseFromString(golden_data) + self.assertTrue(isnan(golden_message.packed_float[0])) + self.assertTrue(isnan(golden_message.packed_double[0])) + self.assertTrue(golden_message.SerializeToString() == golden_data) + + def testExtremeFloatValues(self): + message = unittest_pb2.TestAllTypes() + + # Most positive exponent, no significand bits set. + kMostPosExponentNoSigBits = math.pow(2, 127) + message.optional_float = kMostPosExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == kMostPosExponentNoSigBits) + + # Most positive exponent, one significand bit set. + kMostPosExponentOneSigBit = 1.5 * math.pow(2, 127) + message.optional_float = kMostPosExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == kMostPosExponentOneSigBit) + + # Repeat last two cases with values of same magnitude, but negative. + message.optional_float = -kMostPosExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == -kMostPosExponentNoSigBits) + + message.optional_float = -kMostPosExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == -kMostPosExponentOneSigBit) + + # Most negative exponent, no significand bits set. 
+ kMostNegExponentNoSigBits = math.pow(2, -127) + message.optional_float = kMostNegExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == kMostNegExponentNoSigBits) + + # Most negative exponent, one significand bit set. + kMostNegExponentOneSigBit = 1.5 * math.pow(2, -127) + message.optional_float = kMostNegExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == kMostNegExponentOneSigBit) + + # Repeat last two cases with values of the same magnitude, but negative. + message.optional_float = -kMostNegExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == -kMostNegExponentNoSigBits) + + message.optional_float = -kMostNegExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_float == -kMostNegExponentOneSigBit) + + def testExtremeFloatValues(self): + message = unittest_pb2.TestAllTypes() + + # Most positive exponent, no significand bits set. + kMostPosExponentNoSigBits = math.pow(2, 1023) + message.optional_double = kMostPosExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == kMostPosExponentNoSigBits) + + # Most positive exponent, one significand bit set. + kMostPosExponentOneSigBit = 1.5 * math.pow(2, 1023) + message.optional_double = kMostPosExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == kMostPosExponentOneSigBit) + + # Repeat last two cases with values of same magnitude, but negative. + message.optional_double = -kMostPosExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == -kMostPosExponentNoSigBits) + + message.optional_double = -kMostPosExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == -kMostPosExponentOneSigBit) + + # Most negative exponent, no significand bits set. + kMostNegExponentNoSigBits = math.pow(2, -1023) + message.optional_double = kMostNegExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == kMostNegExponentNoSigBits) + + # Most negative exponent, one significand bit set. + kMostNegExponentOneSigBit = 1.5 * math.pow(2, -1023) + message.optional_double = kMostNegExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == kMostNegExponentOneSigBit) + + # Repeat last two cases with values of the same magnitude, but negative. + message.optional_double = -kMostNegExponentNoSigBits + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == -kMostNegExponentNoSigBits) + + message.optional_double = -kMostNegExponentOneSigBit + message.ParseFromString(message.SerializeToString()) + self.assertTrue(message.optional_double == -kMostNegExponentOneSigBit) + + def testSortingRepeatedScalarFieldsDefaultComparator(self): + """Check some different types with the default comparator.""" + message = unittest_pb2.TestAllTypes() + + # TODO(mattp): would testing more scalar types strengthen test? 
+ message.repeated_int32.append(1) + message.repeated_int32.append(3) + message.repeated_int32.append(2) + message.repeated_int32.sort() + self.assertEqual(message.repeated_int32[0], 1) + self.assertEqual(message.repeated_int32[1], 2) + self.assertEqual(message.repeated_int32[2], 3) + + message.repeated_float.append(1.1) + message.repeated_float.append(1.3) + message.repeated_float.append(1.2) + message.repeated_float.sort() + self.assertAlmostEqual(message.repeated_float[0], 1.1) + self.assertAlmostEqual(message.repeated_float[1], 1.2) + self.assertAlmostEqual(message.repeated_float[2], 1.3) + + message.repeated_string.append('a') + message.repeated_string.append('c') + message.repeated_string.append('b') + message.repeated_string.sort() + self.assertEqual(message.repeated_string[0], 'a') + self.assertEqual(message.repeated_string[1], 'b') + self.assertEqual(message.repeated_string[2], 'c') + + message.repeated_bytes.append('a') + message.repeated_bytes.append('c') + message.repeated_bytes.append('b') + message.repeated_bytes.sort() + self.assertEqual(message.repeated_bytes[0], 'a') + self.assertEqual(message.repeated_bytes[1], 'b') + self.assertEqual(message.repeated_bytes[2], 'c') + + def testSortingRepeatedScalarFieldsCustomComparator(self): + """Check some different types with custom comparator.""" + message = unittest_pb2.TestAllTypes() + + message.repeated_int32.append(-3) + message.repeated_int32.append(-2) + message.repeated_int32.append(-1) + message.repeated_int32.sort(lambda x,y: cmp(abs(x), abs(y))) + self.assertEqual(message.repeated_int32[0], -1) + self.assertEqual(message.repeated_int32[1], -2) + self.assertEqual(message.repeated_int32[2], -3) + + message.repeated_string.append('aaa') + message.repeated_string.append('bb') + message.repeated_string.append('c') + message.repeated_string.sort(lambda x,y: cmp(len(x), len(y))) + self.assertEqual(message.repeated_string[0], 'c') + self.assertEqual(message.repeated_string[1], 'bb') + self.assertEqual(message.repeated_string[2], 'aaa') + + def testSortingRepeatedCompositeFieldsCustomComparator(self): + """Check passing a custom comparator to sort a repeated composite field.""" + message = unittest_pb2.TestAllTypes() + + message.repeated_nested_message.add().bb = 1 + message.repeated_nested_message.add().bb = 3 + message.repeated_nested_message.add().bb = 2 + message.repeated_nested_message.add().bb = 6 + message.repeated_nested_message.add().bb = 5 + message.repeated_nested_message.add().bb = 4 + message.repeated_nested_message.sort(lambda x,y: cmp(x.bb, y.bb)) + self.assertEqual(message.repeated_nested_message[0].bb, 1) + self.assertEqual(message.repeated_nested_message[1].bb, 2) + self.assertEqual(message.repeated_nested_message[2].bb, 3) + self.assertEqual(message.repeated_nested_message[3].bb, 4) + self.assertEqual(message.repeated_nested_message[4].bb, 5) + self.assertEqual(message.repeated_nested_message[5].bb, 6) + if __name__ == '__main__': unittest.main() diff --git a/python/google/protobuf/internal/python_message.py b/python/google/protobuf/internal/python_message.py new file mode 100755 index 00000000..66fca918 --- /dev/null +++ b/python/google/protobuf/internal/python_message.py @@ -0,0 +1,1098 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. 
+# http://code.google.com/p/protobuf/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This code is meant to work on Python 2.4 and above only. +# +# TODO(robinson): Helpers for verbose, common checks like seeing if a +# descriptor's cpp_type is CPPTYPE_MESSAGE. + +"""Contains a metaclass and helper functions used to create +protocol message classes from Descriptor objects at runtime. + +Recall that a metaclass is the "type" of a class. +(A class is to a metaclass what an instance is to a class.) + +In this case, we use the GeneratedProtocolMessageType metaclass +to inject all the useful functionality into the classes +output by the protocol compiler at compile-time. + +The upshot of all this is that the real implementation +details for ALL pure-Python protocol buffers are *here in +this file*. +""" + +__author__ = 'robinson@google.com (Will Robinson)' + +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO +import struct +import weakref + +# We use "as" to avoid name collisions with variables. 
+from google.protobuf.internal import containers +from google.protobuf.internal import decoder +from google.protobuf.internal import encoder +from google.protobuf.internal import message_listener as message_listener_mod +from google.protobuf.internal import type_checkers +from google.protobuf.internal import wire_format +from google.protobuf import descriptor as descriptor_mod +from google.protobuf import message as message_mod +from google.protobuf import text_format + +_FieldDescriptor = descriptor_mod.FieldDescriptor + + +def NewMessage(descriptor, dictionary): + _AddClassAttributesForNestedExtensions(descriptor, dictionary) + _AddSlots(descriptor, dictionary) + + +def InitMessage(descriptor, cls): + cls._decoders_by_tag = {} + cls._extensions_by_name = {} + cls._extensions_by_number = {} + if (descriptor.has_options and + descriptor.GetOptions().message_set_wire_format): + cls._decoders_by_tag[decoder.MESSAGE_SET_ITEM_TAG] = ( + decoder.MessageSetItemDecoder(cls._extensions_by_number)) + + # Attach stuff to each FieldDescriptor for quick lookup later on. + for field in descriptor.fields: + _AttachFieldHelpers(cls, field) + + _AddEnumValues(descriptor, cls) + _AddInitMethod(descriptor, cls) + _AddPropertiesForFields(descriptor, cls) + _AddPropertiesForExtensions(descriptor, cls) + _AddStaticMethods(cls) + _AddMessageMethods(descriptor, cls) + _AddPrivateHelperMethods(cls) + + +# Stateless helpers for GeneratedProtocolMessageType below. +# Outside clients should not access these directly. +# +# I opted not to make any of these methods on the metaclass, to make it more +# clear that I'm not really using any state there and to keep clients from +# thinking that they have direct access to these construction helpers. + + +def _PropertyName(proto_field_name): + """Returns the name of the public property attribute which + clients can use to get and (in some cases) set the value + of a protocol message field. + + Args: + proto_field_name: The protocol message field name, exactly + as it appears (or would appear) in a .proto file. + """ + # TODO(robinson): Escape Python keywords (e.g., yield), and test this support. + # nnorwitz makes my day by writing: + # """ + # FYI. See the keyword module in the stdlib. This could be as simple as: + # + # if keyword.iskeyword(proto_field_name): + # return proto_field_name + "_" + # return proto_field_name + # """ + # Kenton says: The above is a BAD IDEA. People rely on being able to use + # getattr() and setattr() to reflectively manipulate field values. If we + # rename the properties, then every such user has to also make sure to apply + # the same transformation. Note that currently if you name a field "yield", + # you can still access it just fine using getattr/setattr -- it's not even + # that cumbersome to do so. + # TODO(kenton): Remove this method entirely if/when everyone agrees with my + # position. + return proto_field_name + + +def _VerifyExtensionHandle(message, extension_handle): + """Verify that the given extension handle is valid.""" + + if not isinstance(extension_handle, _FieldDescriptor): + raise KeyError('HasExtension() expects an extension handle, got: %s' % + extension_handle) + + if not extension_handle.is_extension: + raise KeyError('"%s" is not an extension.' % extension_handle.full_name) + + if extension_handle.containing_type is not message.DESCRIPTOR: + raise KeyError('Extension "%s" extends message type "%s", but this ' + 'message is of type "%s".' 
% + (extension_handle.full_name, + extension_handle.containing_type.full_name, + message.DESCRIPTOR.full_name)) + + +def _AddSlots(message_descriptor, dictionary): + """Adds a __slots__ entry to dictionary, containing the names of all valid + attributes for this message type. + + Args: + message_descriptor: A Descriptor instance describing this message type. + dictionary: Class dictionary to which we'll add a '__slots__' entry. + """ + dictionary['__slots__'] = ['_cached_byte_size', + '_cached_byte_size_dirty', + '_fields', + '_is_present_in_parent', + '_listener', + '_listener_for_children', + '__weakref__'] + + +def _IsMessageSetExtension(field): + return (field.is_extension and + field.containing_type.has_options and + field.containing_type.GetOptions().message_set_wire_format and + field.type == _FieldDescriptor.TYPE_MESSAGE and + field.message_type == field.extension_scope and + field.label == _FieldDescriptor.LABEL_OPTIONAL) + + +def _AttachFieldHelpers(cls, field_descriptor): + is_repeated = (field_descriptor.label == _FieldDescriptor.LABEL_REPEATED) + is_packed = (field_descriptor.has_options and + field_descriptor.GetOptions().packed) + + if _IsMessageSetExtension(field_descriptor): + field_encoder = encoder.MessageSetItemEncoder(field_descriptor.number) + sizer = encoder.MessageSetItemSizer(field_descriptor.number) + else: + field_encoder = type_checkers.TYPE_TO_ENCODER[field_descriptor.type]( + field_descriptor.number, is_repeated, is_packed) + sizer = type_checkers.TYPE_TO_SIZER[field_descriptor.type]( + field_descriptor.number, is_repeated, is_packed) + + field_descriptor._encoder = field_encoder + field_descriptor._sizer = sizer + field_descriptor._default_constructor = _DefaultValueConstructorForField( + field_descriptor) + + def AddDecoder(wiretype, is_packed): + tag_bytes = encoder.TagBytes(field_descriptor.number, wiretype) + cls._decoders_by_tag[tag_bytes] = ( + type_checkers.TYPE_TO_DECODER[field_descriptor.type]( + field_descriptor.number, is_repeated, is_packed, + field_descriptor, field_descriptor._default_constructor)) + + AddDecoder(type_checkers.FIELD_TYPE_TO_WIRE_TYPE[field_descriptor.type], + False) + + if is_repeated and wire_format.IsTypePackable(field_descriptor.type): + # To support wire compatibility of adding packed = true, add a decoder for + # packed values regardless of the field's options. + AddDecoder(wire_format.WIRETYPE_LENGTH_DELIMITED, True) + + +def _AddClassAttributesForNestedExtensions(descriptor, dictionary): + extension_dict = descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + assert extension_name not in dictionary + dictionary[extension_name] = extension_field + + +def _AddEnumValues(descriptor, cls): + """Sets class-level attributes for all enum fields defined in this message. + + Args: + descriptor: Descriptor object for this message type. + cls: Class we're constructing for this message type. + """ + for enum_type in descriptor.enum_types: + for enum_value in enum_type.values: + setattr(cls, enum_value.name, enum_value.number) + + +def _DefaultValueConstructorForField(field): + """Returns a function which returns a default value for a field. + + Args: + field: FieldDescriptor object for this field. + + The returned function has one argument: + message: Message instance containing this field, or a weakref proxy + of same. + + That function in turn returns a default value for this field. The default + value may refer back to |message| via a weak reference. 
+ """ + + if field.label == _FieldDescriptor.LABEL_REPEATED: + if field.default_value != []: + raise ValueError('Repeated field default value not empty list: %s' % ( + field.default_value)) + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + # We can't look at _concrete_class yet since it might not have + # been set. (Depends on order in which we initialize the classes). + message_type = field.message_type + def MakeRepeatedMessageDefault(message): + return containers.RepeatedCompositeFieldContainer( + message._listener_for_children, field.message_type) + return MakeRepeatedMessageDefault + else: + type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type) + def MakeRepeatedScalarDefault(message): + return containers.RepeatedScalarFieldContainer( + message._listener_for_children, type_checker) + return MakeRepeatedScalarDefault + + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + # _concrete_class may not yet be initialized. + message_type = field.message_type + def MakeSubMessageDefault(message): + result = message_type._concrete_class() + result._SetListener(message._listener_for_children) + return result + return MakeSubMessageDefault + + def MakeScalarDefault(message): + return field.default_value + return MakeScalarDefault + + +def _AddInitMethod(message_descriptor, cls): + """Adds an __init__ method to cls.""" + fields = message_descriptor.fields + def init(self, **kwargs): + self._cached_byte_size = 0 + self._cached_byte_size_dirty = len(kwargs) > 0 + self._fields = {} + self._is_present_in_parent = False + self._listener = message_listener_mod.NullMessageListener() + self._listener_for_children = _Listener(self) + for field_name, field_value in kwargs.iteritems(): + field = _GetFieldByName(message_descriptor, field_name) + if field is None: + raise TypeError("%s() got an unexpected keyword argument '%s'" % + (message_descriptor.name, field_name)) + if field.label == _FieldDescriptor.LABEL_REPEATED: + copy = field._default_constructor(self) + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: # Composite + for val in field_value: + copy.add().MergeFrom(val) + else: # Scalar + copy.extend(field_value) + self._fields[field] = copy + elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + copy = field._default_constructor(self) + copy.MergeFrom(field_value) + self._fields[field] = copy + else: + setattr(self, field_name, field_value) + + init.__module__ = None + init.__doc__ = None + cls.__init__ = init + + +def _GetFieldByName(message_descriptor, field_name): + """Returns a field descriptor by field name. + + Args: + message_descriptor: A Descriptor describing all fields in message. + field_name: The name of the field to retrieve. + Returns: + The field descriptor associated with the field name. + """ + try: + return message_descriptor.fields_by_name[field_name] + except KeyError: + raise ValueError('Protocol message has no "%s" field.' % field_name) + + +def _AddPropertiesForFields(descriptor, cls): + """Adds properties for all fields in this protocol message type.""" + for field in descriptor.fields: + _AddPropertiesForField(field, cls) + + if descriptor.is_extendable: + # _ExtensionDict is just an adaptor with no state so we allocate a new one + # every time it is accessed. + cls.Extensions = property(lambda self: _ExtensionDict(self)) + + +def _AddPropertiesForField(field, cls): + """Adds a public property for a protocol message field. 
+ Clients can use this property to get and (in the case + of non-repeated scalar fields) directly set the value + of a protocol message field. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + # Catch it if we add other types that we should + # handle specially here. + assert _FieldDescriptor.MAX_CPPTYPE == 10 + + constant_name = field.name.upper() + "_FIELD_NUMBER" + setattr(cls, constant_name, field.number) + + if field.label == _FieldDescriptor.LABEL_REPEATED: + _AddPropertiesForRepeatedField(field, cls) + elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + _AddPropertiesForNonRepeatedCompositeField(field, cls) + else: + _AddPropertiesForNonRepeatedScalarField(field, cls) + + +def _AddPropertiesForRepeatedField(field, cls): + """Adds a public property for a "repeated" protocol message field. Clients + can use this property to get the value of the field, which will be either a + _RepeatedScalarFieldContainer or _RepeatedCompositeFieldContainer (see + below). + + Note that when clients add values to these containers, we perform + type-checking in the case of repeated scalar fields, and we also set any + necessary "has" bits as a side-effect. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + proto_field_name = field.name + property_name = _PropertyName(proto_field_name) + + def getter(self): + field_value = self._fields.get(field) + if field_value is None: + # Construct a new object to represent this field. + field_value = field._default_constructor(self) + + # Atomically check if another thread has preempted us and, if not, swap + # in the new object we just created. If someone has preempted us, we + # take that object and discard ours. + # WARNING: We are relying on setdefault() being atomic. This is true + # in CPython but we haven't investigated others. This warning appears + # in several other locations in this file. + field_value = self._fields.setdefault(field, field_value) + return field_value + getter.__module__ = None + getter.__doc__ = 'Getter for %s.' % proto_field_name + + # We define a setter just so we can throw an exception with a more + # helpful error message. + def setter(self, new_value): + raise AttributeError('Assignment not allowed to repeated field ' + '"%s" in protocol message object.' % proto_field_name) + + doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name + setattr(cls, property_name, property(getter, setter, doc=doc)) + + +def _AddPropertiesForNonRepeatedScalarField(field, cls): + """Adds a public property for a nonrepeated, scalar protocol message field. + Clients can use this property to get and directly set the value of the field. + Note that when the client sets the value of a field by using this property, + all necessary "has" bits are set as a side-effect, and we also perform + type-checking. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + proto_field_name = field.name + property_name = _PropertyName(proto_field_name) + type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type) + default_value = field.default_value + valid_values = set() + + def getter(self): + return self._fields.get(field, default_value) + getter.__module__ = None + getter.__doc__ = 'Getter for %s.' 
% proto_field_name + def setter(self, new_value): + type_checker.CheckValue(new_value) + self._fields[field] = new_value + # Check _cached_byte_size_dirty inline to improve performance, since scalar + # setters are called frequently. + if not self._cached_byte_size_dirty: + self._Modified() + + setter.__module__ = None + setter.__doc__ = 'Setter for %s.' % proto_field_name + + # Add a property to encapsulate the getter/setter. + doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name + setattr(cls, property_name, property(getter, setter, doc=doc)) + + +def _AddPropertiesForNonRepeatedCompositeField(field, cls): + """Adds a public property for a nonrepeated, composite protocol message field. + A composite field is a "group" or "message" field. + + Clients can use this property to get the value of the field, but cannot + assign to the property directly. + + Args: + field: A FieldDescriptor for this field. + cls: The class we're constructing. + """ + # TODO(robinson): Remove duplication with similar method + # for non-repeated scalars. + proto_field_name = field.name + property_name = _PropertyName(proto_field_name) + message_type = field.message_type + + def getter(self): + field_value = self._fields.get(field) + if field_value is None: + # Construct a new object to represent this field. + field_value = message_type._concrete_class() + field_value._SetListener(self._listener_for_children) + + # Atomically check if another thread has preempted us and, if not, swap + # in the new object we just created. If someone has preempted us, we + # take that object and discard ours. + # WARNING: We are relying on setdefault() being atomic. This is true + # in CPython but we haven't investigated others. This warning appears + # in several other locations in this file. + field_value = self._fields.setdefault(field, field_value) + return field_value + getter.__module__ = None + getter.__doc__ = 'Getter for %s.' % proto_field_name + + # We define a setter just so we can throw an exception with a more + # helpful error message. + def setter(self, new_value): + raise AttributeError('Assignment not allowed to composite field ' + '"%s" in protocol message object.' % proto_field_name) + + # Add a property to encapsulate the getter. + doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name + setattr(cls, property_name, property(getter, setter, doc=doc)) + + +def _AddPropertiesForExtensions(descriptor, cls): + """Adds properties for all fields in this protocol message type.""" + extension_dict = descriptor.extensions_by_name + for extension_name, extension_field in extension_dict.iteritems(): + constant_name = extension_name.upper() + "_FIELD_NUMBER" + setattr(cls, constant_name, extension_field.number) + + +def _AddStaticMethods(cls): + # TODO(robinson): This probably needs to be thread-safe(?) + def RegisterExtension(extension_handle): + extension_handle.containing_type = cls.DESCRIPTOR + _AttachFieldHelpers(cls, extension_handle) + + # Try to insert our extension, failing if an extension with the same number + # already exists. + actual_handle = cls._extensions_by_number.setdefault( + extension_handle.number, extension_handle) + if actual_handle is not extension_handle: + raise AssertionError( + 'Extensions "%s" and "%s" both try to extend message type "%s" with ' + 'field number %d.' 
% + (extension_handle.full_name, actual_handle.full_name, + cls.DESCRIPTOR.full_name, extension_handle.number)) + + cls._extensions_by_name[extension_handle.full_name] = extension_handle + + handle = extension_handle # avoid line wrapping + if _IsMessageSetExtension(handle): + # MessageSet extension. Also register under type name. + cls._extensions_by_name[ + extension_handle.message_type.full_name] = extension_handle + + cls.RegisterExtension = staticmethod(RegisterExtension) + + def FromString(s): + message = cls() + message.MergeFromString(s) + return message + cls.FromString = staticmethod(FromString) + + +def _IsPresent(item): + """Given a (FieldDescriptor, value) tuple from _fields, return true if the + value should be included in the list returned by ListFields().""" + + if item[0].label == _FieldDescriptor.LABEL_REPEATED: + return bool(item[1]) + elif item[0].cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + return item[1]._is_present_in_parent + else: + return True + + +def _AddListFieldsMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def ListFields(self): + all_fields = [item for item in self._fields.iteritems() if _IsPresent(item)] + all_fields.sort(key = lambda item: item[0].number) + return all_fields + + cls.ListFields = ListFields + + +def _AddHasFieldMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + singular_fields = {} + for field in message_descriptor.fields: + if field.label != _FieldDescriptor.LABEL_REPEATED: + singular_fields[field.name] = field + + def HasField(self, field_name): + try: + field = singular_fields[field_name] + except KeyError: + raise ValueError( + 'Protocol message has no singular "%s" field.' % field_name) + + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + value = self._fields.get(field) + return value is not None and value._is_present_in_parent + else: + return field in self._fields + cls.HasField = HasField + + +def _AddClearFieldMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def ClearField(self, field_name): + try: + field = message_descriptor.fields_by_name[field_name] + except KeyError: + raise ValueError('Protocol message has no "%s" field.' % field_name) + + if field in self._fields: + # Note: If the field is a sub-message, its listener will still point + # at us. That's fine, because the worst than can happen is that it + # will call _Modified() and invalidate our byte size. Big deal. + del self._fields[field] + + # Always call _Modified() -- even if nothing was changed, this is + # a mutating method, and thus calling it should cause the field to become + # present in the parent message. + self._Modified() + + cls.ClearField = ClearField + + +def _AddClearExtensionMethod(cls): + """Helper for _AddMessageMethods().""" + def ClearExtension(self, extension_handle): + _VerifyExtensionHandle(self, extension_handle) + + # Similar to ClearField(), above. + if extension_handle in self._fields: + del self._fields[extension_handle] + self._Modified() + cls.ClearExtension = ClearExtension + + +def _AddClearMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def Clear(self): + # Clear fields. + self._fields = {} + self._Modified() + cls.Clear = Clear + + +def _AddHasExtensionMethod(cls): + """Helper for _AddMessageMethods().""" + def HasExtension(self, extension_handle): + _VerifyExtensionHandle(self, extension_handle) + if extension_handle.label == _FieldDescriptor.LABEL_REPEATED: + raise KeyError('"%s" is repeated.' 
% extension_handle.full_name) + + if extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + value = self._fields.get(extension_handle) + return value is not None and value._is_present_in_parent + else: + return extension_handle in self._fields + cls.HasExtension = HasExtension + + +def _AddEqualsMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def __eq__(self, other): + if (not isinstance(other, message_mod.Message) or + other.DESCRIPTOR != self.DESCRIPTOR): + return False + + if self is other: + return True + + return self.ListFields() == other.ListFields() + + cls.__eq__ = __eq__ + + +def _AddStrMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def __str__(self): + return text_format.MessageToString(self) + cls.__str__ = __str__ + + +def _AddUnicodeMethod(unused_message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def __unicode__(self): + return text_format.MessageToString(self, as_utf8=True).decode('utf-8') + cls.__unicode__ = __unicode__ + + +def _AddSetListenerMethod(cls): + """Helper for _AddMessageMethods().""" + def SetListener(self, listener): + if listener is None: + self._listener = message_listener_mod.NullMessageListener() + else: + self._listener = listener + cls._SetListener = SetListener + + +def _BytesForNonRepeatedElement(value, field_number, field_type): + """Returns the number of bytes needed to serialize a non-repeated element. + The returned byte count includes space for tag information and any + other additional space associated with serializing value. + + Args: + value: Value we're serializing. + field_number: Field number of this value. (Since the field number + is stored as part of a varint-encoded tag, this has an impact + on the total bytes required to serialize the value). + field_type: The type of the field. One of the TYPE_* constants + within FieldDescriptor. + """ + try: + fn = type_checkers.TYPE_TO_BYTE_SIZE_FN[field_type] + return fn(field_number, value) + except KeyError: + raise message_mod.EncodeError('Unrecognized field type: %d' % field_type) + + +def _AddByteSizeMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def ByteSize(self): + if not self._cached_byte_size_dirty: + return self._cached_byte_size + + size = 0 + for field_descriptor, field_value in self.ListFields(): + size += field_descriptor._sizer(field_value) + + self._cached_byte_size = size + self._cached_byte_size_dirty = False + self._listener_for_children.dirty = False + return size + + cls.ByteSize = ByteSize + + +def _AddSerializeToStringMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def SerializeToString(self): + # Check if the message has all of its required fields set. 
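    # For example (an illustrative sketch, assuming a message type such as the
    # unit tests' TestRequired, whose fields are all required):
    #
    #   msg = unittest_pb2.TestRequired()
    #   msg.SerializePartialToString()   # fine: partial output is allowed
    #   msg.SerializeToString()          # raises EncodeError listing the
    #                                    # missing required fields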
+ errors = [] + if not self.IsInitialized(): + raise message_mod.EncodeError( + 'Message is missing required fields: ' + + ','.join(self.FindInitializationErrors())) + return self.SerializePartialToString() + cls.SerializeToString = SerializeToString + + +def _AddSerializePartialToStringMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + + def SerializePartialToString(self): + out = StringIO() + self._InternalSerialize(out.write) + return out.getvalue() + cls.SerializePartialToString = SerializePartialToString + + def InternalSerialize(self, write_bytes): + for field_descriptor, field_value in self.ListFields(): + field_descriptor._encoder(write_bytes, field_value) + cls._InternalSerialize = InternalSerialize + + +def _AddMergeFromStringMethod(message_descriptor, cls): + """Helper for _AddMessageMethods().""" + def MergeFromString(self, serialized): + length = len(serialized) + try: + if self._InternalParse(serialized, 0, length) != length: + # The only reason _InternalParse would return early is if it + # encountered an end-group tag. + raise message_mod.DecodeError('Unexpected end-group tag.') + except IndexError: + raise message_mod.DecodeError('Truncated message.') + except struct.error, e: + raise message_mod.DecodeError(e) + return length # Return this for legacy reasons. + cls.MergeFromString = MergeFromString + + local_ReadTag = decoder.ReadTag + local_SkipField = decoder.SkipField + decoders_by_tag = cls._decoders_by_tag + + def InternalParse(self, buffer, pos, end): + self._Modified() + field_dict = self._fields + while pos != end: + (tag_bytes, new_pos) = local_ReadTag(buffer, pos) + field_decoder = decoders_by_tag.get(tag_bytes) + if field_decoder is None: + new_pos = local_SkipField(buffer, new_pos, end, tag_bytes) + if new_pos == -1: + return pos + pos = new_pos + else: + pos = field_decoder(buffer, new_pos, end, self, field_dict) + return pos + cls._InternalParse = InternalParse + + +def _AddIsInitializedMethod(message_descriptor, cls): + """Adds the IsInitialized and FindInitializationError methods to the + protocol message class.""" + + required_fields = [field for field in message_descriptor.fields + if field.label == _FieldDescriptor.LABEL_REQUIRED] + + def IsInitialized(self, errors=None): + """Checks if all required fields of a message are set. + + Args: + errors: A list which, if provided, will be populated with the field + paths of all missing required fields. + + Returns: + True iff the specified message has all required fields set. + """ + + # Performance is critical so we avoid HasField() and ListFields(). + + for field in required_fields: + if (field not in self._fields or + (field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE and + not self._fields[field]._is_present_in_parent)): + if errors is not None: + errors.extend(self.FindInitializationErrors()) + return False + + for field, value in self._fields.iteritems(): + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + if field.label == _FieldDescriptor.LABEL_REPEATED: + for element in value: + if not element.IsInitialized(): + if errors is not None: + errors.extend(self.FindInitializationErrors()) + return False + elif value._is_present_in_parent and not value.IsInitialized(): + if errors is not None: + errors.extend(self.FindInitializationErrors()) + return False + + return True + + cls.IsInitialized = IsInitialized + + def FindInitializationErrors(self): + """Finds required fields which are not initialized. + + Returns: + A list of strings. 
Each string is a path to an uninitialized field from + the top-level message, e.g. "foo.bar[5].baz". + """ + + errors = [] # simplify things + + for field in required_fields: + if not self.HasField(field.name): + errors.append(field.name) + + for field, value in self.ListFields(): + if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + if field.is_extension: + name = "(%s)" % field.full_name + else: + name = field.name + + if field.label == _FieldDescriptor.LABEL_REPEATED: + for i in xrange(len(value)): + element = value[i] + prefix = "%s[%d]." % (name, i) + sub_errors = element.FindInitializationErrors() + errors += [ prefix + error for error in sub_errors ] + else: + prefix = name + "." + sub_errors = value.FindInitializationErrors() + errors += [ prefix + error for error in sub_errors ] + + return errors + + cls.FindInitializationErrors = FindInitializationErrors + + +def _AddMergeFromMethod(cls): + LABEL_REPEATED = _FieldDescriptor.LABEL_REPEATED + CPPTYPE_MESSAGE = _FieldDescriptor.CPPTYPE_MESSAGE + + def MergeFrom(self, msg): + if not isinstance(msg, cls): + raise TypeError( + "Parameter to MergeFrom() must be instance of same class.") + + assert msg is not self + self._Modified() + + fields = self._fields + + for field, value in msg._fields.iteritems(): + if field.label == LABEL_REPEATED: + field_value = fields.get(field) + if field_value is None: + # Construct a new object to represent this field. + field_value = field._default_constructor(self) + fields[field] = field_value + field_value.MergeFrom(value) + elif field.cpp_type == CPPTYPE_MESSAGE: + if value._is_present_in_parent: + field_value = fields.get(field) + if field_value is None: + # Construct a new object to represent this field. + field_value = field._default_constructor(self) + fields[field] = field_value + field_value.MergeFrom(value) + else: + self._fields[field] = value + cls.MergeFrom = MergeFrom + + +def _AddMessageMethods(message_descriptor, cls): + """Adds implementations of all Message methods to cls.""" + _AddListFieldsMethod(message_descriptor, cls) + _AddHasFieldMethod(message_descriptor, cls) + _AddClearFieldMethod(message_descriptor, cls) + if message_descriptor.is_extendable: + _AddClearExtensionMethod(cls) + _AddHasExtensionMethod(cls) + _AddClearMethod(message_descriptor, cls) + _AddEqualsMethod(message_descriptor, cls) + _AddStrMethod(message_descriptor, cls) + _AddUnicodeMethod(message_descriptor, cls) + _AddSetListenerMethod(cls) + _AddByteSizeMethod(message_descriptor, cls) + _AddSerializeToStringMethod(message_descriptor, cls) + _AddSerializePartialToStringMethod(message_descriptor, cls) + _AddMergeFromStringMethod(message_descriptor, cls) + _AddIsInitializedMethod(message_descriptor, cls) + _AddMergeFromMethod(cls) + + +def _AddPrivateHelperMethods(cls): + """Adds implementation of private helper methods to cls.""" + + def Modified(self): + """Sets the _cached_byte_size_dirty bit to true, + and propagates this to our listener iff this was a state change. + """ + + # Note: Some callers check _cached_byte_size_dirty before calling + # _Modified() as an extra optimization. So, if this method is ever + # changed such that it does stuff even when _cached_byte_size_dirty is + # already true, the callers need to be updated. 
+ if not self._cached_byte_size_dirty: + self._cached_byte_size_dirty = True + self._listener_for_children.dirty = True + self._is_present_in_parent = True + self._listener.Modified() + + cls._Modified = Modified + cls.SetInParent = Modified + + +class _Listener(object): + + """MessageListener implementation that a parent message registers with its + child message. + + In order to support semantics like: + + foo.bar.baz.qux = 23 + assert foo.HasField('bar') + + ...child objects must have back references to their parents. + This helper class is at the heart of this support. + """ + + def __init__(self, parent_message): + """Args: + parent_message: The message whose _Modified() method we should call when + we receive Modified() messages. + """ + # This listener establishes a back reference from a child (contained) object + # to its parent (containing) object. We make this a weak reference to avoid + # creating cyclic garbage when the client finishes with the 'parent' object + # in the tree. + if isinstance(parent_message, weakref.ProxyType): + self._parent_message_weakref = parent_message + else: + self._parent_message_weakref = weakref.proxy(parent_message) + + # As an optimization, we also indicate directly on the listener whether + # or not the parent message is dirty. This way we can avoid traversing + # up the tree in the common case. + self.dirty = False + + def Modified(self): + if self.dirty: + return + try: + # Propagate the signal to our parents iff this is the first field set. + self._parent_message_weakref._Modified() + except ReferenceError: + # We can get here if a client has kept a reference to a child object, + # and is now setting a field on it, but the child's parent has been + # garbage-collected. This is not an error. + pass + + +# TODO(robinson): Move elsewhere? This file is getting pretty ridiculous... +# TODO(robinson): Unify error handling of "unknown extension" crap. +# TODO(robinson): Support iteritems()-style iteration over all +# extensions with the "has" bits turned on? +class _ExtensionDict(object): + + """Dict-like container for supporting an indexable "Extensions" + field on proto instances. + + Note that in all cases we expect extension handles to be + FieldDescriptors. + """ + + def __init__(self, extended_message): + """extended_message: Message instance for which we are the Extensions dict. + """ + + self._extended_message = extended_message + + def __getitem__(self, extension_handle): + """Returns the current value of the given extension handle.""" + + _VerifyExtensionHandle(self._extended_message, extension_handle) + + result = self._extended_message._fields.get(extension_handle) + if result is not None: + return result + + if extension_handle.label == _FieldDescriptor.LABEL_REPEATED: + result = extension_handle._default_constructor(self._extended_message) + elif extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: + result = extension_handle.message_type._concrete_class() + try: + result._SetListener(self._extended_message._listener_for_children) + except ReferenceError: + pass + else: + # Singular scalar -- just return the default without inserting into the + # dict. + return extension_handle.default_value + + # Atomically check if another thread has preempted us and, if not, swap + # in the new object we just created. If someone has preempted us, we + # take that object and discard ours. + # WARNING: We are relying on setdefault() being atomic. This is true + # in CPython but we haven't investigated others. 
This warning appears + # in several other locations in this file. + result = self._extended_message._fields.setdefault( + extension_handle, result) + + return result + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + + my_fields = self._extended_message.ListFields() + other_fields = other._extended_message.ListFields() + + # Get rid of non-extension fields. + my_fields = [ field for field in my_fields if field.is_extension ] + other_fields = [ field for field in other_fields if field.is_extension ] + + return my_fields == other_fields + + def __ne__(self, other): + return not self == other + + def __hash__(self): + raise TypeError('unhashable object') + + # Note that this is only meaningful for non-repeated, scalar extension + # fields. Note also that we may have to call _Modified() when we do + # successfully set a field this way, to set any necssary "has" bits in the + # ancestors of the extended message. + def __setitem__(self, extension_handle, value): + """If extension_handle specifies a non-repeated, scalar extension + field, sets the value of that field. + """ + + _VerifyExtensionHandle(self._extended_message, extension_handle) + + if (extension_handle.label == _FieldDescriptor.LABEL_REPEATED or + extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE): + raise TypeError( + 'Cannot assign to extension "%s" because it is a repeated or ' + 'composite type.' % extension_handle.full_name) + + # It's slightly wasteful to lookup the type checker each time, + # but we expect this to be a vanishingly uncommon case anyway. + type_checker = type_checkers.GetTypeChecker( + extension_handle.cpp_type, extension_handle.type) + type_checker.CheckValue(value) + self._extended_message._fields[extension_handle] = value + self._extended_message._Modified() + + def _FindExtensionByName(self, name): + """Tries to find a known extension with the specified name. + + Args: + name: Extension full name. + + Returns: + Extension field descriptor. + """ + return self._extended_message._extensions_by_name.get(name, None) diff --git a/python/google/protobuf/internal/reflection_test.py b/python/google/protobuf/internal/reflection_test.py index 54eeebe6..7b9d3398 100755 --- a/python/google/protobuf/internal/reflection_test.py +++ b/python/google/protobuf/internal/reflection_test.py @@ -41,8 +41,6 @@ import operator import struct import unittest -# TODO(robinson): When we split this test in two, only some of these imports -# will be necessary in each test. from google.protobuf import unittest_import_pb2 from google.protobuf import unittest_mset_pb2 from google.protobuf import unittest_pb2 @@ -50,6 +48,7 @@ from google.protobuf import descriptor_pb2 from google.protobuf import descriptor from google.protobuf import message from google.protobuf import reflection +from google.protobuf.internal import api_implementation from google.protobuf.internal import more_extensions_pb2 from google.protobuf.internal import more_messages_pb2 from google.protobuf.internal import wire_format @@ -104,10 +103,10 @@ class _MiniDecoder(object): class ReflectionTest(unittest.TestCase): - def assertIs(self, values, others): + def assertListsEqual(self, values, others): self.assertEqual(len(values), len(others)) for i in range(len(values)): - self.assertTrue(values[i] is others[i]) + self.assertEqual(values[i], others[i]) def testScalarConstructor(self): # Constructor with only scalar types should succeed. 
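The _ExtensionDict defined above is what backs the Extensions attribute on
extendable messages. A short usage sketch, assuming the extension types from
the more_extensions test proto exercised further down in this file:

from google.protobuf.internal import more_extensions_pb2

toplevel = more_extensions_pb2.TopLevelMessage()
extensions = toplevel.submessage.Extensions

# Composite and repeated extensions are fetched through the accessor and
# mutated in place; __setitem__ only accepts non-repeated scalar handles.
child = extensions[more_extensions_pb2.repeated_message_extension].add()
assert extensions[more_extensions_pb2.repeated_message_extension][0] == child
assert toplevel.HasField('submessage')  # the mutation set the parent's has-bit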
@@ -201,15 +200,24 @@ class ReflectionTest(unittest.TestCase): list(proto.repeated_foreign_message)) def testConstructorTypeError(self): - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_int32="foo") - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_string=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_nested_message=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_int32=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_int32=["foo"]) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_string=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_string=[1234]) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=1234) - self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=[1234]) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, optional_int32="foo") + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, optional_string=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, optional_nested_message=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_int32=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_int32=["foo"]) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_string=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_string=[1234]) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=1234) + self.assertRaises( + TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=[1234]) def testConstructorInvalidatesCachedByteSize(self): message = unittest_pb2.TestAllTypes(optional_int32 = 12) @@ -311,11 +319,14 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(0, getattr(composite_field, scalar_field_name)) # Finally, ensure that modifications to the old composite field object - # don't have any effect on the parent. + # don't have any effect on the parent. Possible only with the pure-python + # implementation of the API. # # (NOTE that when we clear the composite field in the parent, we actually # don't recursively clear down the tree. Instead, we just disconnect the # cleared composite from the tree.) + if api_implementation.Type() != 'python': + return self.assertTrue(old_composite_field is not composite_field) setattr(old_composite_field, scalar_field_name, new_val) self.assertTrue(not composite_field.HasField(scalar_field_name)) @@ -337,6 +348,8 @@ class ReflectionTest(unittest.TestCase): nested.bb = 23 def testDisconnectingNestedMessageBeforeSettingField(self): + if api_implementation.Type() != 'python': + return proto = unittest_pb2.TestAllTypes() nested = proto.optional_nested_message proto.ClearField('optional_nested_message') # Should disconnect from parent @@ -526,7 +539,6 @@ class ReflectionTest(unittest.TestCase): # proto.nonexistent_field = 23 should fail as well. self.assertRaises(AttributeError, setattr, proto, 'nonexistent_field', 23) - # TODO(robinson): Add type-safety check for enums. 
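The disconnection behaviour tested above builds on presence ("has") bits that
are driven by child mutations; the basic contract, as a sketch:

from google.protobuf import unittest_pb2

proto = unittest_pb2.TestAllTypes()
assert not proto.HasField('optional_nested_message')
proto.optional_nested_message.bb = 23              # writing through the child
assert proto.HasField('optional_nested_message')   # sets the parent's has-bit
proto.ClearField('optional_nested_message')        # clears and disconnects
assert not proto.HasField('optional_nested_message')
assert proto.optional_nested_message.bb == 0       # back to the default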
def testSingleScalarTypeSafety(self): proto = unittest_pb2.TestAllTypes() self.assertRaises(TypeError, setattr, proto, 'optional_int32', 1.1) @@ -538,7 +550,9 @@ class ReflectionTest(unittest.TestCase): def TestMinAndMaxIntegers(field_name, expected_min, expected_max): pb = unittest_pb2.TestAllTypes() setattr(pb, field_name, expected_min) + self.assertEqual(expected_min, getattr(pb, field_name)) setattr(pb, field_name, expected_max) + self.assertEqual(expected_max, getattr(pb, field_name)) self.assertRaises(ValueError, setattr, pb, field_name, expected_min - 1) self.assertRaises(ValueError, setattr, pb, field_name, expected_max + 1) @@ -546,7 +560,33 @@ class ReflectionTest(unittest.TestCase): TestMinAndMaxIntegers('optional_uint32', 0, 0xffffffff) TestMinAndMaxIntegers('optional_int64', -(1 << 63), (1 << 63) - 1) TestMinAndMaxIntegers('optional_uint64', 0, 0xffffffffffffffff) - TestMinAndMaxIntegers('optional_nested_enum', -(1 << 31), (1 << 31) - 1) + + pb = unittest_pb2.TestAllTypes() + pb.optional_nested_enum = 1 + self.assertEqual(1, pb.optional_nested_enum) + + # Invalid enum values. + pb.optional_nested_enum = 0 + self.assertEqual(0, pb.optional_nested_enum) + + bytes_size_before = pb.ByteSize() + + pb.optional_nested_enum = 4 + self.assertEqual(4, pb.optional_nested_enum) + + pb.optional_nested_enum = 0 + self.assertEqual(0, pb.optional_nested_enum) + + # Make sure that setting the same enum field doesn't just add unknown + # fields (but overwrites them). + self.assertEqual(bytes_size_before, pb.ByteSize()) + + # Is the invalid value preserved after serialization? + serialized = pb.SerializeToString() + pb2 = unittest_pb2.TestAllTypes() + pb2.ParseFromString(serialized) + self.assertEqual(0, pb2.optional_nested_enum) + self.assertEqual(pb, pb2) def testRepeatedScalarTypeSafety(self): proto = unittest_pb2.TestAllTypes() @@ -560,11 +600,19 @@ class ReflectionTest(unittest.TestCase): self.assertRaises(IndexError, proto.repeated_int32.__setitem__, 500, 23) self.assertRaises(TypeError, proto.repeated_int32.__setitem__, 0, 'abc') + # Repeated enums tests. + #proto.repeated_nested_enum.append(0) + def testSingleScalarGettersAndSetters(self): proto = unittest_pb2.TestAllTypes() self.assertEqual(0, proto.optional_int32) proto.optional_int32 = 1 self.assertEqual(1, proto.optional_int32) + + proto.optional_uint64 = 0xffffffffffff + self.assertEqual(0xffffffffffff, proto.optional_uint64) + proto.optional_uint64 = 0xffffffffffffffff + self.assertEqual(0xffffffffffffffff, proto.optional_uint64) # TODO(robinson): Test all other scalar field types. def testSingleScalarClearField(self): @@ -645,11 +693,38 @@ class ReflectionTest(unittest.TestCase): del proto.repeated_int32[2:] self.assertEqual([5, 35], proto.repeated_int32) + # Test extending. + proto.repeated_int32.extend([3, 13]) + self.assertEqual([5, 35, 3, 13], proto.repeated_int32) + # Test clearing. proto.ClearField('repeated_int32') self.assertTrue(not proto.repeated_int32) self.assertEqual(0, len(proto.repeated_int32)) + proto.repeated_int32.append(1) + self.assertEqual(1, proto.repeated_int32[-1]) + # Test assignment to a negative index. + proto.repeated_int32[-1] = 2 + self.assertEqual(2, proto.repeated_int32[-1]) + + # Test deletion at negative indices. 
+ proto.repeated_int32[:] = [0, 1, 2, 3] + del proto.repeated_int32[-1] + self.assertEqual([0, 1, 2], proto.repeated_int32) + + del proto.repeated_int32[-2] + self.assertEqual([0, 2], proto.repeated_int32) + + self.assertRaises(IndexError, proto.repeated_int32.__delitem__, -3) + self.assertRaises(IndexError, proto.repeated_int32.__delitem__, 300) + + del proto.repeated_int32[-2:-1] + self.assertEqual([2], proto.repeated_int32) + + del proto.repeated_int32[100:10000] + self.assertEqual([2], proto.repeated_int32) + def testRepeatedScalarsRemove(self): proto = unittest_pb2.TestAllTypes() @@ -687,7 +762,7 @@ class ReflectionTest(unittest.TestCase): m1 = proto.repeated_nested_message.add() self.assertTrue(proto.repeated_nested_message) self.assertEqual(2, len(proto.repeated_nested_message)) - self.assertIs([m0, m1], proto.repeated_nested_message) + self.assertListsEqual([m0, m1], proto.repeated_nested_message) self.assertTrue(isinstance(m0, unittest_pb2.TestAllTypes.NestedMessage)) # Test out-of-bounds indices. @@ -706,32 +781,57 @@ class ReflectionTest(unittest.TestCase): m2 = proto.repeated_nested_message.add() m3 = proto.repeated_nested_message.add() m4 = proto.repeated_nested_message.add() - self.assertIs([m1, m2, m3], proto.repeated_nested_message[1:4]) - self.assertIs([m0, m1, m2, m3, m4], proto.repeated_nested_message[:]) + self.assertListsEqual( + [m1, m2, m3], proto.repeated_nested_message[1:4]) + self.assertListsEqual( + [m0, m1, m2, m3, m4], proto.repeated_nested_message[:]) + self.assertListsEqual( + [m0, m1], proto.repeated_nested_message[:2]) + self.assertListsEqual( + [m2, m3, m4], proto.repeated_nested_message[2:]) + self.assertEqual( + m0, proto.repeated_nested_message[0]) + self.assertListsEqual( + [m0], proto.repeated_nested_message[:1]) # Test that we can use the field as an iterator. result = [] for i in proto.repeated_nested_message: result.append(i) - self.assertIs([m0, m1, m2, m3, m4], result) + self.assertListsEqual([m0, m1, m2, m3, m4], result) # Test single deletion. del proto.repeated_nested_message[2] - self.assertIs([m0, m1, m3, m4], proto.repeated_nested_message) + self.assertListsEqual([m0, m1, m3, m4], proto.repeated_nested_message) # Test slice deletion. del proto.repeated_nested_message[2:] - self.assertIs([m0, m1], proto.repeated_nested_message) + self.assertListsEqual([m0, m1], proto.repeated_nested_message) + + # Test extending. + n1 = unittest_pb2.TestAllTypes.NestedMessage(bb=1) + n2 = unittest_pb2.TestAllTypes.NestedMessage(bb=2) + proto.repeated_nested_message.extend([n1,n2]) + self.assertEqual(4, len(proto.repeated_nested_message)) + self.assertEqual(n1, proto.repeated_nested_message[2]) + self.assertEqual(n2, proto.repeated_nested_message[3]) # Test clearing. proto.ClearField('repeated_nested_message') self.assertTrue(not proto.repeated_nested_message) self.assertEqual(0, len(proto.repeated_nested_message)) + # Test constructing an element while adding it. + proto.repeated_nested_message.add(bb=23) + self.assertEqual(1, len(proto.repeated_nested_message)) + self.assertEqual(23, proto.repeated_nested_message[0].bb) + def testHandWrittenReflection(self): - # TODO(robinson): We probably need a better way to specify - # protocol types by hand. But then again, this isn't something - # we expect many people to do. Hmm. + # Hand written extensions are only supported by the pure-Python + # implementation of the API. 
+ if api_implementation.Type() != 'python': + return + FieldDescriptor = descriptor.FieldDescriptor foo_field_descriptor = FieldDescriptor( name='foo_field', full_name='MyProto.foo_field', @@ -894,7 +994,7 @@ class ReflectionTest(unittest.TestCase): self.assertTrue(not toplevel.HasField('submessage')) foreign = toplevel.submessage.Extensions[ more_extensions_pb2.repeated_message_extension].add() - self.assertTrue(foreign is toplevel.submessage.Extensions[ + self.assertEqual(foreign, toplevel.submessage.Extensions[ more_extensions_pb2.repeated_message_extension][0]) self.assertTrue(toplevel.HasField('submessage')) @@ -997,6 +1097,12 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(123, proto2.repeated_nested_message[1].bb) self.assertEqual(321, proto2.repeated_nested_message[2].bb) + proto3 = unittest_pb2.TestAllTypes() + proto3.repeated_nested_message.MergeFrom(proto2.repeated_nested_message) + self.assertEqual(999, proto3.repeated_nested_message[0].bb) + self.assertEqual(123, proto3.repeated_nested_message[1].bb) + self.assertEqual(321, proto3.repeated_nested_message[2].bb) + def testMergeFromAllFields(self): # With all fields set. proto1 = unittest_pb2.TestAllTypes() @@ -1126,6 +1232,15 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(2, proto1.optional_int32) self.assertEqual('important-text', proto1.optional_string) + def testCopyFromBadType(self): + # The python implementation doesn't raise an exception in this + # case. In theory it should. + if api_implementation.Type() == 'python': + return + proto1 = unittest_pb2.TestAllTypes() + proto2 = unittest_pb2.TestAllExtensions() + self.assertRaises(TypeError, proto1.CopyFrom, proto2) + def testClear(self): proto = unittest_pb2.TestAllTypes() test_util.SetAllFields(proto) @@ -1231,9 +1346,10 @@ class ReflectionTest(unittest.TestCase): proto.optional_string = str('Testing') self.assertEqual(proto.optional_string, unicode('Testing')) - # Values of type 'str' are also accepted as long as they can be encoded in - # UTF-8. - self.assertEqual(type(proto.optional_string), str) + if api_implementation.Type() == 'python': + # Values of type 'str' are also accepted as long as they can be + # encoded in UTF-8. + self.assertEqual(type(proto.optional_string), str) # Try to assign a 'str' value which contains bytes that aren't 7-bit ASCII. self.assertRaises(ValueError, @@ -1271,7 +1387,7 @@ class ReflectionTest(unittest.TestCase): # Check that the type_id is the same as the tag ID in the .proto file. self.assertEqual(raw.item[0].type_id, 1547769) - # Check the actually bytes on the wire. + # Check the actual bytes on the wire. self.assertTrue( raw.item[0].message.endswith(test_utf8_bytes)) message2.MergeFromString(raw.item[0].message) @@ -1279,10 +1395,23 @@ class ReflectionTest(unittest.TestCase): self.assertEqual(type(message2.str), unicode) self.assertEqual(message2.str, test_utf8) - # How about if the bytes on the wire aren't a valid UTF-8 encoded string. + # The pure Python API throws an exception on MergeFromString(), + # if any of the string fields of the message can't be UTF-8 decoded. + # The C++ implementation of the API has no way to check that on + # MergeFromString and thus has no way to throw the exception. + # + # The pure Python API always returns objects of type 'unicode' (UTF-8 + # encoded), or 'str' (in 7 bit ASCII). 
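    # For instance (pure-Python behaviour, matching the string-handling
    # tests above):
    #
    #   msg = unittest_pb2.TestAllTypes()
    #   msg.optional_string = 'Testing'     # 7-bit ASCII str stays a str
    #   assert type(msg.optional_string) is str
    #   msg.optional_string = u'\u00fc'     # non-ASCII values come back
    #   assert type(msg.optional_string) is unicode  # as unicode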
bytes = raw.item[0].message.replace( test_utf8_bytes, len(test_utf8_bytes) * '\xff') - self.assertRaises(UnicodeDecodeError, message2.MergeFromString, bytes) + + unicode_decode_failed = False + try: + message2.MergeFromString(bytes) + except UnicodeDecodeError, e: + unicode_decode_failed = True + string_field = message2.str + self.assertTrue(unicode_decode_failed or type(string_field) == str) def testEmptyNestedMessage(self): proto = unittest_pb2.TestAllTypes() @@ -1325,6 +1454,9 @@ class TestAllTypesEqualityTest(unittest.TestCase): self.first_proto = unittest_pb2.TestAllTypes() self.second_proto = unittest_pb2.TestAllTypes() + def testNotHashable(self): + self.assertRaises(TypeError, hash, self.first_proto) + def testSelfEquality(self): self.assertEqual(self.first_proto, self.first_proto) @@ -1342,6 +1474,9 @@ class FullProtosEqualityTest(unittest.TestCase): test_util.SetAllFields(self.first_proto) test_util.SetAllFields(self.second_proto) + def testNotHashable(self): + self.assertRaises(TypeError, hash, self.first_proto) + def testNoneNotEqual(self): self.assertNotEqual(self.first_proto, None) self.assertNotEqual(None, self.second_proto) @@ -1410,9 +1545,6 @@ class FullProtosEqualityTest(unittest.TestCase): self.first_proto.ClearField('optional_nested_message') self.second_proto.optional_nested_message.ClearField('bb') self.assertNotEqual(self.first_proto, self.second_proto) - # TODO(robinson): Replace next two lines with method - # to set the "has" bit without changing the value, - # if/when such a method exists. self.first_proto.optional_nested_message.bb = 0 self.first_proto.optional_nested_message.ClearField('bb') self.assertEqual(self.first_proto, self.second_proto) @@ -1477,6 +1609,14 @@ class ByteSizeTest(unittest.TestCase): def testEmptyMessage(self): self.assertEqual(0, self.proto.ByteSize()) + def testSizedOnKwargs(self): + # Use a separate message to ensure testing right after creation. + proto = unittest_pb2.TestAllTypes() + self.assertEqual(0, proto.ByteSize()) + proto_kwargs = unittest_pb2.TestAllTypes(optional_int64 = 1) + # One byte for the tag, one to encode varint 1. + self.assertEqual(2, proto_kwargs.ByteSize()) + def testVarints(self): def Test(i, expected_varint_size): self.proto.Clear() @@ -1668,10 +1808,13 @@ class ByteSizeTest(unittest.TestCase): self.assertEqual(3, self.proto.ByteSize()) self.proto.ClearField('optional_foreign_message') self.assertEqual(0, self.proto.ByteSize()) - child = self.proto.optional_foreign_message - self.proto.ClearField('optional_foreign_message') - child.c = 128 - self.assertEqual(0, self.proto.ByteSize()) + + if api_implementation.Type() == 'python': + # This is only possible in pure-Python implementation of the API. + child = self.proto.optional_foreign_message + self.proto.ClearField('optional_foreign_message') + child.c = 128 + self.assertEqual(0, self.proto.ByteSize()) # Test within extension. extension = more_extensions_pb2.optional_message_extension @@ -1737,7 +1880,6 @@ class ByteSizeTest(unittest.TestCase): self.assertEqual(19, self.packed_extended_proto.ByteSize()) -# TODO(robinson): We need cross-language serialization consistency tests. # Issues to be sure to cover include: # * Handling of unrecognized tags ("uninterpreted_bytes"). # * Handling of MessageSets. @@ -1792,6 +1934,10 @@ class SerializationTest(unittest.TestCase): self.assertEqual(first_proto, second_proto) def testParseTruncated(self): + # This test is only applicable for the Python implementation of the API. 
+ if api_implementation.Type() != 'python': + return + first_proto = unittest_pb2.TestAllTypes() test_util.SetAllFields(first_proto) serialized = first_proto.SerializeToString() diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py index e0991cb1..a3547782 100755 --- a/python/google/protobuf/internal/text_format_test.py +++ b/python/google/protobuf/internal/text_format_test.py @@ -35,6 +35,7 @@ __author__ = 'kenton@google.com (Kenton Varda)' import difflib +import re import unittest from google.protobuf import text_format @@ -95,12 +96,13 @@ class TextFormatTest(unittest.TestCase): def testPrintExotic(self): message = unittest_pb2.TestAllTypes() - message.repeated_int64.append(-9223372036854775808); - message.repeated_uint64.append(18446744073709551615); - message.repeated_double.append(123.456); - message.repeated_double.append(1.23e22); - message.repeated_double.append(1.23e-18); - message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'\"'); + message.repeated_int64.append(-9223372036854775808) + message.repeated_uint64.append(18446744073709551615) + message.repeated_double.append(123.456) + message.repeated_double.append(1.23e22) + message.repeated_double.append(1.23e-18) + message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"') + message.repeated_string.append(u'\u00fc\ua71f') self.CompareToGoldenText( self.RemoveRedundantZeros(text_format.MessageToString(message)), 'repeated_int64: -9223372036854775808\n' @@ -109,7 +111,95 @@ class TextFormatTest(unittest.TestCase): 'repeated_double: 1.23e+22\n' 'repeated_double: 1.23e-18\n' 'repeated_string: ' - '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n') + '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""\n' + 'repeated_string: "\\303\\274\\352\\234\\237"\n') + + def testPrintNestedMessageAsOneLine(self): + message = unittest_pb2.TestAllTypes() + msg = message.repeated_nested_message.add() + msg.bb = 42; + self.CompareToGoldenText( + text_format.MessageToString(message, as_one_line=True), + 'repeated_nested_message { bb: 42 }') + + def testPrintRepeatedFieldsAsOneLine(self): + message = unittest_pb2.TestAllTypes() + message.repeated_int32.append(1) + message.repeated_int32.append(1) + message.repeated_int32.append(3) + message.repeated_string.append("Google") + message.repeated_string.append("Zurich") + self.CompareToGoldenText( + text_format.MessageToString(message, as_one_line=True), + 'repeated_int32: 1 repeated_int32: 1 repeated_int32: 3 ' + 'repeated_string: "Google" repeated_string: "Zurich"') + + def testPrintNestedNewLineInStringAsOneLine(self): + message = unittest_pb2.TestAllTypes() + message.optional_string = "a\nnew\nline" + self.CompareToGoldenText( + text_format.MessageToString(message, as_one_line=True), + 'optional_string: "a\\nnew\\nline"') + + def testPrintMessageSetAsOneLine(self): + message = unittest_mset_pb2.TestMessageSetContainer() + ext1 = unittest_mset_pb2.TestMessageSetExtension1.message_set_extension + ext2 = unittest_mset_pb2.TestMessageSetExtension2.message_set_extension + message.message_set.Extensions[ext1].i = 23 + message.message_set.Extensions[ext2].str = 'foo' + self.CompareToGoldenText( + text_format.MessageToString(message, as_one_line=True), + 'message_set {' + ' [protobuf_unittest.TestMessageSetExtension1] {' + ' i: 23' + ' }' + ' [protobuf_unittest.TestMessageSetExtension2] {' + ' str: \"foo\"' + ' }' + ' }') + + def testPrintExoticAsOneLine(self): + message = unittest_pb2.TestAllTypes() + 
message.repeated_int64.append(-9223372036854775808) + message.repeated_uint64.append(18446744073709551615) + message.repeated_double.append(123.456) + message.repeated_double.append(1.23e22) + message.repeated_double.append(1.23e-18) + message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"') + message.repeated_string.append(u'\u00fc\ua71f') + self.CompareToGoldenText( + self.RemoveRedundantZeros( + text_format.MessageToString(message, as_one_line=True)), + 'repeated_int64: -9223372036854775808' + ' repeated_uint64: 18446744073709551615' + ' repeated_double: 123.456' + ' repeated_double: 1.23e+22' + ' repeated_double: 1.23e-18' + ' repeated_string: ' + '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""' + ' repeated_string: "\\303\\274\\352\\234\\237"') + + def testRoundTripExoticAsOneLine(self): + message = unittest_pb2.TestAllTypes() + message.repeated_int64.append(-9223372036854775808) + message.repeated_uint64.append(18446744073709551615) + message.repeated_double.append(123.456) + message.repeated_double.append(1.23e22) + message.repeated_double.append(1.23e-18) + message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"') + message.repeated_string.append(u'\u00fc\ua71f') + + wire_text = text_format.MessageToString(message, as_one_line=True) + parsed_message = unittest_pb2.TestAllTypes() + text_format.Merge(wire_text, parsed_message) + self.assertEquals(message, parsed_message) + + def testPrintRawUtf8String(self): + message = unittest_pb2.TestAllTypes() + message.repeated_string.append(u'\u00fc\ua71f') + self.CompareToGoldenText( + text_format.MessageToString(message, as_utf8 = True), + 'repeated_string: "\303\274\352\234\237"\n') def testMessageToString(self): message = unittest_pb2.ForeignMessage() @@ -119,8 +209,12 @@ class TextFormatTest(unittest.TestCase): def RemoveRedundantZeros(self, text): # Some platforms print 1e+5 as 1e+005. This is fine, but we need to remove # these zeros in order to match the golden file. - return text.replace('e+0','e+').replace('e+0','e+') \ + text = text.replace('e+0','e+').replace('e+0','e+') \ .replace('e-0','e-').replace('e-0','e-') + # Floating point fields are printed with .0 suffix even if they are + # actualy integer numbers. 
+ text = re.compile('\.0$', re.MULTILINE).sub('', text) + return text def testMergeGolden(self): golden_text = '\n'.join(self.ReadGolden('text_format_unittest_data.txt')) @@ -191,8 +285,11 @@ class TextFormatTest(unittest.TestCase): 'repeated_double: 1.23e+22\n' 'repeated_double: 1.23e-18\n' 'repeated_string: \n' - '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n' - 'repeated_string: "foo" \'corge\' "grault"') + '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""\n' + 'repeated_string: "foo" \'corge\' "grault"\n' + 'repeated_string: "\\303\\274\\352\\234\\237"\n' + 'repeated_string: "\\xc3\\xbc"\n' + 'repeated_string: "\xc3\xbc"\n') text_format.Merge(text, message) self.assertEqual(-9223372036854775808, message.repeated_int64[0]) @@ -201,8 +298,30 @@ class TextFormatTest(unittest.TestCase): self.assertEqual(1.23e22, message.repeated_double[1]) self.assertEqual(1.23e-18, message.repeated_double[2]) self.assertEqual( - '\000\001\a\b\f\n\r\t\v\\\'\"', message.repeated_string[0]) + '\000\001\a\b\f\n\r\t\v\\\'"', message.repeated_string[0]) self.assertEqual('foocorgegrault', message.repeated_string[1]) + self.assertEqual(u'\u00fc\ua71f', message.repeated_string[2]) + self.assertEqual(u'\u00fc', message.repeated_string[3]) + + def testMergeEmptyText(self): + message = unittest_pb2.TestAllTypes() + text = '' + text_format.Merge(text, message) + self.assertEquals(unittest_pb2.TestAllTypes(), message) + + def testMergeInvalidUtf8(self): + message = unittest_pb2.TestAllTypes() + text = 'repeated_string: "\\xc3\\xc3"' + self.assertRaises(text_format.ParseError, text_format.Merge, text, message) + + def testMergeSingleWord(self): + message = unittest_pb2.TestAllTypes() + text = 'foo' + self.assertRaisesWithMessage( + text_format.ParseError, + ('1:1 : Message type "protobuf_unittest.TestAllTypes" has no field named ' + '"foo".'), + text_format.Merge, text, message) def testMergeUnknownField(self): message = unittest_pb2.TestAllTypes() @@ -297,7 +416,8 @@ class TokenizerTest(unittest.TestCase): 'identifiER_4 : 1.1e+2 ID5:-0.23 ID6:\'aaaa\\\'bbbb\'\n' 'ID7 : "aa\\"bb"\n\n\n\n ID8: {A:inf B:-inf C:true D:false}\n' 'ID9: 22 ID10: -111111111111111111 ID11: -22\n' - 'ID12: 2222222222222222222') + 'ID12: 2222222222222222222 ' + 'false_bool: 0 true_BOOL:t \n true_bool1: 1 false_BOOL1:f ' ) tokenizer = text_format._Tokenizer(text) methods = [(tokenizer.ConsumeIdentifier, 'identifier1'), ':', @@ -347,7 +467,19 @@ class TokenizerTest(unittest.TestCase): (tokenizer.ConsumeInt32, -22), (tokenizer.ConsumeIdentifier, 'ID12'), ':', - (tokenizer.ConsumeUint64, 2222222222222222222)] + (tokenizer.ConsumeUint64, 2222222222222222222), + (tokenizer.ConsumeIdentifier, 'false_bool'), + ':', + (tokenizer.ConsumeBool, False), + (tokenizer.ConsumeIdentifier, 'true_BOOL'), + ':', + (tokenizer.ConsumeBool, True), + (tokenizer.ConsumeIdentifier, 'true_bool1'), + ':', + (tokenizer.ConsumeBool, True), + (tokenizer.ConsumeIdentifier, 'false_BOOL1'), + ':', + (tokenizer.ConsumeBool, False)] i = 0 while not tokenizer.AtEnd(): diff --git a/python/google/protobuf/message.py b/python/google/protobuf/message.py index f8398474..54cf35ea 100755 --- a/python/google/protobuf/message.py +++ b/python/google/protobuf/message.py @@ -67,6 +67,11 @@ class Message(object): DESCRIPTOR = None + def __deepcopy__(self, memo=None): + clone = type(self)() + clone.MergeFrom(self) + return clone + def __eq__(self, other_msg): raise NotImplementedError @@ -74,9 +79,15 @@ class Message(object): # Can't just say self != other_msg, since that would 
infinitely recurse. :) return not self == other_msg + def __hash__(self): + raise TypeError('unhashable object') + def __str__(self): raise NotImplementedError + def __unicode__(self): + raise NotImplementedError + def MergeFrom(self, other_msg): """Merges the contents of the specified message into current message. diff --git a/python/google/protobuf/pyext/python-proto2.cc b/python/google/protobuf/pyext/python-proto2.cc new file mode 100644 index 00000000..f2799e62 --- /dev/null +++ b/python/google/protobuf/pyext/python-proto2.cc @@ -0,0 +1,1658 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
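Before the C++ extension sources, a quick illustration of the message.py changes in the hunk above: __deepcopy__ builds an empty instance of the same type and copies into it with MergeFrom, while __hash__ now raises TypeError so messages cannot be used as dictionary keys. A minimal sketch, assuming the generated unittest_pb2 module used by the tests earlier is importable:

    import copy
    from google.protobuf import unittest_pb2

    original = unittest_pb2.TestAllTypes()
    original.repeated_string.append(u'\u00fc')

    # copy.deepcopy dispatches to Message.__deepcopy__: new instance + MergeFrom.
    clone = copy.deepcopy(original)
    assert clone == original          # same field values
    assert clone is not original      # distinct object; edits to one do not affect the other

    # Message.__hash__ raises, so messages are explicitly unhashable.
    try:
        hash(original)
    except TypeError:
        pass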
+ +// Author: petar@google.com (Petar Petrov) + +#include <Python.h> +#include <map> +#include <string> +#include <vector> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/pyext/python_descriptor.h> +#include <google/protobuf/io/coded_stream.h> +#include <google/protobuf/descriptor.h> +#include <google/protobuf/dynamic_message.h> +#include <google/protobuf/message.h> +#include <google/protobuf/unknown_field_set.h> +#include <google/protobuf/pyext/python_protobuf.h> + +/* Is 64bit */ +#define IS_64BIT (SIZEOF_LONG == 8) + +#define FIELD_BELONGS_TO_MESSAGE(field_descriptor, message) \ + ((message)->GetDescriptor() == (field_descriptor)->containing_type()) + +#define FIELD_IS_REPEATED(field_descriptor) \ + ((field_descriptor)->label() == google::protobuf::FieldDescriptor::LABEL_REPEATED) + +#define GOOGLE_CHECK_GET_INT32(arg, value) \ + int32 value; \ + if (!CheckAndGetInteger(arg, &value, kint32min_py, kint32max_py)) { \ + return NULL; \ + } + +#define GOOGLE_CHECK_GET_INT64(arg, value) \ + int64 value; \ + if (!CheckAndGetInteger(arg, &value, kint64min_py, kint64max_py)) { \ + return NULL; \ + } + +#define GOOGLE_CHECK_GET_UINT32(arg, value) \ + uint32 value; \ + if (!CheckAndGetInteger(arg, &value, kPythonZero, kuint32max_py)) { \ + return NULL; \ + } + +#define GOOGLE_CHECK_GET_UINT64(arg, value) \ + uint64 value; \ + if (!CheckAndGetInteger(arg, &value, kPythonZero, kuint64max_py)) { \ + return NULL; \ + } + +#define GOOGLE_CHECK_GET_FLOAT(arg, value) \ + float value; \ + if (!CheckAndGetFloat(arg, &value)) { \ + return NULL; \ + } \ + +#define GOOGLE_CHECK_GET_DOUBLE(arg, value) \ + double value; \ + if (!CheckAndGetDouble(arg, &value)) { \ + return NULL; \ + } + +#define GOOGLE_CHECK_GET_BOOL(arg, value) \ + bool value; \ + if (!CheckAndGetBool(arg, &value)) { \ + return NULL; \ + } + +#define C(str) const_cast<char*>(str) + +// --- Globals: + +// Constants used for integer type range checking. +static PyObject* kPythonZero; +static PyObject* kint32min_py; +static PyObject* kint32max_py; +static PyObject* kuint32max_py; +static PyObject* kint64min_py; +static PyObject* kint64max_py; +static PyObject* kuint64max_py; + +namespace google { +namespace protobuf { +namespace python { + +// --- Support Routines: + +static void AddConstants(PyObject* module) { + struct NameValue { + char* name; + int32 value; + } constants[] = { + // Labels: + {"LABEL_OPTIONAL", google::protobuf::FieldDescriptor::LABEL_OPTIONAL}, + {"LABEL_REQUIRED", google::protobuf::FieldDescriptor::LABEL_REQUIRED}, + {"LABEL_REPEATED", google::protobuf::FieldDescriptor::LABEL_REPEATED}, + // CPP types: + {"CPPTYPE_MESSAGE", google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE}, + // Field Types: + {"TYPE_MESSAGE", google::protobuf::FieldDescriptor::TYPE_MESSAGE}, + // End. 
+ {NULL, 0} + }; + + for (NameValue* constant = constants; + constant->name != NULL; constant++) { + PyModule_AddIntConstant(module, constant->name, constant->value); + } +} + +// --- CMessage Custom Type: + +// ------ Type Forward Declaration: + +struct CMessage; +struct CMessage_Type; + +static void CMessageDealloc(CMessage* self); +static int CMessageInit(CMessage* self, PyObject *args, PyObject *kwds); +static PyObject* CMessageStr(CMessage* self); + +static PyObject* CMessage_AddMessage(CMessage* self, PyObject* args); +static PyObject* CMessage_AddRepeatedScalar(CMessage* self, PyObject* args); +static PyObject* CMessage_AssignRepeatedScalar(CMessage* self, PyObject* args); +static PyObject* CMessage_ByteSize(CMessage* self, PyObject* args); +static PyObject* CMessage_Clear(CMessage* self, PyObject* args); +static PyObject* CMessage_ClearField(CMessage* self, PyObject* args); +static PyObject* CMessage_ClearFieldByDescriptor( + CMessage* self, PyObject* args); +static PyObject* CMessage_CopyFrom(CMessage* self, PyObject* args); +static PyObject* CMessage_DebugString(CMessage* self, PyObject* args); +static PyObject* CMessage_DeleteRepeatedField(CMessage* self, PyObject* args); +static PyObject* CMessage_Equals(CMessage* self, PyObject* args); +static PyObject* CMessage_FieldLength(CMessage* self, PyObject* args); +static PyObject* CMessage_FindInitializationErrors(CMessage* self); +static PyObject* CMessage_GetRepeatedMessage(CMessage* self, PyObject* args); +static PyObject* CMessage_GetRepeatedScalar(CMessage* self, PyObject* args); +static PyObject* CMessage_GetScalar(CMessage* self, PyObject* args); +static PyObject* CMessage_HasField(CMessage* self, PyObject* args); +static PyObject* CMessage_HasFieldByDescriptor(CMessage* self, PyObject* args); +static PyObject* CMessage_IsInitialized(CMessage* self, PyObject* args); +static PyObject* CMessage_ListFields(CMessage* self, PyObject* args); +static PyObject* CMessage_MergeFrom(CMessage* self, PyObject* args); +static PyObject* CMessage_MergeFromString(CMessage* self, PyObject* args); +static PyObject* CMessage_MutableMessage(CMessage* self, PyObject* args); +static PyObject* CMessage_NewSubMessage(CMessage* self, PyObject* args); +static PyObject* CMessage_SetScalar(CMessage* self, PyObject* args); +static PyObject* CMessage_SerializePartialToString( + CMessage* self, PyObject* args); +static PyObject* CMessage_SerializeToString(CMessage* self, PyObject* args); +static PyObject* CMessage_SetInParent(CMessage* self, PyObject* args); +static PyObject* CMessage_SwapRepeatedFieldElements( + CMessage* self, PyObject* args); + +// ------ Object Definition: + +typedef struct CMessage { + PyObject_HEAD + + struct CMessage* parent; // NULL if wasn't created from another message. 
+ CFieldDescriptor* parent_field; + const char* full_name; + google::protobuf::Message* message; + bool free_message; + bool read_only; +} CMessage; + +// ------ Method Table: + +#define CMETHOD(name, args, doc) \ + { C(#name), (PyCFunction)CMessage_##name, args, C(doc) } +static PyMethodDef CMessageMethods[] = { + CMETHOD(AddMessage, METH_O, + "Adds a new message to a repeated composite field."), + CMETHOD(AddRepeatedScalar, METH_VARARGS, + "Adds a scalar to a repeated scalar field."), + CMETHOD(AssignRepeatedScalar, METH_VARARGS, + "Clears and sets the values of a repeated scalar field."), + CMETHOD(ByteSize, METH_NOARGS, + "Returns the size of the message in bytes."), + CMETHOD(Clear, METH_NOARGS, + "Clears a protocol message."), + CMETHOD(ClearField, METH_O, + "Clears a protocol message field by name."), + CMETHOD(ClearFieldByDescriptor, METH_O, + "Clears a protocol message field by descriptor."), + CMETHOD(CopyFrom, METH_O, + "Copies a protocol message into the current message."), + CMETHOD(DebugString, METH_NOARGS, + "Returns the debug string of a protocol message."), + CMETHOD(DeleteRepeatedField, METH_VARARGS, + "Deletes a slice of values from a repeated field."), + CMETHOD(Equals, METH_O, + "Checks if two protocol messages are equal (by identity)."), + CMETHOD(FieldLength, METH_O, + "Returns the number of elements in a repeated field."), + CMETHOD(FindInitializationErrors, METH_NOARGS, + "Returns the initialization errors of a message."), + CMETHOD(GetRepeatedMessage, METH_VARARGS, + "Returns a message from a repeated composite field."), + CMETHOD(GetRepeatedScalar, METH_VARARGS, + "Returns a scalar value from a repeated scalar field."), + CMETHOD(GetScalar, METH_O, + "Returns the scalar value of a field."), + CMETHOD(HasField, METH_O, + "Checks if a message field is set."), + CMETHOD(HasFieldByDescriptor, METH_O, + "Checks if a message field is set by given its descriptor"), + CMETHOD(IsInitialized, METH_NOARGS, + "Checks if all required fields of a protocol message are set."), + CMETHOD(ListFields, METH_NOARGS, + "Lists all set fields of a message."), + CMETHOD(MergeFrom, METH_O, + "Merges a protocol message into the current message."), + CMETHOD(MergeFromString, METH_O, + "Merges a serialized message into the current message."), + CMETHOD(MutableMessage, METH_O, + "Returns a new instance of a nested protocol message."), + CMETHOD(NewSubMessage, METH_O, + "Creates and returns a python message given the descriptor of a " + "composite field of the current message."), + CMETHOD(SetScalar, METH_VARARGS, + "Sets the value of a singular scalar field."), + CMETHOD(SerializePartialToString, METH_VARARGS, + "Serializes the message to a string, even if it isn't initialized."), + CMETHOD(SerializeToString, METH_NOARGS, + "Serializes the message to a string, only for initialized messages."), + CMETHOD(SetInParent, METH_NOARGS, + "Sets the has bit of the given field in its parent message."), + CMETHOD(SwapRepeatedFieldElements, METH_VARARGS, + "Swaps the elements in two positions in a repeated field."), + { NULL, NULL } +}; +#undef CMETHOD + +static PyMemberDef CMessageMembers[] = { + { C("full_name"), T_STRING, offsetof(CMessage, full_name), 0, "Full name" }, + { NULL } +}; + +// ------ Type Definition: + +// The definition for the type object that captures the type of CMessage +// in Python. +PyTypeObject CMessage_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, + C("google3.net.google.protobuf.python.internal." + "_net_proto2___python." 
+ "CMessage"), // tp_name + sizeof(CMessage), // tp_basicsize + 0, // tp_itemsize + (destructor)CMessageDealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_compare + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + (reprfunc)CMessageStr, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + C("A ProtocolMessage"), // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter + 0, // tp_iternext + CMessageMethods, // tp_methods + CMessageMembers, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + (initproc)CMessageInit, // tp_init + PyType_GenericAlloc, // tp_alloc + PyType_GenericNew, // tp_new + PyObject_Del, // tp_free +}; + +// ------ Helper Functions: + +static void FormatTypeError(PyObject* arg, char* expected_types) { + PyObject* s = PyObject_Str(PyObject_Type(arg)); + PyObject* repr = PyObject_Repr(PyObject_Type(arg)); + PyErr_Format(PyExc_TypeError, + "%.100s has type %.100s, but expected one of: %s", + PyString_AS_STRING(repr), + PyString_AS_STRING(s), + expected_types); + Py_DECREF(s); + Py_DECREF(repr); +} + +template <class T> +static bool CheckAndGetInteger( + PyObject* arg, T* value, PyObject* min, PyObject* max) { + bool is_long = PyLong_Check(arg); + if (!PyInt_Check(arg) && !is_long) { + FormatTypeError(arg, "int, long"); + return false; + } + + if (PyObject_Compare(min, arg) > 0 || PyObject_Compare(max, arg) < 0) { + PyObject* s = PyObject_Str(arg); + PyErr_Format(PyExc_ValueError, + "Value out of range: %s", + PyString_AS_STRING(s)); + Py_DECREF(s); + return false; + } + if (is_long) { + if (min == kPythonZero) { + *value = static_cast<T>(PyLong_AsUnsignedLongLong(arg)); + } else { + *value = static_cast<T>(PyLong_AsLongLong(arg)); + } + } else { + *value = static_cast<T>(PyInt_AsLong(arg)); + } + return true; +} + +static bool CheckAndGetDouble(PyObject* arg, double* value) { + if (!PyInt_Check(arg) && !PyLong_Check(arg) && + !PyFloat_Check(arg)) { + FormatTypeError(arg, "int, long, float"); + return false; + } + *value = PyFloat_AsDouble(arg); + return true; +} + +static bool CheckAndGetFloat(PyObject* arg, float* value) { + double double_value; + if (!CheckAndGetDouble(arg, &double_value)) { + return false; + } + *value = static_cast<float>(double_value); + return true; +} + +static bool CheckAndGetBool(PyObject* arg, bool* value) { + if (!PyInt_Check(arg) && !PyBool_Check(arg) && !PyLong_Check(arg)) { + FormatTypeError(arg, "int, long, bool"); + return false; + } + *value = static_cast<bool>(PyInt_AsLong(arg)); + return true; +} + +google::protobuf::DynamicMessageFactory* global_message_factory = NULL; +static const google::protobuf::Message* CreateMessage(const char* message_type) { + string message_name(message_type); + const google::protobuf::Descriptor* descriptor = + GetDescriptorPool()->FindMessageTypeByName(message_name); + if (descriptor == NULL) { + return NULL; + } + return global_message_factory->GetPrototype(descriptor); +} + +static bool CheckAndSetString( + PyObject* arg, google::protobuf::Message* message, + const google::protobuf::FieldDescriptor* descriptor, + const google::protobuf::Reflection* reflection, + bool append, + int index) { + GOOGLE_DCHECK(descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING || + descriptor->type() == 
google::protobuf::FieldDescriptor::TYPE_BYTES); + if (descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) { + if (!PyString_Check(arg) && !PyUnicode_Check(arg)) { + FormatTypeError(arg, "str, unicode"); + return false; + } + + if (PyString_Check(arg)) { + PyObject* unicode = PyUnicode_FromEncodedObject(arg, "ascii", NULL); + if (unicode == NULL) { + PyObject* repr = PyObject_Repr(arg); + PyErr_Format(PyExc_ValueError, + "%s has type str, but isn't in 7-bit ASCII " + "encoding. Non-ASCII strings must be converted to " + "unicode objects before being added.", + PyString_AS_STRING(repr)); + Py_DECREF(repr); + return false; + } else { + Py_DECREF(unicode); + } + } + } else if (!PyString_Check(arg)) { + FormatTypeError(arg, "str"); + return false; + } + + PyObject* encoded_string = NULL; + if (descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) { + if (PyString_Check(arg)) { + encoded_string = PyString_AsEncodedObject(arg, "utf-8", NULL); + } else { + encoded_string = PyUnicode_AsEncodedObject(arg, "utf-8", NULL); + } + } else { + // In this case field type is "bytes". + encoded_string = arg; + Py_INCREF(encoded_string); + } + + if (encoded_string == NULL) { + return false; + } + + char* value; + Py_ssize_t value_len; + if (PyString_AsStringAndSize(encoded_string, &value, &value_len) < 0) { + Py_DECREF(encoded_string); + return false; + } + + string value_string(value, value_len); + if (append) { + reflection->AddString(message, descriptor, value_string); + } else if (index < 0) { + reflection->SetString(message, descriptor, value_string); + } else { + reflection->SetRepeatedString(message, descriptor, index, value_string); + } + Py_DECREF(encoded_string); + return true; +} + +static PyObject* ToStringObject( + const google::protobuf::FieldDescriptor* descriptor, string value) { + if (descriptor->type() != google::protobuf::FieldDescriptor::TYPE_STRING) { + return PyString_FromStringAndSize(value.c_str(), value.length()); + } + + PyObject* result = PyUnicode_DecodeUTF8(value.c_str(), value.length(), NULL); + // If the string can't be decoded in UTF-8, just return a string object that + // contains the raw bytes. This can't happen if the value was assigned using + // the members of the Python message object, but can happen if the values were + // parsed from the wire (binary). 
+ if (result == NULL) { + PyErr_Clear(); + result = PyString_FromStringAndSize(value.c_str(), value.length()); + } + return result; +} + +static void AssureWritable(CMessage* self) { + if (self == NULL || + self->parent == NULL || + self->parent_field == NULL) { + return; + } + + if (!self->read_only) { + return; + } + + AssureWritable(self->parent); + + google::protobuf::Message* message = self->parent->message; + const google::protobuf::Reflection* reflection = message->GetReflection(); + self->message = reflection->MutableMessage( + message, self->parent_field->descriptor, global_message_factory); + self->read_only = false; + self->parent = NULL; + self->parent_field = NULL; +} + +static PyObject* InternalGetScalar( + google::protobuf::Message* message, + const google::protobuf::FieldDescriptor* field_descriptor) { + const google::protobuf::Reflection* reflection = message->GetReflection(); + + if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) { + PyErr_SetString( + PyExc_KeyError, "Field does not belong to message!"); + return NULL; + } + + PyObject* result = NULL; + switch (field_descriptor->cpp_type()) { + case google::protobuf::FieldDescriptor::CPPTYPE_INT32: { + int32 value = reflection->GetInt32(*message, field_descriptor); + result = PyInt_FromLong(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_INT64: { + int64 value = reflection->GetInt64(*message, field_descriptor); +#if IS_64BIT + result = PyInt_FromLong(value); +#else + result = PyLong_FromLongLong(value); +#endif + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: { + uint32 value = reflection->GetUInt32(*message, field_descriptor); +#if IS_64BIT + result = PyInt_FromLong(value); +#else + result = PyLong_FromLongLong(value); +#endif + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: { + uint64 value = reflection->GetUInt64(*message, field_descriptor); +#if IS_64BIT + if (value <= static_cast<uint64>(kint64max)) { + result = PyInt_FromLong(static_cast<uint64>(value)); + } +#else + if (value <= static_cast<uint32>(kint32max)) { + result = PyInt_FromLong(static_cast<uint32>(value)); + } +#endif + else { // NOLINT + result = PyLong_FromUnsignedLongLong(value); + } + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: { + float value = reflection->GetFloat(*message, field_descriptor); + result = PyFloat_FromDouble(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: { + double value = reflection->GetDouble(*message, field_descriptor); + result = PyFloat_FromDouble(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: { + bool value = reflection->GetBool(*message, field_descriptor); + result = PyBool_FromLong(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_STRING: { + string value = reflection->GetString(*message, field_descriptor); + result = ToStringObject(field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: { + if (!message->GetReflection()->HasField(*message, field_descriptor)) { + // Look for the value in the unknown fields. 
+ google::protobuf::UnknownFieldSet* unknown_field_set = + message->GetReflection()->MutableUnknownFields(message); + for (int i = 0; i < unknown_field_set->field_count(); ++i) { + if (unknown_field_set->field(i).number() == + field_descriptor->number()) { + result = PyInt_FromLong(unknown_field_set->field(i).varint()); + break; + } + } + } + + if (result == NULL) { + const google::protobuf::EnumValueDescriptor* enum_value = + message->GetReflection()->GetEnum(*message, field_descriptor); + result = PyInt_FromLong(enum_value->number()); + } + break; + } + default: + PyErr_Format( + PyExc_SystemError, "Getting a value from a field of unknown type %d", + field_descriptor->cpp_type()); + } + + return result; +} + +static PyObject* InternalSetScalar( + google::protobuf::Message* message, const google::protobuf::FieldDescriptor* field_descriptor, + PyObject* arg) { + const google::protobuf::Reflection* reflection = message->GetReflection(); + + if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) { + PyErr_SetString( + PyExc_KeyError, "Field does not belong to message!"); + return NULL; + } + + switch (field_descriptor->cpp_type()) { + case google::protobuf::FieldDescriptor::CPPTYPE_INT32: { + GOOGLE_CHECK_GET_INT32(arg, value); + reflection->SetInt32(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_INT64: { + GOOGLE_CHECK_GET_INT64(arg, value); + reflection->SetInt64(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: { + GOOGLE_CHECK_GET_UINT32(arg, value); + reflection->SetUInt32(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: { + GOOGLE_CHECK_GET_UINT64(arg, value); + reflection->SetUInt64(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: { + GOOGLE_CHECK_GET_FLOAT(arg, value); + reflection->SetFloat(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: { + GOOGLE_CHECK_GET_DOUBLE(arg, value); + reflection->SetDouble(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: { + GOOGLE_CHECK_GET_BOOL(arg, value); + reflection->SetBool(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_STRING: { + if (!CheckAndSetString( + arg, message, field_descriptor, reflection, false, -1)) { + return NULL; + } + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: { + GOOGLE_CHECK_GET_INT32(arg, value); + const google::protobuf::EnumDescriptor* enum_descriptor = + field_descriptor->enum_type(); + const google::protobuf::EnumValueDescriptor* enum_value = + enum_descriptor->FindValueByNumber(value); + if (enum_value != NULL) { + reflection->SetEnum(message, field_descriptor, enum_value); + } else { + bool added = false; + // Add the value to the unknown fields. 
+ google::protobuf::UnknownFieldSet* unknown_field_set = + message->GetReflection()->MutableUnknownFields(message); + for (int i = 0; i < unknown_field_set->field_count(); ++i) { + if (unknown_field_set->field(i).number() == + field_descriptor->number()) { + unknown_field_set->mutable_field(i)->set_varint(value); + added = true; + break; + } + } + + if (!added) { + unknown_field_set->AddVarint(field_descriptor->number(), value); + } + reflection->ClearField(message, field_descriptor); + } + break; + } + default: + PyErr_Format( + PyExc_SystemError, "Setting value to a field of unknown type %d", + field_descriptor->cpp_type()); + } + + Py_RETURN_NONE; +} + +static PyObject* InternalAddRepeatedScalar( + google::protobuf::Message* message, const google::protobuf::FieldDescriptor* field_descriptor, + PyObject* arg) { + + if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) { + PyErr_SetString( + PyExc_KeyError, "Field does not belong to message!"); + return NULL; + } + + const google::protobuf::Reflection* reflection = message->GetReflection(); + switch (field_descriptor->cpp_type()) { + case google::protobuf::FieldDescriptor::CPPTYPE_INT32: { + GOOGLE_CHECK_GET_INT32(arg, value); + reflection->AddInt32(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_INT64: { + GOOGLE_CHECK_GET_INT64(arg, value); + reflection->AddInt64(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: { + GOOGLE_CHECK_GET_UINT32(arg, value); + reflection->AddUInt32(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: { + GOOGLE_CHECK_GET_UINT64(arg, value); + reflection->AddUInt64(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: { + GOOGLE_CHECK_GET_FLOAT(arg, value); + reflection->AddFloat(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: { + GOOGLE_CHECK_GET_DOUBLE(arg, value); + reflection->AddDouble(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: { + GOOGLE_CHECK_GET_BOOL(arg, value); + reflection->AddBool(message, field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_STRING: { + if (!CheckAndSetString( + arg, message, field_descriptor, reflection, true, -1)) { + return NULL; + } + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: { + GOOGLE_CHECK_GET_INT32(arg, value); + const google::protobuf::EnumDescriptor* enum_descriptor = + field_descriptor->enum_type(); + const google::protobuf::EnumValueDescriptor* enum_value = + enum_descriptor->FindValueByNumber(value); + if (enum_value != NULL) { + reflection->AddEnum(message, field_descriptor, enum_value); + } else { + PyObject* s = PyObject_Str(arg); + PyErr_Format(PyExc_ValueError, "Unknown enum value: %s", + PyString_AS_STRING(s)); + Py_DECREF(s); + return NULL; + } + break; + } + default: + PyErr_Format( + PyExc_SystemError, "Adding value to a field of unknown type %d", + field_descriptor->cpp_type()); + } + + Py_RETURN_NONE; +} + +static PyObject* InternalGetRepeatedScalar( + CMessage* cmessage, const google::protobuf::FieldDescriptor* field_descriptor, + int index) { + google::protobuf::Message* message = cmessage->message; + const google::protobuf::Reflection* reflection = message->GetReflection(); + + int field_size = reflection->FieldSize(*message, field_descriptor); + if (index < 0) { + 
index = field_size + index; + } + if (index < 0 || index >= field_size) { + PyErr_Format(PyExc_IndexError, + "list assignment index (%d) out of range", index); + return NULL; + } + + PyObject* result = NULL; + switch (field_descriptor->cpp_type()) { + case google::protobuf::FieldDescriptor::CPPTYPE_INT32: { + int32 value = reflection->GetRepeatedInt32( + *message, field_descriptor, index); + result = PyInt_FromLong(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_INT64: { + int64 value = reflection->GetRepeatedInt64( + *message, field_descriptor, index); + result = PyLong_FromLongLong(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: { + uint32 value = reflection->GetRepeatedUInt32( + *message, field_descriptor, index); + result = PyLong_FromLongLong(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: { + uint64 value = reflection->GetRepeatedUInt64( + *message, field_descriptor, index); + result = PyLong_FromUnsignedLongLong(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: { + float value = reflection->GetRepeatedFloat( + *message, field_descriptor, index); + result = PyFloat_FromDouble(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: { + double value = reflection->GetRepeatedDouble( + *message, field_descriptor, index); + result = PyFloat_FromDouble(value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: { + bool value = reflection->GetRepeatedBool( + *message, field_descriptor, index); + result = PyBool_FromLong(value ? 1 : 0); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: { + const google::protobuf::EnumValueDescriptor* enum_value = + message->GetReflection()->GetRepeatedEnum( + *message, field_descriptor, index); + result = PyInt_FromLong(enum_value->number()); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_STRING: { + string value = reflection->GetRepeatedString( + *message, field_descriptor, index); + result = ToStringObject(field_descriptor, value); + break; + } + case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { + CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type); + if (py_cmsg == NULL) { + return NULL; + } + const google::protobuf::Message& msg = reflection->GetRepeatedMessage( + *message, field_descriptor, index); + py_cmsg->parent = cmessage; + py_cmsg->full_name = field_descriptor->full_name().c_str(); + py_cmsg->message = const_cast<google::protobuf::Message*>(&msg); + py_cmsg->free_message = false; + py_cmsg->read_only = false; + result = reinterpret_cast<PyObject*>(py_cmsg); + break; + } + default: + PyErr_Format( + PyExc_SystemError, + "Getting value from a repeated field of unknown type %d", + field_descriptor->cpp_type()); + } + + return result; +} + +static PyObject* InternalGetRepeatedScalarSlice( + CMessage* cmessage, const google::protobuf::FieldDescriptor* field_descriptor, + PyObject* slice) { + Py_ssize_t from; + Py_ssize_t to; + Py_ssize_t step; + Py_ssize_t length; + bool return_list = false; + google::protobuf::Message* message = cmessage->message; + + if (PyInt_Check(slice)) { + from = to = PyInt_AsLong(slice); + } else if (PyLong_Check(slice)) { + from = to = PyLong_AsLong(slice); + } else if (PySlice_Check(slice)) { + const google::protobuf::Reflection* reflection = message->GetReflection(); + length = reflection->FieldSize(*message, field_descriptor); + PySlice_GetIndices( + reinterpret_cast<PySliceObject*>(slice), length, &from, &to, &step); + 
return_list = true; + } else { + PyErr_SetString(PyExc_TypeError, "list indices must be integers"); + return NULL; + } + + if (!return_list) { + return InternalGetRepeatedScalar(cmessage, field_descriptor, from); + } + + PyObject* list = PyList_New(0); + if (list == NULL) { + return NULL; + } + + if (from <= to) { + if (step < 0) return list; + for (Py_ssize_t index = from; index < to; index += step) { + if (index < 0 || index >= length) break; + PyObject* s = InternalGetRepeatedScalar( + cmessage, field_descriptor, index); + PyList_Append(list, s); + Py_DECREF(s); + } + } else { + if (step > 0) return list; + for (Py_ssize_t index = from; index > to; index += step) { + if (index < 0 || index >= length) break; + PyObject* s = InternalGetRepeatedScalar( + cmessage, field_descriptor, index); + PyList_Append(list, s); + Py_DECREF(s); + } + } + return list; +} + +// ------ C Constructor/Destructor: + +static int CMessageInit(CMessage* self, PyObject *args, PyObject *kwds) { + self->message = NULL; + return 0; +} + +static void CMessageDealloc(CMessage* self) { + if (self->free_message) { + if (self->read_only) { + PyErr_WriteUnraisable(reinterpret_cast<PyObject*>(self)); + } + delete self->message; + } + self->ob_type->tp_free(reinterpret_cast<PyObject*>(self)); +} + +// ------ Methods: + +static PyObject* CMessage_Clear(CMessage* self, PyObject* args) { + AssureWritable(self); + self->message->Clear(); + Py_RETURN_NONE; +} + +static PyObject* CMessage_IsInitialized(CMessage* self, PyObject* args) { + return PyBool_FromLong(self->message->IsInitialized() ? 1 : 0); +} + +static PyObject* CMessage_HasField(CMessage* self, PyObject* arg) { + char* field_name; + if (PyString_AsStringAndSize(arg, &field_name, NULL) < 0) { + return NULL; + } + + google::protobuf::Message* message = self->message; + const google::protobuf::Descriptor* descriptor = message->GetDescriptor(); + const google::protobuf::FieldDescriptor* field_descriptor = + descriptor->FindFieldByName(field_name); + if (field_descriptor == NULL) { + PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name); + return NULL; + } + + bool has_field = + message->GetReflection()->HasField(*message, field_descriptor); + return PyBool_FromLong(has_field ? 1 : 0); +} + +static PyObject* CMessage_HasFieldByDescriptor(CMessage* self, PyObject* arg) { + CFieldDescriptor* cfield_descriptor = NULL; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), + &CFieldDescriptor_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a field descriptor"); + return NULL; + } + cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg); + + google::protobuf::Message* message = self->message; + const google::protobuf::FieldDescriptor* field_descriptor = + cfield_descriptor->descriptor; + + if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) { + PyErr_SetString(PyExc_KeyError, + "Field does not belong to message!"); + return NULL; + } + + if (FIELD_IS_REPEATED(field_descriptor)) { + PyErr_SetString(PyExc_KeyError, + "Field is repeated. A singular method is required."); + return NULL; + } + + bool has_field = + message->GetReflection()->HasField(*message, field_descriptor); + return PyBool_FromLong(has_field ? 
1 : 0); +} + +static PyObject* CMessage_ClearFieldByDescriptor( + CMessage* self, PyObject* arg) { + CFieldDescriptor* cfield_descriptor = NULL; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), + &CFieldDescriptor_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a field descriptor"); + return NULL; + } + cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg); + + google::protobuf::Message* message = self->message; + const google::protobuf::FieldDescriptor* field_descriptor = + cfield_descriptor->descriptor; + + if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) { + PyErr_SetString(PyExc_KeyError, + "Field does not belong to message!"); + return NULL; + } + + message->GetReflection()->ClearField(message, field_descriptor); + Py_RETURN_NONE; +} + +static PyObject* CMessage_ClearField(CMessage* self, PyObject* arg) { + char* field_name; + if (PyString_AsStringAndSize(arg, &field_name, NULL) < 0) { + return NULL; + } + + google::protobuf::Message* message = self->message; + const google::protobuf::Descriptor* descriptor = message->GetDescriptor(); + const google::protobuf::FieldDescriptor* field_descriptor = + descriptor->FindFieldByName(field_name); + if (field_descriptor == NULL) { + PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name); + return NULL; + } + + message->GetReflection()->ClearField(message, field_descriptor); + Py_RETURN_NONE; +} + +static PyObject* CMessage_GetScalar(CMessage* self, PyObject* arg) { + CFieldDescriptor* cdescriptor = NULL; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), + &CFieldDescriptor_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a field descriptor"); + return NULL; + } + cdescriptor = reinterpret_cast<CFieldDescriptor*>(arg); + + google::protobuf::Message* message = self->message; + return InternalGetScalar(message, cdescriptor->descriptor); +} + +static PyObject* CMessage_GetRepeatedScalar(CMessage* self, PyObject* args) { + CFieldDescriptor* cfield_descriptor; + PyObject* slice; + if (!PyArg_ParseTuple(args, C("O!O:GetRepeatedScalar"), + &CFieldDescriptor_Type, &cfield_descriptor, &slice)) { + return NULL; + } + + return InternalGetRepeatedScalarSlice( + self, cfield_descriptor->descriptor, slice); +} + +static PyObject* CMessage_AssignRepeatedScalar(CMessage* self, PyObject* args) { + CFieldDescriptor* cfield_descriptor; + PyObject* slice; + if (!PyArg_ParseTuple(args, C("O!O:AssignRepeatedScalar"), + &CFieldDescriptor_Type, &cfield_descriptor, &slice)) { + return NULL; + } + + AssureWritable(self); + google::protobuf::Message* message = self->message; + message->GetReflection()->ClearField(message, cfield_descriptor->descriptor); + + PyObject* iter = PyObject_GetIter(slice); + PyObject* next; + while ((next = PyIter_Next(iter)) != NULL) { + if (InternalAddRepeatedScalar( + message, cfield_descriptor->descriptor, next) == NULL) { + Py_DECREF(iter); + return NULL; + } + } + Py_DECREF(iter); + Py_RETURN_NONE; +} + +static PyObject* CMessage_DeleteRepeatedField(CMessage* self, PyObject* args) { + CFieldDescriptor* cfield_descriptor; + PyObject* slice; + if (!PyArg_ParseTuple(args, C("O!O:DeleteRepeatedField"), + &CFieldDescriptor_Type, &cfield_descriptor, &slice)) { + return NULL; + } + AssureWritable(self); + + Py_ssize_t length, from, to, step, slice_length; + google::protobuf::Message* message = self->message; + const google::protobuf::FieldDescriptor* field_descriptor = + cfield_descriptor->descriptor; + const google::protobuf::Reflection* reflection = message->GetReflection(); + int min, 
max; + length = reflection->FieldSize(*message, field_descriptor); + + if (PyInt_Check(slice) || PyLong_Check(slice)) { + from = to = PyLong_AsLong(slice); + if (from < 0) { + from = to = length + from; + } + step = 1; + min = max = from; + + // Range check. + if (from < 0 || from >= length) { + PyErr_Format(PyExc_IndexError, "list assignment index out of range"); + return NULL; + } + } else if (PySlice_Check(slice)) { + from = to = step = slice_length = 0; + PySlice_GetIndicesEx( + reinterpret_cast<PySliceObject*>(slice), + length, &from, &to, &step, &slice_length); + if (from < to) { + min = from; + max = to - 1; + } else { + min = to + 1; + max = from; + } + } else { + PyErr_SetString(PyExc_TypeError, "list indices must be integers"); + return NULL; + } + + Py_ssize_t i = from; + std::vector<bool> to_delete(length, false); + while (i >= min && i <= max) { + to_delete[i] = true; + i += step; + } + + to = 0; + for (i = 0; i < length; ++i) { + if (!to_delete[i]) { + if (i != to) { + reflection->SwapElements(message, field_descriptor, i, to); + } + ++to; + } + } + + while (i > to) { + reflection->RemoveLast(message, field_descriptor); + --i; + } + + Py_RETURN_NONE; +} + + +static PyObject* CMessage_SetScalar(CMessage* self, PyObject* args) { + CFieldDescriptor* cfield_descriptor; + PyObject* arg; + if (!PyArg_ParseTuple(args, C("O!O:SetScalar"), + &CFieldDescriptor_Type, &cfield_descriptor, &arg)) { + return NULL; + } + AssureWritable(self); + + return InternalSetScalar(self->message, cfield_descriptor->descriptor, arg); +} + +static PyObject* CMessage_AddRepeatedScalar(CMessage* self, PyObject* args) { + CFieldDescriptor* cfield_descriptor; + PyObject* value; + if (!PyArg_ParseTuple(args, C("O!O:AddRepeatedScalar"), + &CFieldDescriptor_Type, &cfield_descriptor, &value)) { + return NULL; + } + AssureWritable(self); + + return InternalAddRepeatedScalar( + self->message, cfield_descriptor->descriptor, value); +} + +static PyObject* CMessage_FieldLength(CMessage* self, PyObject* arg) { + CFieldDescriptor* cfield_descriptor; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), + &CFieldDescriptor_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a field descriptor"); + return NULL; + } + cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg); + + google::protobuf::Message* message = self->message; + int length = message->GetReflection()->FieldSize( + *message, cfield_descriptor->descriptor); + return PyInt_FromLong(length); +} + +static PyObject* CMessage_DebugString(CMessage* self, PyObject* args) { + return PyString_FromString(self->message->DebugString().c_str()); +} + +static PyObject* CMessage_SerializeToString(CMessage* self, PyObject* args) { + int size = self->message->ByteSize(); + if (size <= 0) { + return PyString_FromString(""); + } + PyObject* result = PyString_FromStringAndSize(NULL, size); + if (result == NULL) { + return NULL; + } + char* buffer = PyString_AS_STRING(result); + self->message->SerializeWithCachedSizesToArray( + reinterpret_cast<uint8*>(buffer)); + return result; +} + +static PyObject* CMessage_SerializePartialToString( + CMessage* self, PyObject* args) { + string contents; + self->message->SerializePartialToString(&contents); + return PyString_FromStringAndSize(contents.c_str(), contents.size()); +} + +static PyObject* CMessageStr(CMessage* self) { + char str[1024]; + str[sizeof(str) - 1] = 0; + snprintf(str, sizeof(str) - 1, "CMessage: <%p>", self->message); + return PyString_FromString(str); +} + +static PyObject* CMessage_MergeFrom(CMessage* 
self, PyObject* arg) { + CMessage* other_message; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a message"); + return NULL; + } + + other_message = reinterpret_cast<CMessage*>(arg); + if (other_message->message->GetDescriptor() != + self->message->GetDescriptor()) { + PyErr_Format(PyExc_TypeError, + "Tried to merge from a message with a different type. " + "to: %s, from: %s", + self->message->GetDescriptor()->full_name().c_str(), + other_message->message->GetDescriptor()->full_name().c_str()); + return NULL; + } + AssureWritable(self); + + self->message->MergeFrom(*other_message->message); + Py_RETURN_NONE; +} + +static PyObject* CMessage_CopyFrom(CMessage* self, PyObject* arg) { + CMessage* other_message; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a message"); + return NULL; + } + + other_message = reinterpret_cast<CMessage*>(arg); + if (other_message->message->GetDescriptor() != + self->message->GetDescriptor()) { + PyErr_Format(PyExc_TypeError, + "Tried to copy from a message with a different type. " + "to: %s, from: %s", + self->message->GetDescriptor()->full_name().c_str(), + other_message->message->GetDescriptor()->full_name().c_str()); + return NULL; + } + + AssureWritable(self); + + self->message->CopyFrom(*other_message->message); + Py_RETURN_NONE; +} + +static PyObject* CMessage_MergeFromString(CMessage* self, PyObject* arg) { + const void* data; + Py_ssize_t data_length; + if (PyObject_AsReadBuffer(arg, &data, &data_length) < 0) { + return NULL; + } + + AssureWritable(self); + google::protobuf::io::CodedInputStream input( + reinterpret_cast<const uint8*>(data), data_length); + bool success = self->message->MergePartialFromCodedStream(&input); + if (success) { + return PyInt_FromLong(self->message->ByteSize()); + } else { + return PyInt_FromLong(-1); + } +} + +static PyObject* CMessage_ByteSize(CMessage* self, PyObject* args) { + return PyLong_FromLong(self->message->ByteSize()); +} + +static PyObject* CMessage_SetInParent(CMessage* self, PyObject* args) { + AssureWritable(self); + Py_RETURN_NONE; +} + +static PyObject* CMessage_SwapRepeatedFieldElements( + CMessage* self, PyObject* args) { + CFieldDescriptor* cfield_descriptor; + int index1, index2; + if (!PyArg_ParseTuple(args, C("O!ii:SwapRepeatedFieldElements"), + &CFieldDescriptor_Type, &cfield_descriptor, + &index1, &index2)) { + return NULL; + } + + google::protobuf::Message* message = self->message; + const google::protobuf::Reflection* reflection = message->GetReflection(); + + reflection->SwapElements( + message, cfield_descriptor->descriptor, index1, index2); + Py_RETURN_NONE; +} + +static PyObject* CMessage_AddMessage(CMessage* self, PyObject* arg) { + CFieldDescriptor* cfield_descriptor; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), + &CFieldDescriptor_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a field descriptor"); + return NULL; + } + cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg); + AssureWritable(self); + + CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type); + if (py_cmsg == NULL) { + return NULL; + } + + google::protobuf::Message* message = self->message; + const google::protobuf::Reflection* reflection = message->GetReflection(); + google::protobuf::Message* sub_message = + reflection->AddMessage(message, cfield_descriptor->descriptor); + + py_cmsg->parent = NULL; + py_cmsg->full_name = 
sub_message->GetDescriptor()->full_name().c_str(); + py_cmsg->message = sub_message; + py_cmsg->free_message = false; + py_cmsg->read_only = false; + return reinterpret_cast<PyObject*>(py_cmsg); +} + +static PyObject* CMessage_GetRepeatedMessage(CMessage* self, PyObject* args) { + CFieldDescriptor* cfield_descriptor; + PyObject* slice; + if (!PyArg_ParseTuple(args, C("O!O:GetRepeatedMessage"), + &CFieldDescriptor_Type, &cfield_descriptor, &slice)) { + return NULL; + } + + return InternalGetRepeatedScalarSlice( + self, cfield_descriptor->descriptor, slice); +} + +static PyObject* CMessage_NewSubMessage(CMessage* self, PyObject* arg) { + CFieldDescriptor* cfield_descriptor; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), + &CFieldDescriptor_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a field descriptor"); + return NULL; + } + cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg); + + CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type); + if (py_cmsg == NULL) { + return NULL; + } + + google::protobuf::Message* message = self->message; + const google::protobuf::Reflection* reflection = message->GetReflection(); + const google::protobuf::Message& sub_message = + reflection->GetMessage(*message, cfield_descriptor->descriptor, + global_message_factory); + + py_cmsg->full_name = sub_message.GetDescriptor()->full_name().c_str(); + py_cmsg->parent = self; + py_cmsg->parent_field = cfield_descriptor; + py_cmsg->message = const_cast<google::protobuf::Message*>(&sub_message); + py_cmsg->free_message = false; + py_cmsg->read_only = true; + return reinterpret_cast<PyObject*>(py_cmsg); +} + +static PyObject* CMessage_MutableMessage(CMessage* self, PyObject* arg) { + CFieldDescriptor* cfield_descriptor; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), + &CFieldDescriptor_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a field descriptor"); + return NULL; + } + cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg); + AssureWritable(self); + + CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type); + if (py_cmsg == NULL) { + return NULL; + } + + google::protobuf::Message* message = self->message; + const google::protobuf::Reflection* reflection = message->GetReflection(); + google::protobuf::Message* mutable_message = + reflection->MutableMessage(message, cfield_descriptor->descriptor, + global_message_factory); + + py_cmsg->full_name = mutable_message->GetDescriptor()->full_name().c_str(); + py_cmsg->message = mutable_message; + py_cmsg->free_message = false; + py_cmsg->read_only = false; + return reinterpret_cast<PyObject*>(py_cmsg); +} + +static PyObject* CMessage_Equals(CMessage* self, PyObject* arg) { + CMessage* other_message; + if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) { + PyErr_SetString(PyExc_TypeError, "Must be a message"); + return NULL; + } + other_message = reinterpret_cast<CMessage*>(arg); + + if (other_message->message == self->message) { + return PyBool_FromLong(1); + } + + if (other_message->message->GetDescriptor() != + self->message->GetDescriptor()) { + return PyBool_FromLong(0); + } + + return PyBool_FromLong(1); +} + +static PyObject* CMessage_ListFields(CMessage* self, PyObject* args) { + google::protobuf::Message* message = self->message; + const google::protobuf::Reflection* reflection = message->GetReflection(); + vector<const google::protobuf::FieldDescriptor*> fields; + reflection->ListFields(*message, &fields); + + PyObject* list = PyList_New(fields.size()); + if (list == NULL) { + 
return NULL; + } + + for (unsigned int i = 0; i < fields.size(); ++i) { + bool is_extension = fields[i]->is_extension(); + PyObject* t = PyTuple_New(2); + if (t == NULL) { + Py_DECREF(list); + return NULL; + } + + PyObject* is_extension_object = PyBool_FromLong(is_extension ? 1 : 0); + + PyObject* field_name; + const string* s; + if (is_extension) { + s = &fields[i]->full_name(); + } else { + s = &fields[i]->name(); + } + field_name = PyString_FromStringAndSize(s->c_str(), s->length()); + if (field_name == NULL) { + Py_DECREF(list); + Py_DECREF(t); + return NULL; + } + + PyTuple_SET_ITEM(t, 0, is_extension_object); + PyTuple_SET_ITEM(t, 1, field_name); + PyList_SET_ITEM(list, i, t); + } + + return list; +} + +static PyObject* CMessage_FindInitializationErrors(CMessage* self) { + google::protobuf::Message* message = self->message; + vector<string> errors; + message->FindInitializationErrors(&errors); + + PyObject* error_list = PyList_New(errors.size()); + if (error_list == NULL) { + return NULL; + } + for (unsigned int i = 0; i < errors.size(); ++i) { + const string& error = errors[i]; + PyObject* error_string = PyString_FromStringAndSize( + error.c_str(), error.length()); + if (error_string == NULL) { + Py_DECREF(error_list); + return NULL; + } + PyList_SET_ITEM(error_list, i, error_string); + } + return error_list; +} + +// ------ Python Constructor: + +PyObject* Python_NewCMessage(PyObject* ignored, PyObject* arg) { + const char* message_type = PyString_AsString(arg); + if (message_type == NULL) { + return NULL; + } + + const google::protobuf::Message* message = CreateMessage(message_type); + if (message == NULL) { + PyErr_Format(PyExc_TypeError, "Couldn't create message of type %s!", + message_type); + return NULL; + } + + CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type); + if (py_cmsg == NULL) { + return NULL; + } + py_cmsg->message = message->New(); + py_cmsg->free_message = true; + py_cmsg->full_name = message->GetDescriptor()->full_name().c_str(); + py_cmsg->read_only = false; + py_cmsg->parent = NULL; + py_cmsg->parent_field = NULL; + return reinterpret_cast<PyObject*>(py_cmsg); +} + +// --- Module Functions (exposed to Python): + +PyMethodDef methods[] = { + { C("NewCMessage"), (PyCFunction)Python_NewCMessage, + METH_O, + C("Creates a new C++ protocol message, given its full name.") }, + { C("NewCDescriptorPool"), (PyCFunction)Python_NewCDescriptorPool, + METH_NOARGS, + C("Creates a new C++ descriptor pool.") }, + { C("BuildFile"), (PyCFunction)Python_BuildFile, + METH_O, + C("Registers a new protocol buffer file in the global C++ descriptor " + "pool.") }, + {NULL} +}; + +// --- Exposing the C proto living inside Python proto to C code: + +extern const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg); +extern Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg); + +static const google::protobuf::Message* GetCProtoInsidePyProtoImpl(PyObject* msg) { + PyObject* c_msg_obj = PyObject_GetAttrString(msg, "_cmsg"); + if (c_msg_obj == NULL) { + PyErr_Clear(); + return NULL; + } + Py_DECREF(c_msg_obj); + if (!PyObject_TypeCheck(c_msg_obj, &CMessage_Type)) { + return NULL; + } + CMessage* c_msg = reinterpret_cast<CMessage*>(c_msg_obj); + return c_msg->message; +} + +static google::protobuf::Message* MutableCProtoInsidePyProtoImpl(PyObject* msg) { + PyObject* c_msg_obj = PyObject_GetAttrString(msg, "_cmsg"); + if (c_msg_obj == NULL) { + PyErr_Clear(); + return NULL; + } + Py_DECREF(c_msg_obj); + if (!PyObject_TypeCheck(c_msg_obj, &CMessage_Type)) { + return NULL; + } + 
CMessage* c_msg = reinterpret_cast<CMessage*>(c_msg_obj); + AssureWritable(c_msg); + return c_msg->message; +} + +// --- Module Init Function: + +static const char module_docstring[] = +"python-proto2 is a module that can be used to enhance proto2 Python API\n" +"performance.\n" +"\n" +"It provides access to the protocol buffers C++ reflection API that\n" +"implements the basic protocol buffer functions."; + +extern "C" { + void init_net_proto2___python() { + // Initialize constants. + kPythonZero = PyInt_FromLong(0); + kint32min_py = PyInt_FromLong(kint32min); + kint32max_py = PyInt_FromLong(kint32max); + kuint32max_py = PyLong_FromLongLong(kuint32max); + kint64min_py = PyLong_FromLongLong(kint64min); + kint64max_py = PyLong_FromLongLong(kint64max); + kuint64max_py = PyLong_FromUnsignedLongLong(kuint64max); + + global_message_factory = new DynamicMessageFactory(GetDescriptorPool()); + global_message_factory->SetDelegateToGeneratedFactory(true); + + // Export our functions to Python. + PyObject *m; + m = Py_InitModule3(C("_net_proto2___python"), methods, C(module_docstring)); + if (m == NULL) { + return; + } + + AddConstants(m); + + CMessage_Type.tp_new = PyType_GenericNew; + if (PyType_Ready(&CMessage_Type) < 0) { + return; + } + + if (!InitDescriptor()) { + return; + } + + // Override {Get,Mutable}CProtoInsidePyProto. + GetCProtoInsidePyProtoPtr = GetCProtoInsidePyProtoImpl; + MutableCProtoInsidePyProtoPtr = MutableCProtoInsidePyProtoImpl; + } +} + +} // namespace python +} // namespace protobuf +} // namespace google diff --git a/python/google/protobuf/pyext/python_descriptor.cc b/python/google/protobuf/pyext/python_descriptor.cc new file mode 100644 index 00000000..fb87bad1 --- /dev/null +++ b/python/google/protobuf/pyext/python_descriptor.cc @@ -0,0 +1,334 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
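Before the descriptor support code in python_descriptor.cc, a rough sketch of how the _net_proto2___python extension defined above in python-proto2.cc could be driven directly from Python. This is illustrative only (reflection code normally calls into it on the application's behalf); the import path and the presence of the protobuf_unittest.TestAllTypes descriptor in the pool are assumptions:

    # Assumed install path for the extension module built from python-proto2.cc.
    from google.protobuf.internal import _net_proto2___python as cpp

    # Wraps the global C++ DescriptorPool used by the extension.
    pool = cpp.NewCDescriptorPool()

    # Assumes the type's .proto file is already registered (generated pool or BuildFile).
    msg = cpp.NewCMessage('protobuf_unittest.TestAllTypes')

    field = pool.FindFieldByName('protobuf_unittest.TestAllTypes.repeated_int64')
    msg.AddRepeatedScalar(field, 42)       # appends to a repeated scalar field
    assert msg.FieldLength(field) == 1

    data = msg.SerializeToString()         # per the method table: only for initialized messages
    other = cpp.NewCMessage('protobuf_unittest.TestAllTypes')
    other.MergeFromString(data)            # returns the parsed ByteSize, or -1 on failure
    text = other.DebugString()             # text-format dump of the parsed message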
+ +// Author: petar@google.com (Petar Petrov) + +#include <Python.h> + +#include <google/protobuf/pyext/python_descriptor.h> +#include <google/protobuf/descriptor.pb.h> + +#define C(str) const_cast<char*>(str) + +namespace google { +namespace protobuf { +namespace python { + +static void CFieldDescriptorDealloc(CFieldDescriptor* self); + +static google::protobuf::DescriptorPool* g_descriptor_pool = NULL; + +static PyObject* CFieldDescriptor_GetFullName( + CFieldDescriptor* self, void *closure) { + Py_XINCREF(self->full_name); + return self->full_name; +} + +static PyObject* CFieldDescriptor_GetName( + CFieldDescriptor *self, void *closure) { + Py_XINCREF(self->name); + return self->name; +} + +static PyObject* CFieldDescriptor_GetCppType( + CFieldDescriptor *self, void *closure) { + Py_XINCREF(self->cpp_type); + return self->cpp_type; +} + +static PyObject* CFieldDescriptor_GetLabel( + CFieldDescriptor *self, void *closure) { + Py_XINCREF(self->label); + return self->label; +} + +static PyObject* CFieldDescriptor_GetID( + CFieldDescriptor *self, void *closure) { + Py_XINCREF(self->id); + return self->id; +} + + +static PyGetSetDef CFieldDescriptorGetters[] = { + { C("full_name"), + (getter)CFieldDescriptor_GetFullName, NULL, "Full name", NULL}, + { C("name"), + (getter)CFieldDescriptor_GetName, NULL, "last name", NULL}, + { C("cpp_type"), + (getter)CFieldDescriptor_GetCppType, NULL, "C++ Type", NULL}, + { C("label"), + (getter)CFieldDescriptor_GetLabel, NULL, "Label", NULL}, + { C("id"), + (getter)CFieldDescriptor_GetID, NULL, "ID", NULL}, + {NULL} +}; + +PyTypeObject CFieldDescriptor_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, + C("google3.net.google.protobuf.python.internal." + "_net_proto2___python." + "CFieldDescriptor"), // tp_name + sizeof(CFieldDescriptor), // tp_basicsize + 0, // tp_itemsize + (destructor)CFieldDescriptorDealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_compare + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + C("A Field Descriptor"), // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter + 0, // tp_iternext + 0, // tp_methods + 0, // tp_members + CFieldDescriptorGetters, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + PyType_GenericNew, // tp_new + PyObject_Del, // tp_free +}; + +static void CFieldDescriptorDealloc(CFieldDescriptor* self) { + Py_DECREF(self->full_name); + Py_DECREF(self->name); + Py_DECREF(self->cpp_type); + Py_DECREF(self->label); + Py_DECREF(self->id); + self->ob_type->tp_free(reinterpret_cast<PyObject*>(self)); +} + +typedef struct { + PyObject_HEAD + + const google::protobuf::DescriptorPool* pool; +} CDescriptorPool; + +static void CDescriptorPoolDealloc(CDescriptorPool* self); + +static PyObject* CDescriptorPool_NewCDescriptor( + const google::protobuf::FieldDescriptor* field_descriptor) { + CFieldDescriptor* cfield_descriptor = PyObject_New( + CFieldDescriptor, &CFieldDescriptor_Type); + if (cfield_descriptor == NULL) { + return NULL; + } + cfield_descriptor->descriptor = field_descriptor; + + cfield_descriptor->full_name = PyString_FromString( + field_descriptor->full_name().c_str()); + cfield_descriptor->name = PyString_FromString( + 
field_descriptor->name().c_str()); + cfield_descriptor->cpp_type = PyLong_FromLong(field_descriptor->cpp_type()); + cfield_descriptor->label = PyLong_FromLong(field_descriptor->label()); + cfield_descriptor->id = PyLong_FromVoidPtr(cfield_descriptor); + return reinterpret_cast<PyObject*>(cfield_descriptor); +} + +static PyObject* CDescriptorPool_FindFieldByName( + CDescriptorPool* self, PyObject* arg) { + const char* full_field_name = PyString_AsString(arg); + if (full_field_name == NULL) { + return NULL; + } + + const google::protobuf::FieldDescriptor* field_descriptor = NULL; + + field_descriptor = self->pool->FindFieldByName(full_field_name); + if (field_descriptor == NULL) { + PyErr_Format(PyExc_TypeError, "Couldn't find field %.200s", + full_field_name); + return NULL; + } + + return CDescriptorPool_NewCDescriptor(field_descriptor); +} + +static PyObject* CDescriptorPool_FindExtensionByName( + CDescriptorPool* self, PyObject* arg) { + const char* full_field_name = PyString_AsString(arg); + if (full_field_name == NULL) { + return NULL; + } + + const google::protobuf::FieldDescriptor* field_descriptor = + self->pool->FindExtensionByName(full_field_name); + if (field_descriptor == NULL) { + PyErr_Format(PyExc_TypeError, "Couldn't find field %.200s", + full_field_name); + return NULL; + } + + return CDescriptorPool_NewCDescriptor(field_descriptor); +} + +static PyMethodDef CDescriptorPoolMethods[] = { + { C("FindFieldByName"), + (PyCFunction)CDescriptorPool_FindFieldByName, + METH_O, + C("Searches for a field descriptor by full name.") }, + { C("FindExtensionByName"), + (PyCFunction)CDescriptorPool_FindExtensionByName, + METH_O, + C("Searches for extension descriptor by full name.") }, + {NULL} +}; + +PyTypeObject CDescriptorPool_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, + C("google3.net.google.protobuf.python.internal." + "_net_proto2___python." 
+ "CFieldDescriptor"), // tp_name + sizeof(CDescriptorPool), // tp_basicsize + 0, // tp_itemsize + (destructor)CDescriptorPoolDealloc, // tp_dealloc + 0, // tp_print + 0, // tp_getattr + 0, // tp_setattr + 0, // tp_compare + 0, // tp_repr + 0, // tp_as_number + 0, // tp_as_sequence + 0, // tp_as_mapping + 0, // tp_hash + 0, // tp_call + 0, // tp_str + 0, // tp_getattro + 0, // tp_setattro + 0, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + C("A Descriptor Pool"), // tp_doc + 0, // tp_traverse + 0, // tp_clear + 0, // tp_richcompare + 0, // tp_weaklistoffset + 0, // tp_iter + 0, // tp_iternext + CDescriptorPoolMethods, // tp_methods + 0, // tp_members + 0, // tp_getset + 0, // tp_base + 0, // tp_dict + 0, // tp_descr_get + 0, // tp_descr_set + 0, // tp_dictoffset + 0, // tp_init + PyType_GenericAlloc, // tp_alloc + PyType_GenericNew, // tp_new + PyObject_Del, // tp_free +}; + +static void CDescriptorPoolDealloc(CDescriptorPool* self) { + self->ob_type->tp_free(reinterpret_cast<PyObject*>(self)); +} + +google::protobuf::DescriptorPool* GetDescriptorPool() { + if (g_descriptor_pool == NULL) { + g_descriptor_pool = new google::protobuf::DescriptorPool( + google::protobuf::DescriptorPool::generated_pool()); + } + return g_descriptor_pool; +} + +PyObject* Python_NewCDescriptorPool(PyObject* ignored, PyObject* args) { + CDescriptorPool* cdescriptor_pool = PyObject_New( + CDescriptorPool, &CDescriptorPool_Type); + if (cdescriptor_pool == NULL) { + return NULL; + } + cdescriptor_pool->pool = GetDescriptorPool(); + return reinterpret_cast<PyObject*>(cdescriptor_pool); +} + +PyObject* Python_BuildFile(PyObject* ignored, PyObject* arg) { + char* message_type; + Py_ssize_t message_len; + + if (PyString_AsStringAndSize(arg, &message_type, &message_len) < 0) { + return NULL; + } + + google::protobuf::FileDescriptorProto file_proto; + if (!file_proto.ParseFromArray(message_type, message_len)) { + PyErr_SetString(PyExc_TypeError, "Couldn't parse file content!"); + return NULL; + } + + // If this file is already in the generated pool, don't add it again. + if (google::protobuf::DescriptorPool::generated_pool()->FindFileByName( + file_proto.name()) != NULL) { + Py_RETURN_NONE; + } + + const google::protobuf::FileDescriptor* descriptor = GetDescriptorPool()->BuildFile( + file_proto); + if (descriptor == NULL) { + PyErr_SetString(PyExc_TypeError, + "Couldn't build proto file into descriptor pool!"); + return NULL; + } + + Py_RETURN_NONE; +} + +bool InitDescriptor() { + CFieldDescriptor_Type.tp_new = PyType_GenericNew; + if (PyType_Ready(&CFieldDescriptor_Type) < 0) + return false; + + CDescriptorPool_Type.tp_new = PyType_GenericNew; + if (PyType_Ready(&CDescriptorPool_Type) < 0) + return false; + return true; +} + +} // namespace python +} // namespace protobuf +} // namespace google diff --git a/python/google/protobuf/pyext/python_descriptor.h b/python/google/protobuf/pyext/python_descriptor.h new file mode 100644 index 00000000..5232680f --- /dev/null +++ b/python/google/protobuf/pyext/python_descriptor.h @@ -0,0 +1,87 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: petar@google.com (Petar Petrov)
+
+#ifndef GOOGLE_PROTOBUF_PYTHON_DESCRIPTOR_H__
+#define GOOGLE_PROTOBUF_PYTHON_DESCRIPTOR_H__
+
+#include <Python.h>
+#include <structmember.h>
+
+#include <google/protobuf/descriptor.h>
+
+#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#endif
+
+namespace google {
+namespace protobuf {
+namespace python {
+
+typedef struct {
+ PyObject_HEAD
+
+ // The proto2 descriptor that this object represents.
+ const google::protobuf::FieldDescriptor* descriptor;
+
+ // Full name of the field (PyString).
+ PyObject* full_name;
+
+ // Name of the field (PyString).
+ PyObject* name;
+
+ // C++ type of the field (PyLong).
+ PyObject* cpp_type;
+
+ // Label of the field (PyLong).
+ PyObject* label;
+
+ // Identity of the descriptor (PyLong used as a pointer).
+ PyObject* id;
+} CFieldDescriptor;
+
+extern PyTypeObject CFieldDescriptor_Type;
+
+extern PyTypeObject CDescriptorPool_Type;
+
+
+PyObject* Python_NewCDescriptorPool(PyObject* ignored, PyObject* args);
+PyObject* Python_BuildFile(PyObject* ignored, PyObject* args);
+bool InitDescriptor();
+google::protobuf::DescriptorPool* GetDescriptorPool();
+
+} // namespace python
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_PYTHON_DESCRIPTOR_H__
diff --git a/python/google/protobuf/pyext/python_protobuf.cc b/python/google/protobuf/pyext/python_protobuf.cc
new file mode 100644
index 00000000..1b1ab5d1
--- /dev/null
+++ b/python/google/protobuf/pyext/python_protobuf.cc
@@ -0,0 +1,63 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc.
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: qrczak@google.com (Marcin Kowalczyk) + +#include <google/protobuf/pyext/python_protobuf.h> + +namespace google { +namespace protobuf { +namespace python { + +static const Message* GetCProtoInsidePyProtoStub(PyObject* msg) { + return NULL; +} +static Message* MutableCProtoInsidePyProtoStub(PyObject* msg) { + return NULL; +} + +// This is initialized with a default, stub implementation. +// If python-google.protobuf.cc is loaded, the function pointer is overridden +// with a full implementation. +const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg) = + GetCProtoInsidePyProtoStub; +Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg) = + MutableCProtoInsidePyProtoStub; + +const Message* GetCProtoInsidePyProto(PyObject* msg) { + return GetCProtoInsidePyProtoPtr(msg); +} +Message* MutableCProtoInsidePyProto(PyObject* msg) { + return MutableCProtoInsidePyProtoPtr(msg); +} + +} // namespace python +} // namespace protobuf +} // namespace google diff --git a/python/google/protobuf/pyext/python_protobuf.h b/python/google/protobuf/pyext/python_protobuf.h new file mode 100644 index 00000000..c5b0b1cd --- /dev/null +++ b/python/google/protobuf/pyext/python_protobuf.h @@ -0,0 +1,57 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: qrczak@google.com (Marcin Kowalczyk) +// +// This module exposes the C proto inside the given Python proto, in +// case the Python proto is implemented with a C proto. + +#ifndef GOOGLE_PROTOBUF_PYTHON_PYTHON_PROTOBUF_H__ +#define GOOGLE_PROTOBUF_PYTHON_PYTHON_PROTOBUF_H__ + +#include <Python.h> + +namespace google { +namespace protobuf { + +class Message; + +namespace python { + +// Return the pointer to the C proto inside the given Python proto, +// or NULL when this is not a Python proto implemented with a C proto. +const Message* GetCProtoInsidePyProto(PyObject* msg); +Message* MutableCProtoInsidePyProto(PyObject* msg); + +} // namespace python +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_PYTHON_PYTHON_PROTOBUF_H__ diff --git a/python/google/protobuf/reflection.py b/python/google/protobuf/reflection.py index 45e5ba88..1373c882 100755 --- a/python/google/protobuf/reflection.py +++ b/python/google/protobuf/reflection.py @@ -29,9 +29,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # This code is meant to work on Python 2.4 and above only. -# -# TODO(robinson): Helpers for verbose, common checks like seeing if a -# descriptor's cpp_type is CPPTYPE_MESSAGE. """Contains a metaclass and helper functions used to create protocol message classes from Descriptor objects at runtime. @@ -50,27 +47,22 @@ this file*. __author__ = 'robinson@google.com (Will Robinson)' -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO -import struct -import weakref -# We use "as" to avoid name collisions with variables. -from google.protobuf.internal import containers -from google.protobuf.internal import decoder -from google.protobuf.internal import encoder -from google.protobuf.internal import message_listener as message_listener_mod -from google.protobuf.internal import type_checkers -from google.protobuf.internal import wire_format +from google.protobuf.internal import api_implementation from google.protobuf import descriptor as descriptor_mod -from google.protobuf import message as message_mod -from google.protobuf import text_format - _FieldDescriptor = descriptor_mod.FieldDescriptor +if api_implementation.Type() == 'cpp': + from google.protobuf.internal import cpp_message + _NewMessage = cpp_message.NewMessage + _InitMessage = cpp_message.InitMessage +else: + from google.protobuf.internal import python_message + _NewMessage = python_message.NewMessage + _InitMessage = python_message.InitMessage + + class GeneratedProtocolMessageType(type): """Metaclass for protocol message classes created at runtime from Descriptors. @@ -120,10 +112,12 @@ class GeneratedProtocolMessageType(type): Newly-allocated class. 
""" descriptor = dictionary[GeneratedProtocolMessageType._DESCRIPTOR_KEY] - _AddSlots(descriptor, dictionary) - _AddClassAttributesForNestedExtensions(descriptor, dictionary) + _NewMessage(descriptor, dictionary) superclass = super(GeneratedProtocolMessageType, cls) - return superclass.__new__(cls, name, bases, dictionary) + + new_class = superclass.__new__(cls, name, bases, dictionary) + setattr(descriptor, '_concrete_class', new_class) + return new_class def __init__(cls, name, bases, dictionary): """Here we perform the majority of our work on the class. @@ -143,1018 +137,6 @@ class GeneratedProtocolMessageType(type): type. """ descriptor = dictionary[GeneratedProtocolMessageType._DESCRIPTOR_KEY] - - cls._decoders_by_tag = {} - cls._extensions_by_name = {} - cls._extensions_by_number = {} - if (descriptor.has_options and - descriptor.GetOptions().message_set_wire_format): - cls._decoders_by_tag[decoder.MESSAGE_SET_ITEM_TAG] = ( - decoder.MessageSetItemDecoder(cls._extensions_by_number)) - - # We act as a "friend" class of the descriptor, setting - # its _concrete_class attribute the first time we use a - # given descriptor to initialize a concrete protocol message - # class. We also attach stuff to each FieldDescriptor for quick - # lookup later on. - concrete_class_attr_name = '_concrete_class' - if not hasattr(descriptor, concrete_class_attr_name): - setattr(descriptor, concrete_class_attr_name, cls) - for field in descriptor.fields: - _AttachFieldHelpers(cls, field) - - _AddEnumValues(descriptor, cls) - _AddInitMethod(descriptor, cls) - _AddPropertiesForFields(descriptor, cls) - _AddPropertiesForExtensions(descriptor, cls) - _AddStaticMethods(cls) - _AddMessageMethods(descriptor, cls) - _AddPrivateHelperMethods(cls) + _InitMessage(descriptor, cls) superclass = super(GeneratedProtocolMessageType, cls) superclass.__init__(name, bases, dictionary) - - -# Stateless helpers for GeneratedProtocolMessageType below. -# Outside clients should not access these directly. -# -# I opted not to make any of these methods on the metaclass, to make it more -# clear that I'm not really using any state there and to keep clients from -# thinking that they have direct access to these construction helpers. - - -def _PropertyName(proto_field_name): - """Returns the name of the public property attribute which - clients can use to get and (in some cases) set the value - of a protocol message field. - - Args: - proto_field_name: The protocol message field name, exactly - as it appears (or would appear) in a .proto file. - """ - # TODO(robinson): Escape Python keywords (e.g., yield), and test this support. - # nnorwitz makes my day by writing: - # """ - # FYI. See the keyword module in the stdlib. This could be as simple as: - # - # if keyword.iskeyword(proto_field_name): - # return proto_field_name + "_" - # return proto_field_name - # """ - # Kenton says: The above is a BAD IDEA. People rely on being able to use - # getattr() and setattr() to reflectively manipulate field values. If we - # rename the properties, then every such user has to also make sure to apply - # the same transformation. Note that currently if you name a field "yield", - # you can still access it just fine using getattr/setattr -- it's not even - # that cumbersome to do so. - # TODO(kenton): Remove this method entirely if/when everyone agrees with my - # position. 
- return proto_field_name - - -def _VerifyExtensionHandle(message, extension_handle): - """Verify that the given extension handle is valid.""" - - if not isinstance(extension_handle, _FieldDescriptor): - raise KeyError('HasExtension() expects an extension handle, got: %s' % - extension_handle) - - if not extension_handle.is_extension: - raise KeyError('"%s" is not an extension.' % extension_handle.full_name) - - if extension_handle.containing_type is not message.DESCRIPTOR: - raise KeyError('Extension "%s" extends message type "%s", but this ' - 'message is of type "%s".' % - (extension_handle.full_name, - extension_handle.containing_type.full_name, - message.DESCRIPTOR.full_name)) - - -def _AddSlots(message_descriptor, dictionary): - """Adds a __slots__ entry to dictionary, containing the names of all valid - attributes for this message type. - - Args: - message_descriptor: A Descriptor instance describing this message type. - dictionary: Class dictionary to which we'll add a '__slots__' entry. - """ - dictionary['__slots__'] = ['_cached_byte_size', - '_cached_byte_size_dirty', - '_fields', - '_is_present_in_parent', - '_listener', - '_listener_for_children', - '__weakref__'] - - -def _IsMessageSetExtension(field): - return (field.is_extension and - field.containing_type.has_options and - field.containing_type.GetOptions().message_set_wire_format and - field.type == _FieldDescriptor.TYPE_MESSAGE and - field.message_type == field.extension_scope and - field.label == _FieldDescriptor.LABEL_OPTIONAL) - - -def _AttachFieldHelpers(cls, field_descriptor): - is_repeated = (field_descriptor.label == _FieldDescriptor.LABEL_REPEATED) - is_packed = (field_descriptor.has_options and - field_descriptor.GetOptions().packed) - - if _IsMessageSetExtension(field_descriptor): - field_encoder = encoder.MessageSetItemEncoder(field_descriptor.number) - sizer = encoder.MessageSetItemSizer(field_descriptor.number) - else: - field_encoder = type_checkers.TYPE_TO_ENCODER[field_descriptor.type]( - field_descriptor.number, is_repeated, is_packed) - sizer = type_checkers.TYPE_TO_SIZER[field_descriptor.type]( - field_descriptor.number, is_repeated, is_packed) - - field_descriptor._encoder = field_encoder - field_descriptor._sizer = sizer - field_descriptor._default_constructor = _DefaultValueConstructorForField( - field_descriptor) - - def AddDecoder(wiretype, is_packed): - tag_bytes = encoder.TagBytes(field_descriptor.number, wiretype) - cls._decoders_by_tag[tag_bytes] = ( - type_checkers.TYPE_TO_DECODER[field_descriptor.type]( - field_descriptor.number, is_repeated, is_packed, - field_descriptor, field_descriptor._default_constructor)) - - AddDecoder(type_checkers.FIELD_TYPE_TO_WIRE_TYPE[field_descriptor.type], - False) - - if is_repeated and wire_format.IsTypePackable(field_descriptor.type): - # To support wire compatibility of adding packed = true, add a decoder for - # packed values regardless of the field's options. - AddDecoder(wire_format.WIRETYPE_LENGTH_DELIMITED, True) - - -def _AddClassAttributesForNestedExtensions(descriptor, dictionary): - extension_dict = descriptor.extensions_by_name - for extension_name, extension_field in extension_dict.iteritems(): - assert extension_name not in dictionary - dictionary[extension_name] = extension_field - - -def _AddEnumValues(descriptor, cls): - """Sets class-level attributes for all enum fields defined in this message. - - Args: - descriptor: Descriptor object for this message type. - cls: Class we're constructing for this message type. 
- """ - for enum_type in descriptor.enum_types: - for enum_value in enum_type.values: - setattr(cls, enum_value.name, enum_value.number) - - -def _DefaultValueConstructorForField(field): - """Returns a function which returns a default value for a field. - - Args: - field: FieldDescriptor object for this field. - - The returned function has one argument: - message: Message instance containing this field, or a weakref proxy - of same. - - That function in turn returns a default value for this field. The default - value may refer back to |message| via a weak reference. - """ - - if field.label == _FieldDescriptor.LABEL_REPEATED: - if field.default_value != []: - raise ValueError('Repeated field default value not empty list: %s' % ( - field.default_value)) - if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - # We can't look at _concrete_class yet since it might not have - # been set. (Depends on order in which we initialize the classes). - message_type = field.message_type - def MakeRepeatedMessageDefault(message): - return containers.RepeatedCompositeFieldContainer( - message._listener_for_children, field.message_type) - return MakeRepeatedMessageDefault - else: - type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type) - def MakeRepeatedScalarDefault(message): - return containers.RepeatedScalarFieldContainer( - message._listener_for_children, type_checker) - return MakeRepeatedScalarDefault - - if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - # _concrete_class may not yet be initialized. - message_type = field.message_type - def MakeSubMessageDefault(message): - result = message_type._concrete_class() - result._SetListener(message._listener_for_children) - return result - return MakeSubMessageDefault - - def MakeScalarDefault(message): - return field.default_value - return MakeScalarDefault - - -def _AddInitMethod(message_descriptor, cls): - """Adds an __init__ method to cls.""" - fields = message_descriptor.fields - def init(self, **kwargs): - self._cached_byte_size = 0 - self._cached_byte_size_dirty = False - self._fields = {} - self._is_present_in_parent = False - self._listener = message_listener_mod.NullMessageListener() - self._listener_for_children = _Listener(self) - for field_name, field_value in kwargs.iteritems(): - field = _GetFieldByName(message_descriptor, field_name) - if field is None: - raise TypeError("%s() got an unexpected keyword argument '%s'" % - (message_descriptor.name, field_name)) - if field.label == _FieldDescriptor.LABEL_REPEATED: - copy = field._default_constructor(self) - if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: # Composite - for val in field_value: - copy.add().MergeFrom(val) - else: # Scalar - copy.extend(field_value) - self._fields[field] = copy - elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - copy = field._default_constructor(self) - copy.MergeFrom(field_value) - self._fields[field] = copy - else: - setattr(self, field_name, field_value) - - init.__module__ = None - init.__doc__ = None - cls.__init__ = init - - -def _GetFieldByName(message_descriptor, field_name): - """Returns a field descriptor by field name. - - Args: - message_descriptor: A Descriptor describing all fields in message. - field_name: The name of the field to retrieve. - Returns: - The field descriptor associated with the field name. - """ - try: - return message_descriptor.fields_by_name[field_name] - except KeyError: - raise ValueError('Protocol message has no "%s" field.' 
% field_name) - - -def _AddPropertiesForFields(descriptor, cls): - """Adds properties for all fields in this protocol message type.""" - for field in descriptor.fields: - _AddPropertiesForField(field, cls) - - if descriptor.is_extendable: - # _ExtensionDict is just an adaptor with no state so we allocate a new one - # every time it is accessed. - cls.Extensions = property(lambda self: _ExtensionDict(self)) - - -def _AddPropertiesForField(field, cls): - """Adds a public property for a protocol message field. - Clients can use this property to get and (in the case - of non-repeated scalar fields) directly set the value - of a protocol message field. - - Args: - field: A FieldDescriptor for this field. - cls: The class we're constructing. - """ - # Catch it if we add other types that we should - # handle specially here. - assert _FieldDescriptor.MAX_CPPTYPE == 10 - - constant_name = field.name.upper() + "_FIELD_NUMBER" - setattr(cls, constant_name, field.number) - - if field.label == _FieldDescriptor.LABEL_REPEATED: - _AddPropertiesForRepeatedField(field, cls) - elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - _AddPropertiesForNonRepeatedCompositeField(field, cls) - else: - _AddPropertiesForNonRepeatedScalarField(field, cls) - - -def _AddPropertiesForRepeatedField(field, cls): - """Adds a public property for a "repeated" protocol message field. Clients - can use this property to get the value of the field, which will be either a - _RepeatedScalarFieldContainer or _RepeatedCompositeFieldContainer (see - below). - - Note that when clients add values to these containers, we perform - type-checking in the case of repeated scalar fields, and we also set any - necessary "has" bits as a side-effect. - - Args: - field: A FieldDescriptor for this field. - cls: The class we're constructing. - """ - proto_field_name = field.name - property_name = _PropertyName(proto_field_name) - - def getter(self): - field_value = self._fields.get(field) - if field_value is None: - # Construct a new object to represent this field. - field_value = field._default_constructor(self) - - # Atomically check if another thread has preempted us and, if not, swap - # in the new object we just created. If someone has preempted us, we - # take that object and discard ours. - # WARNING: We are relying on setdefault() being atomic. This is true - # in CPython but we haven't investigated others. This warning appears - # in several other locations in this file. - field_value = self._fields.setdefault(field, field_value) - return field_value - getter.__module__ = None - getter.__doc__ = 'Getter for %s.' % proto_field_name - - # We define a setter just so we can throw an exception with a more - # helpful error message. - def setter(self, new_value): - raise AttributeError('Assignment not allowed to repeated field ' - '"%s" in protocol message object.' % proto_field_name) - - doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name - setattr(cls, property_name, property(getter, setter, doc=doc)) - - -def _AddPropertiesForNonRepeatedScalarField(field, cls): - """Adds a public property for a nonrepeated, scalar protocol message field. - Clients can use this property to get and directly set the value of the field. - Note that when the client sets the value of a field by using this property, - all necessary "has" bits are set as a side-effect, and we also perform - type-checking. - - Args: - field: A FieldDescriptor for this field. - cls: The class we're constructing. 
- """ - proto_field_name = field.name - property_name = _PropertyName(proto_field_name) - type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type) - default_value = field.default_value - - def getter(self): - return self._fields.get(field, default_value) - getter.__module__ = None - getter.__doc__ = 'Getter for %s.' % proto_field_name - def setter(self, new_value): - type_checker.CheckValue(new_value) - self._fields[field] = new_value - # Check _cached_byte_size_dirty inline to improve performance, since scalar - # setters are called frequently. - if not self._cached_byte_size_dirty: - self._Modified() - setter.__module__ = None - setter.__doc__ = 'Setter for %s.' % proto_field_name - - # Add a property to encapsulate the getter/setter. - doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name - setattr(cls, property_name, property(getter, setter, doc=doc)) - - -def _AddPropertiesForNonRepeatedCompositeField(field, cls): - """Adds a public property for a nonrepeated, composite protocol message field. - A composite field is a "group" or "message" field. - - Clients can use this property to get the value of the field, but cannot - assign to the property directly. - - Args: - field: A FieldDescriptor for this field. - cls: The class we're constructing. - """ - # TODO(robinson): Remove duplication with similar method - # for non-repeated scalars. - proto_field_name = field.name - property_name = _PropertyName(proto_field_name) - message_type = field.message_type - - def getter(self): - field_value = self._fields.get(field) - if field_value is None: - # Construct a new object to represent this field. - field_value = message_type._concrete_class() - field_value._SetListener(self._listener_for_children) - - # Atomically check if another thread has preempted us and, if not, swap - # in the new object we just created. If someone has preempted us, we - # take that object and discard ours. - # WARNING: We are relying on setdefault() being atomic. This is true - # in CPython but we haven't investigated others. This warning appears - # in several other locations in this file. - field_value = self._fields.setdefault(field, field_value) - return field_value - getter.__module__ = None - getter.__doc__ = 'Getter for %s.' % proto_field_name - - # We define a setter just so we can throw an exception with a more - # helpful error message. - def setter(self, new_value): - raise AttributeError('Assignment not allowed to composite field ' - '"%s" in protocol message object.' % proto_field_name) - - # Add a property to encapsulate the getter. - doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name - setattr(cls, property_name, property(getter, setter, doc=doc)) - - -def _AddPropertiesForExtensions(descriptor, cls): - """Adds properties for all fields in this protocol message type.""" - extension_dict = descriptor.extensions_by_name - for extension_name, extension_field in extension_dict.iteritems(): - constant_name = extension_name.upper() + "_FIELD_NUMBER" - setattr(cls, constant_name, extension_field.number) - - -def _AddStaticMethods(cls): - # TODO(robinson): This probably needs to be thread-safe(?) - def RegisterExtension(extension_handle): - extension_handle.containing_type = cls.DESCRIPTOR - _AttachFieldHelpers(cls, extension_handle) - - # Try to insert our extension, failing if an extension with the same number - # already exists. 
- actual_handle = cls._extensions_by_number.setdefault( - extension_handle.number, extension_handle) - if actual_handle is not extension_handle: - raise AssertionError( - 'Extensions "%s" and "%s" both try to extend message type "%s" with ' - 'field number %d.' % - (extension_handle.full_name, actual_handle.full_name, - cls.DESCRIPTOR.full_name, extension_handle.number)) - - cls._extensions_by_name[extension_handle.full_name] = extension_handle - - handle = extension_handle # avoid line wrapping - if _IsMessageSetExtension(handle): - # MessageSet extension. Also register under type name. - cls._extensions_by_name[ - extension_handle.message_type.full_name] = extension_handle - - cls.RegisterExtension = staticmethod(RegisterExtension) - - def FromString(s): - message = cls() - message.MergeFromString(s) - return message - cls.FromString = staticmethod(FromString) - - -def _IsPresent(item): - """Given a (FieldDescriptor, value) tuple from _fields, return true if the - value should be included in the list returned by ListFields().""" - - if item[0].label == _FieldDescriptor.LABEL_REPEATED: - return bool(item[1]) - elif item[0].cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - return item[1]._is_present_in_parent - else: - return True - - -def _AddListFieldsMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - - def ListFields(self): - all_fields = [item for item in self._fields.iteritems() if _IsPresent(item)] - all_fields.sort(key = lambda item: item[0].number) - return all_fields - - cls.ListFields = ListFields - - -def _AddHasFieldMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - - singular_fields = {} - for field in message_descriptor.fields: - if field.label != _FieldDescriptor.LABEL_REPEATED: - singular_fields[field.name] = field - - def HasField(self, field_name): - try: - field = singular_fields[field_name] - except KeyError: - raise ValueError( - 'Protocol message has no singular "%s" field.' % field_name) - - if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - value = self._fields.get(field) - return value is not None and value._is_present_in_parent - else: - return field in self._fields - cls.HasField = HasField - - -def _AddClearFieldMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - def ClearField(self, field_name): - try: - field = message_descriptor.fields_by_name[field_name] - except KeyError: - raise ValueError('Protocol message has no "%s" field.' % field_name) - - if field in self._fields: - # Note: If the field is a sub-message, its listener will still point - # at us. That's fine, because the worst than can happen is that it - # will call _Modified() and invalidate our byte size. Big deal. - del self._fields[field] - - # Always call _Modified() -- even if nothing was changed, this is - # a mutating method, and thus calling it should cause the field to become - # present in the parent message. - self._Modified() - - cls.ClearField = ClearField - - -def _AddClearExtensionMethod(cls): - """Helper for _AddMessageMethods().""" - def ClearExtension(self, extension_handle): - _VerifyExtensionHandle(self, extension_handle) - - # Similar to ClearField(), above. - if extension_handle in self._fields: - del self._fields[extension_handle] - self._Modified() - cls.ClearExtension = ClearExtension - - -def _AddClearMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - def Clear(self): - # Clear fields. 
- self._fields = {} - self._Modified() - cls.Clear = Clear - - -def _AddHasExtensionMethod(cls): - """Helper for _AddMessageMethods().""" - def HasExtension(self, extension_handle): - _VerifyExtensionHandle(self, extension_handle) - if extension_handle.label == _FieldDescriptor.LABEL_REPEATED: - raise KeyError('"%s" is repeated.' % extension_handle.full_name) - - if extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - value = self._fields.get(extension_handle) - return value is not None and value._is_present_in_parent - else: - return extension_handle in self._fields - cls.HasExtension = HasExtension - - -def _AddEqualsMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - def __eq__(self, other): - if (not isinstance(other, message_mod.Message) or - other.DESCRIPTOR != self.DESCRIPTOR): - return False - - if self is other: - return True - - return self.ListFields() == other.ListFields() - - cls.__eq__ = __eq__ - - -def _AddStrMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - def __str__(self): - return text_format.MessageToString(self) - cls.__str__ = __str__ - - -def _AddSetListenerMethod(cls): - """Helper for _AddMessageMethods().""" - def SetListener(self, listener): - if listener is None: - self._listener = message_listener_mod.NullMessageListener() - else: - self._listener = listener - cls._SetListener = SetListener - - -def _BytesForNonRepeatedElement(value, field_number, field_type): - """Returns the number of bytes needed to serialize a non-repeated element. - The returned byte count includes space for tag information and any - other additional space associated with serializing value. - - Args: - value: Value we're serializing. - field_number: Field number of this value. (Since the field number - is stored as part of a varint-encoded tag, this has an impact - on the total bytes required to serialize the value). - field_type: The type of the field. One of the TYPE_* constants - within FieldDescriptor. - """ - try: - fn = type_checkers.TYPE_TO_BYTE_SIZE_FN[field_type] - return fn(field_number, value) - except KeyError: - raise message_mod.EncodeError('Unrecognized field type: %d' % field_type) - - -def _AddByteSizeMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - - def ByteSize(self): - if not self._cached_byte_size_dirty: - return self._cached_byte_size - - size = 0 - for field_descriptor, field_value in self.ListFields(): - size += field_descriptor._sizer(field_value) - - self._cached_byte_size = size - self._cached_byte_size_dirty = False - self._listener_for_children.dirty = False - return size - - cls.ByteSize = ByteSize - - -def _AddSerializeToStringMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - - def SerializeToString(self): - # Check if the message has all of its required fields set. 
- errors = [] - if not self.IsInitialized(): - raise message_mod.EncodeError( - 'Message is missing required fields: ' + - ','.join(self.FindInitializationErrors())) - return self.SerializePartialToString() - cls.SerializeToString = SerializeToString - - -def _AddSerializePartialToStringMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - - def SerializePartialToString(self): - out = StringIO() - self._InternalSerialize(out.write) - return out.getvalue() - cls.SerializePartialToString = SerializePartialToString - - def InternalSerialize(self, write_bytes): - for field_descriptor, field_value in self.ListFields(): - field_descriptor._encoder(write_bytes, field_value) - cls._InternalSerialize = InternalSerialize - - -def _AddMergeFromStringMethod(message_descriptor, cls): - """Helper for _AddMessageMethods().""" - def MergeFromString(self, serialized): - length = len(serialized) - try: - if self._InternalParse(serialized, 0, length) != length: - # The only reason _InternalParse would return early is if it - # encountered an end-group tag. - raise message_mod.DecodeError('Unexpected end-group tag.') - except IndexError: - raise message_mod.DecodeError('Truncated message.') - except struct.error, e: - raise message_mod.DecodeError(e) - return length # Return this for legacy reasons. - cls.MergeFromString = MergeFromString - - local_ReadTag = decoder.ReadTag - local_SkipField = decoder.SkipField - decoders_by_tag = cls._decoders_by_tag - - def InternalParse(self, buffer, pos, end): - self._Modified() - field_dict = self._fields - while pos != end: - (tag_bytes, new_pos) = local_ReadTag(buffer, pos) - field_decoder = decoders_by_tag.get(tag_bytes) - if field_decoder is None: - new_pos = local_SkipField(buffer, new_pos, end, tag_bytes) - if new_pos == -1: - return pos - pos = new_pos - else: - pos = field_decoder(buffer, new_pos, end, self, field_dict) - return pos - cls._InternalParse = InternalParse - - -def _AddIsInitializedMethod(message_descriptor, cls): - """Adds the IsInitialized and FindInitializationError methods to the - protocol message class.""" - - required_fields = [field for field in message_descriptor.fields - if field.label == _FieldDescriptor.LABEL_REQUIRED] - - def IsInitialized(self, errors=None): - """Checks if all required fields of a message are set. - - Args: - errors: A list which, if provided, will be populated with the field - paths of all missing required fields. - - Returns: - True iff the specified message has all required fields set. - """ - - # Performance is critical so we avoid HasField() and ListFields(). - - for field in required_fields: - if (field not in self._fields or - (field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE and - not self._fields[field]._is_present_in_parent)): - if errors is not None: - errors.extend(self.FindInitializationErrors()) - return False - - for field, value in self._fields.iteritems(): - if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - if field.label == _FieldDescriptor.LABEL_REPEATED: - for element in value: - if not element.IsInitialized(): - if errors is not None: - errors.extend(self.FindInitializationErrors()) - return False - elif value._is_present_in_parent and not value.IsInitialized(): - if errors is not None: - errors.extend(self.FindInitializationErrors()) - return False - - return True - - cls.IsInitialized = IsInitialized - - def FindInitializationErrors(self): - """Finds required fields which are not initialized. - - Returns: - A list of strings. 
Each string is a path to an uninitialized field from - the top-level message, e.g. "foo.bar[5].baz". - """ - - errors = [] # simplify things - - for field in required_fields: - if not self.HasField(field.name): - errors.append(field.name) - - for field, value in self.ListFields(): - if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - if field.is_extension: - name = "(%s)" % field.full_name - else: - name = field.name - - if field.label == _FieldDescriptor.LABEL_REPEATED: - for i in xrange(len(value)): - element = value[i] - prefix = "%s[%d]." % (name, i) - sub_errors = element.FindInitializationErrors() - errors += [ prefix + error for error in sub_errors ] - else: - prefix = name + "." - sub_errors = value.FindInitializationErrors() - errors += [ prefix + error for error in sub_errors ] - - return errors - - cls.FindInitializationErrors = FindInitializationErrors - - -def _AddMergeFromMethod(cls): - LABEL_REPEATED = _FieldDescriptor.LABEL_REPEATED - CPPTYPE_MESSAGE = _FieldDescriptor.CPPTYPE_MESSAGE - - def MergeFrom(self, msg): - if not isinstance(msg, cls): - raise TypeError( - "Parameter to MergeFrom() must be instance of same class.") - - assert msg is not self - self._Modified() - - fields = self._fields - - for field, value in msg._fields.iteritems(): - if field.label == LABEL_REPEATED: - field_value = fields.get(field) - if field_value is None: - # Construct a new object to represent this field. - field_value = field._default_constructor(self) - fields[field] = field_value - field_value.MergeFrom(value) - elif field.cpp_type == CPPTYPE_MESSAGE: - if value._is_present_in_parent: - field_value = fields.get(field) - if field_value is None: - # Construct a new object to represent this field. - field_value = field._default_constructor(self) - fields[field] = field_value - field_value.MergeFrom(value) - else: - self._fields[field] = value - cls.MergeFrom = MergeFrom - - -def _AddMessageMethods(message_descriptor, cls): - """Adds implementations of all Message methods to cls.""" - _AddListFieldsMethod(message_descriptor, cls) - _AddHasFieldMethod(message_descriptor, cls) - _AddClearFieldMethod(message_descriptor, cls) - if message_descriptor.is_extendable: - _AddClearExtensionMethod(cls) - _AddHasExtensionMethod(cls) - _AddClearMethod(message_descriptor, cls) - _AddEqualsMethod(message_descriptor, cls) - _AddStrMethod(message_descriptor, cls) - _AddSetListenerMethod(cls) - _AddByteSizeMethod(message_descriptor, cls) - _AddSerializeToStringMethod(message_descriptor, cls) - _AddSerializePartialToStringMethod(message_descriptor, cls) - _AddMergeFromStringMethod(message_descriptor, cls) - _AddIsInitializedMethod(message_descriptor, cls) - _AddMergeFromMethod(cls) - - -def _AddPrivateHelperMethods(cls): - """Adds implementation of private helper methods to cls.""" - - def Modified(self): - """Sets the _cached_byte_size_dirty bit to true, - and propagates this to our listener iff this was a state change. - """ - - # Note: Some callers check _cached_byte_size_dirty before calling - # _Modified() as an extra optimization. So, if this method is ever - # changed such that it does stuff even when _cached_byte_size_dirty is - # already true, the callers need to be updated. 
- if not self._cached_byte_size_dirty: - self._cached_byte_size_dirty = True - self._listener_for_children.dirty = True - self._is_present_in_parent = True - self._listener.Modified() - - cls._Modified = Modified - cls.SetInParent = Modified - - -class _Listener(object): - - """MessageListener implementation that a parent message registers with its - child message. - - In order to support semantics like: - - foo.bar.baz.qux = 23 - assert foo.HasField('bar') - - ...child objects must have back references to their parents. - This helper class is at the heart of this support. - """ - - def __init__(self, parent_message): - """Args: - parent_message: The message whose _Modified() method we should call when - we receive Modified() messages. - """ - # This listener establishes a back reference from a child (contained) object - # to its parent (containing) object. We make this a weak reference to avoid - # creating cyclic garbage when the client finishes with the 'parent' object - # in the tree. - if isinstance(parent_message, weakref.ProxyType): - self._parent_message_weakref = parent_message - else: - self._parent_message_weakref = weakref.proxy(parent_message) - - # As an optimization, we also indicate directly on the listener whether - # or not the parent message is dirty. This way we can avoid traversing - # up the tree in the common case. - self.dirty = False - - def Modified(self): - if self.dirty: - return - try: - # Propagate the signal to our parents iff this is the first field set. - self._parent_message_weakref._Modified() - except ReferenceError: - # We can get here if a client has kept a reference to a child object, - # and is now setting a field on it, but the child's parent has been - # garbage-collected. This is not an error. - pass - - -# TODO(robinson): Move elsewhere? This file is getting pretty ridiculous... -# TODO(robinson): Unify error handling of "unknown extension" crap. -# TODO(robinson): Support iteritems()-style iteration over all -# extensions with the "has" bits turned on? -class _ExtensionDict(object): - - """Dict-like container for supporting an indexable "Extensions" - field on proto instances. - - Note that in all cases we expect extension handles to be - FieldDescriptors. - """ - - def __init__(self, extended_message): - """extended_message: Message instance for which we are the Extensions dict. - """ - - self._extended_message = extended_message - - def __getitem__(self, extension_handle): - """Returns the current value of the given extension handle.""" - - _VerifyExtensionHandle(self._extended_message, extension_handle) - - result = self._extended_message._fields.get(extension_handle) - if result is not None: - return result - - if extension_handle.label == _FieldDescriptor.LABEL_REPEATED: - result = extension_handle._default_constructor(self._extended_message) - elif extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: - result = extension_handle.message_type._concrete_class() - try: - result._SetListener(self._extended_message._listener_for_children) - except ReferenceError: - pass - else: - # Singular scalar -- just return the default without inserting into the - # dict. - return extension_handle.default_value - - # Atomically check if another thread has preempted us and, if not, swap - # in the new object we just created. If someone has preempted us, we - # take that object and discard ours. - # WARNING: We are relying on setdefault() being atomic. This is true - # in CPython but we haven't investigated others. 
This warning appears - # in several other locations in this file. - result = self._extended_message._fields.setdefault( - extension_handle, result) - - return result - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return False - - my_fields = self._extended_message.ListFields() - other_fields = other._extended_message.ListFields() - - # Get rid of non-extension fields. - my_fields = [ field for field in my_fields if field.is_extension ] - other_fields = [ field for field in other_fields if field.is_extension ] - - return my_fields == other_fields - - def __ne__(self, other): - return not self == other - - # Note that this is only meaningful for non-repeated, scalar extension - # fields. Note also that we may have to call _Modified() when we do - # successfully set a field this way, to set any necssary "has" bits in the - # ancestors of the extended message. - def __setitem__(self, extension_handle, value): - """If extension_handle specifies a non-repeated, scalar extension - field, sets the value of that field. - """ - - _VerifyExtensionHandle(self._extended_message, extension_handle) - - if (extension_handle.label == _FieldDescriptor.LABEL_REPEATED or - extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE): - raise TypeError( - 'Cannot assign to extension "%s" because it is a repeated or ' - 'composite type.' % extension_handle.full_name) - - # It's slightly wasteful to lookup the type checker each time, - # but we expect this to be a vanishingly uncommon case anyway. - type_checker = type_checkers.GetTypeChecker( - extension_handle.cpp_type, extension_handle.type) - type_checker.CheckValue(value) - self._extended_message._fields[extension_handle] = value - self._extended_message._Modified() - - def _FindExtensionByName(self, name): - """Tries to find a known extension with the specified name. - - Args: - name: Extension full name. - - Returns: - Extension field descriptor. - """ - return self._extended_message._extensions_by_name.get(name, None) diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index cc6ac902..6d77b543 100755 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -53,24 +53,26 @@ class ParseError(Exception): """Thrown in case of ASCII parsing error.""" -def MessageToString(message): +def MessageToString(message, as_utf8=False, as_one_line=False): out = cStringIO.StringIO() - PrintMessage(message, out) + PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line) result = out.getvalue() out.close() + if as_one_line: + return result.rstrip() return result -def PrintMessage(message, out, indent = 0): +def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False): for field, value in message.ListFields(): if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: for element in value: - PrintField(field, element, out, indent) + PrintField(field, element, out, indent, as_utf8, as_one_line) else: - PrintField(field, value, out, indent) + PrintField(field, value, out, indent, as_utf8, as_one_line) -def PrintField(field, value, out, indent = 0): +def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False): """Print a single field name/value pair. For repeated fields, the value should be a single element.""" @@ -96,23 +98,35 @@ def PrintField(field, value, out, indent = 0): # don't include it. 
out.write(': ') - PrintFieldValue(field, value, out, indent) - out.write('\n') + PrintFieldValue(field, value, out, indent, as_utf8, as_one_line) + if as_one_line: + out.write(' ') + else: + out.write('\n') -def PrintFieldValue(field, value, out, indent = 0): +def PrintFieldValue(field, value, out, indent=0, + as_utf8=False, as_one_line=False): """Print a single field value (not including name). For repeated fields, the value should be a single element.""" if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: - out.write(' {\n') - PrintMessage(value, out, indent + 2) - out.write(' ' * indent + '}') + if as_one_line: + out.write(' { ') + PrintMessage(value, out, indent, as_utf8, as_one_line) + out.write('}') + else: + out.write(' {\n') + PrintMessage(value, out, indent + 2, as_utf8, as_one_line) + out.write(' ' * indent + '}') elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: out.write(field.enum_type.values_by_number[value].name) elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: out.write('\"') - out.write(_CEscape(value)) + if type(value) is unicode: + out.write(_CEscape(value.encode('utf-8'), as_utf8)) + else: + out.write(_CEscape(value, as_utf8)) out.write('\"') elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL: if value: @@ -334,10 +348,10 @@ class _Tokenizer(object): Returns: True iff the end was reached. """ - return not self._lines and not self._current_line + return self.token == '' def _PopLine(self): - while not self._current_line: + while len(self._current_line) <= self._column: if not self._lines: self._current_line = '' return @@ -348,11 +362,10 @@ class _Tokenizer(object): def _SkipWhitespace(self): while True: self._PopLine() - match = re.match(self._WHITESPACE, self._current_line) + match = self._WHITESPACE.match(self._current_line, self._column) if not match: break length = len(match.group(0)) - self._current_line = self._current_line[length:] self._column += length def TryConsume(self, token): @@ -402,7 +415,7 @@ class _Tokenizer(object): ParseError: If an identifier couldn't be consumed. """ result = self.token - if not re.match(self._IDENTIFIER, result): + if not self._IDENTIFIER.match(result): raise self._ParseError('Expected identifier.') self.NextToken() return result @@ -481,13 +494,13 @@ class _Tokenizer(object): ParseError: If a floating point number couldn't be consumed. """ text = self.token - if re.match(self._FLOAT_INFINITY, text): + if self._FLOAT_INFINITY.match(text): self.NextToken() if text.startswith('-'): return -_INFINITY return _INFINITY - if re.match(self._FLOAT_NAN, text): + if self._FLOAT_NAN.match(text): self.NextToken() return _NAN @@ -507,10 +520,10 @@ class _Tokenizer(object): Raises: ParseError: If a boolean value couldn't be consumed. """ - if self.token == 'true': + if self.token in ('true', 't', '1'): self.NextToken() return True - elif self.token == 'false': + elif self.token in ('false', 'f', '0'): self.NextToken() return False else: @@ -525,7 +538,11 @@ class _Tokenizer(object): Raises: ParseError: If a string value couldn't be consumed. """ - return unicode(self.ConsumeByteString(), 'utf-8') + bytes = self.ConsumeByteString() + try: + return unicode(bytes, 'utf-8') + except UnicodeDecodeError, e: + raise self._StringParseError(e) def ConsumeByteString(self): """Consumes a byte array value. 
@@ -609,7 +626,7 @@ class _Tokenizer(object): def _ParseError(self, message): """Creates and *returns* a ParseError for the current token.""" return ParseError('%d:%d : %s' % ( - self._line + 1, self._column + 1, message)) + self._line + 1, self._column - len(self.token) + 1, message)) def _IntegerParseError(self, e): return self._ParseError('Couldn\'t parse integer: ' + str(e)) @@ -617,27 +634,27 @@ class _Tokenizer(object): def _FloatParseError(self, e): return self._ParseError('Couldn\'t parse number: ' + str(e)) + def _StringParseError(self, e): + return self._ParseError('Couldn\'t parse string: ' + str(e)) + def NextToken(self): """Reads the next meaningful token.""" self._previous_line = self._line self._previous_column = self._column - if self.AtEnd(): - self.token = '' - return + self._column += len(self.token) + self._SkipWhitespace() - # Make sure there is data to work on. - self._PopLine() + if not self._lines and len(self._current_line) <= self._column: + self.token = '' + return - match = re.match(self._TOKEN, self._current_line) + match = self._TOKEN.match(self._current_line, self._column) if match: token = match.group(0) - self._current_line = self._current_line[len(token):] self.token = token else: - self.token = self._current_line[0] - self._current_line = self._current_line[1:] - self._SkipWhitespace() + self.token = self._current_line[self._column] # text.encode('string_escape') does not seem to satisfy our needs as it @@ -645,7 +662,7 @@ class _Tokenizer(object): # C++ unescaping function allows hex escapes to be any length. So, # "\0011".encode('string_escape') ends up being "\\x011", which will be # decoded in C++ as a single-character string with char code 0x11. -def _CEscape(text): +def _CEscape(text, as_utf8): def escape(c): o = ord(c) if o == 10: return r"\n" # optional escape @@ -656,12 +673,13 @@ def _CEscape(text): if o == 34: return r'\"' # necessary escape if o == 92: return r"\\" # necessary escape - if o >= 127 or o < 32: return "\\%03o" % o # necessary escapes + # necessary escapes + if not as_utf8 and (o >= 127 or o < 32): return "\\%03o" % o return c return "".join([escape(c) for c in text]) -_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-f-A-F])') +_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])') def _CUnescape(text): diff --git a/python/setup.py b/python/setup.py index 81887300..47646a81 100755 --- a/python/setup.py +++ b/python/setup.py @@ -7,7 +7,7 @@ from ez_setup import use_setuptools use_setuptools() -from setuptools import setup +from setuptools import setup, Extension from distutils.spawn import find_executable import sys import os @@ -60,6 +60,7 @@ def MakeTestSuite(): del sys.modules['google'] generate_proto("../src/google/protobuf/unittest.proto") + generate_proto("../src/google/protobuf/unittest_custom_options.proto") generate_proto("../src/google/protobuf/unittest_import.proto") generate_proto("../src/google/protobuf/unittest_mset.proto") generate_proto("../src/google/protobuf/unittest_no_generic_services.proto") @@ -94,24 +95,39 @@ if __name__ == '__main__': for (dirpath, dirnames, filenames) in os.walk("."): for filename in filenames: filepath = os.path.join(dirpath, filename) - if filepath.endswith("_pb2.py") or filepath.endswith(".pyc"): + if filepath.endswith("_pb2.py") or filepath.endswith(".pyc") or \ + filepath.endswith(".so") or filepath.endswith(".o"): os.remove(filepath) else: # Generate necessary .proto file if it doesn't exist. # TODO(kenton): Maybe we should hook this into a distutils command? 
generate_proto("../src/google/protobuf/descriptor.proto") + python_c_extension = Extension("google.protobuf.internal._net_proto2___python", + [ "google/protobuf/pyext/python_descriptor.cc", + "google/protobuf/pyext/python_protobuf.cc", + "google/protobuf/pyext/python-proto2.cc", + ], + include_dirs = [ "../src", ".", ], + libraries = [ "protobuf" ], + runtime_library_dirs = [ "../src/.libs" ], + library_dirs = [ "../src/.libs" ], + ) + setup(name = 'protobuf', - version = '2.3.1-pre', + version = '2.4.0-pre', packages = [ 'google' ], namespace_packages = [ 'google' ], test_suite = 'setup.MakeTestSuite', # Must list modules explicitly so that we don't install tests. py_modules = [ + 'google.protobuf.internal.api_implementation', 'google.protobuf.internal.containers', + 'google.protobuf.internal.cpp_message', 'google.protobuf.internal.decoder', 'google.protobuf.internal.encoder', 'google.protobuf.internal.message_listener', + 'google.protobuf.internal.python_message', 'google.protobuf.internal.type_checkers', 'google.protobuf.internal.wire_format', 'google.protobuf.descriptor', @@ -121,6 +137,7 @@ if __name__ == '__main__': 'google.protobuf.service', 'google.protobuf.service_reflection', 'google.protobuf.text_format' ], + ext_modules = [ python_c_extension ], url = 'http://code.google.com/p/protobuf/', maintainer = maintainer_email, maintainer_email = 'protobuf@googlegroups.com', |