From 40ee551715c3a784ea6132dbf604b0e665ca2def Mon Sep 17 00:00:00 2001 From: temporal Date: Thu, 10 Jul 2008 02:12:20 +0000 Subject: Initial checkin. --- src/google/protobuf/dynamic_message.cc | 475 +++++++++++++++++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100644 src/google/protobuf/dynamic_message.cc (limited to 'src/google/protobuf/dynamic_message.cc') diff --git a/src/google/protobuf/dynamic_message.cc b/src/google/protobuf/dynamic_message.cc new file mode 100644 index 00000000..43e2451e --- /dev/null +++ b/src/google/protobuf/dynamic_message.cc @@ -0,0 +1,475 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// DynamicMessage is implemented by constructing a data structure which +// has roughly the same memory layout as a generated message would have. +// Then, we use GeneratedMessageReflection to implement our reflection +// interface. All the other operations we need to implement (e.g. +// parsing, copying, etc.) are already implemented in terms of +// Message::Reflection, so the rest is easy. +// +// The up side of this strategy is that it's very efficient. We don't +// need to use hash_maps or generic representations of fields. The +// down side is that this is a low-level memory management hack which +// can be tricky to get right. +// +// As mentioned in the header, we only expose a DynamicMessageFactory +// publicly, not the DynamicMessage class itself. This is because +// GenericMessageReflection wants to have a pointer to a "default" +// copy of the class, with all fields initialized to their default +// values. We only want to construct one of these per message type, +// so DynamicMessageFactory stores a cache of default messages for +// each type it sees (each unique Descriptor pointer). The code +// refers to the "default" copy of the class as the "prototype". +// +// Note on memory allocation: This module often calls "operator new()" +// to allocate untyped memory, rather than calling something like +// "new uint8[]". This is because "operator new()" means "Give me some +// space which I can use as I please." while "new uint8[]" means "Give +// me an array of 8-bit integers.". In practice, the later may return +// a pointer that is not aligned correctly for general use. I believe +// Item 8 of "More Effective C++" discusses this in more detail, though +// I don't have the book on me right now so I'm not sure. + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace google { +namespace protobuf { + +using internal::WireFormat; +using internal::ExtensionSet; +using internal::GeneratedMessageReflection; +using internal::GenericRepeatedField; + + +// =================================================================== +// Some helper tables and functions... + +namespace { + +// Compute the byte size of the in-memory representation of the field. +int FieldSpaceUsed(const FieldDescriptor* field) { + typedef FieldDescriptor FD; // avoid line wrapping + if (field->label() == FD::LABEL_REPEATED) { + switch (field->cpp_type()) { + case FD::CPPTYPE_INT32 : return sizeof(RepeatedField); + case FD::CPPTYPE_INT64 : return sizeof(RepeatedField); + case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField); + case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField); + case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField); + case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField); + case FD::CPPTYPE_BOOL : return sizeof(RepeatedField); + case FD::CPPTYPE_ENUM : return sizeof(RepeatedField); + case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField); + + case FD::CPPTYPE_STRING: + return sizeof(RepeatedPtrField); + break; + } + } else { + switch (field->cpp_type()) { + case FD::CPPTYPE_INT32 : return sizeof(int32 ); + case FD::CPPTYPE_INT64 : return sizeof(int64 ); + case FD::CPPTYPE_UINT32 : return sizeof(uint32 ); + case FD::CPPTYPE_UINT64 : return sizeof(uint64 ); + case FD::CPPTYPE_DOUBLE : return sizeof(double ); + case FD::CPPTYPE_FLOAT : return sizeof(float ); + case FD::CPPTYPE_BOOL : return sizeof(bool ); + case FD::CPPTYPE_ENUM : return sizeof(int ); + case FD::CPPTYPE_MESSAGE: return sizeof(Message*); + + case FD::CPPTYPE_STRING: + return sizeof(string*); + break; + } + } + + GOOGLE_LOG(DFATAL) << "Can't get here."; + return 0; +} + +struct DescendingFieldSizeOrder { + inline bool operator()(const FieldDescriptor* a, + const FieldDescriptor* b) { + // All repeated fields come first. + if (a->is_repeated()) { + if (b->is_repeated()) { + // Repeated fields and are not ordered with respect to each other. + return false; + } else { + return true; + } + } else if (b->is_repeated()) { + return false; + } else { + // Remaining fields in descending order by size. + return FieldSpaceUsed(a) > FieldSpaceUsed(b); + } + } +}; + +inline int DivideRoundingUp(int i, int j) { + return (i + (j - 1)) / j; +} + +#define bitsizeof(T) (sizeof(T) * 8) + +} // namespace + +// =================================================================== + +class DynamicMessage : public Message { + public: + DynamicMessage(const Descriptor* descriptor, + uint8* base, const uint8* prototype_base, + int size, const int offsets[], + const DescriptorPool* pool, DynamicMessageFactory* factory); + ~DynamicMessage(); + + // Called on the prototype after construction to initialize message fields. + void CrossLinkPrototypes(DynamicMessageFactory* factory); + + // implements Message ---------------------------------------------- + + Message* New() const; + + int GetCachedSize() const; + void SetCachedSize(int size) const; + + const Descriptor* GetDescriptor() const; + const Reflection* GetReflection() const; + Reflection* GetReflection(); + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage); + + inline bool is_prototype() { return base_ == prototype_base_; } + + const Descriptor* descriptor_; + const DescriptorPool* descriptor_pool_; + DynamicMessageFactory* factory_; + scoped_ptr extensions_; + GeneratedMessageReflection reflection_; + uint8* base_; + const uint8* prototype_base_; + const int* offsets_; + int size_; + + // TODO(kenton): Make this an atomic when C++ supports it. + mutable int cached_byte_size_; +}; + +DynamicMessage::DynamicMessage(const Descriptor* descriptor, + uint8* base, const uint8* prototype_base, + int size, const int offsets[], + const DescriptorPool* pool, + DynamicMessageFactory* factory) + : descriptor_(descriptor), + descriptor_pool_((pool == NULL) ? descriptor->file()->pool() : pool), + factory_(factory), + extensions_(descriptor->extension_range_count() > 0 ? + new ExtensionSet(descriptor, descriptor_pool_, factory_) : + NULL), + reflection_(descriptor, base, prototype_base, offsets, + // has_bits + reinterpret_cast(base + size) - + DivideRoundingUp(descriptor->field_count(), bitsizeof(uint32)), + extensions_.get()), + base_(base), + prototype_base_(prototype_base), + offsets_(offsets), + size_(size), + cached_byte_size_(0) { + // We need to call constructors for various fields manually and set + // default values where appropriate. We use placement new to call + // constructors. If you haven't heard of placement new, I suggest Googling + // it now. We use placement new even for primitive types that don't have + // constructors for consistency. (In theory, placement new should be used + // any time you are trying to convert untyped memory to typed memory, though + // in practice that's not strictly necessary for types that don't have a + // constructor.) + for (int i = 0; i < descriptor->field_count(); i++) { + const FieldDescriptor* field = descriptor->field(i); + void* field_ptr = base + offsets[i]; + switch (field->cpp_type()) { +#define HANDLE_TYPE(CPPTYPE, TYPE) \ + case FieldDescriptor::CPPTYPE_##CPPTYPE: \ + if (!field->is_repeated()) { \ + new(field_ptr) TYPE(field->default_value_##TYPE()); \ + } else { \ + new(field_ptr) RepeatedField(); \ + } \ + break; + + HANDLE_TYPE(INT32 , int32 ); + HANDLE_TYPE(INT64 , int64 ); + HANDLE_TYPE(UINT32, uint32); + HANDLE_TYPE(UINT64, uint64); + HANDLE_TYPE(DOUBLE, double); + HANDLE_TYPE(FLOAT , float ); + HANDLE_TYPE(BOOL , bool ); +#undef HANDLE_TYPE + + case FieldDescriptor::CPPTYPE_ENUM: + if (!field->is_repeated()) { + new(field_ptr) int(field->default_value_enum()->number()); + } else { + new(field_ptr) RepeatedField(); + } + break; + + case FieldDescriptor::CPPTYPE_STRING: + if (!field->is_repeated()) { + if (is_prototype()) { + new(field_ptr) const string*(&field->default_value_string()); + } else { + string* default_value = + *reinterpret_cast( + prototype_base + offsets[i]); + new(field_ptr) string*(default_value); + } + } else { + new(field_ptr) RepeatedPtrField(); + } + break; + + case FieldDescriptor::CPPTYPE_MESSAGE: { + // If this object is the prototype, its CPPTYPE_MESSAGE fields + // must be initialized later, in CrossLinkPrototypes(), so we don't + // initialize them here. + if (!is_prototype()) { + if (!field->is_repeated()) { + new(field_ptr) Message*(NULL); + } else { + const RepeatedPtrField* prototype_field = + reinterpret_cast*>( + prototype_base + offsets[i]); + new(field_ptr) RepeatedPtrField( + prototype_field->prototype()); + } + } + break; + } + } + } +} + +DynamicMessage::~DynamicMessage() { + // We need to manually run the destructors for repeated fields and strings, + // just as we ran their constructors in the the DynamicMessage constructor. + // Additionally, if any singular embedded messages have been allocated, we + // need to delete them, UNLESS we are the prototype message of this type, + // in which case any embedded messages are other prototypes and shouldn't + // be touched. + const Descriptor* descriptor = GetDescriptor(); + for (int i = 0; i < descriptor->field_count(); i++) { + const FieldDescriptor* field = descriptor->field(i); + void* field_ptr = base_ + offsets_[i]; + + if (field->is_repeated()) { + GenericRepeatedField* field = + reinterpret_cast(field_ptr); + field->~GenericRepeatedField(); + + } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { + string* ptr = *reinterpret_cast(field_ptr); + if (ptr != &field->default_value_string()) { + delete ptr; + } + } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) && + !is_prototype()) { + Message* message = *reinterpret_cast(field_ptr); + if (message != NULL) { + delete message; + } + } + } + + // OK, now we can delete our base pointer. + operator delete(base_); + + // When the prototype is deleted, we also want to free the offsets table. + // (The prototype is only deleted when the factory that created it is + // deleted.) + if (is_prototype()) { + delete [] offsets_; + } +} + +void DynamicMessage::CrossLinkPrototypes(DynamicMessageFactory* factory) { + // This should only be called on the prototype message. + GOOGLE_CHECK(is_prototype()); + + // Cross-link default messages. + for (int i = 0; i < descriptor_->field_count(); i++) { + const FieldDescriptor* field = descriptor_->field(i); + void* field_ptr = base_ + offsets_[i]; + + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { + // For fields with message types, we need to cross-link with the + // prototype for the field's type. + const Message* field_prototype = + factory->GetPrototype(field->message_type()); + + if (field->is_repeated()) { + // For repeated fields, we actually construct the RepeatedPtrField + // here, but only for fields with message types. All other repeated + // fields are constructed in DynamicMessage's constructor. + new(field_ptr) RepeatedPtrField(field_prototype); + } else { + // For singular fields, the field is just a pointer which should + // point to the prototype. (OK to const_cast here because the + // prototype itself will only be available const to the outside + // world.) + new(field_ptr) Message*(const_cast(field_prototype)); + } + } + } +} + +Message* DynamicMessage::New() const { + uint8* new_base = reinterpret_cast(operator new(size_)); + memset(new_base, 0, size_); + + return new DynamicMessage(GetDescriptor(), new_base, prototype_base_, + size_, offsets_, descriptor_pool_, factory_); +} + +int DynamicMessage::GetCachedSize() const { + return cached_byte_size_; +} + +void DynamicMessage::SetCachedSize(int size) const { + // This is theoretically not thread-compatible, but in practice it works + // because if multiple threads write this simultaneously, they will be + // writing the exact same value. + cached_byte_size_ = size; +} + +const Descriptor* DynamicMessage::GetDescriptor() const { + return descriptor_; +} + +const Message::Reflection* DynamicMessage::GetReflection() const { + return &reflection_; +} + +Message::Reflection* DynamicMessage::GetReflection() { + return &reflection_; +} + +// =================================================================== + +struct DynamicMessageFactory::PrototypeMap { + typedef hash_map Map; + Map map_; +}; + +DynamicMessageFactory::DynamicMessageFactory() + : pool_(NULL), prototypes_(new PrototypeMap) { +} + +DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool) + : pool_(pool), prototypes_(new PrototypeMap) { +} + +DynamicMessageFactory::~DynamicMessageFactory() { + for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin(); + iter != prototypes_->map_.end(); ++iter) { + delete iter->second; + } +} + + +const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) { + const Message** target = &prototypes_->map_[type]; + if (*target != NULL) { + // Already exists. + return *target; + } + + // We need to construct all the structures passed to + // GeneratedMessageReflection's constructor. This includes: + // - A block of memory that contains space for all the message's fields. + // - An array of integers indicating the byte offset of each field within + // this block. + // - A big bitfield containing a bit for each field indicating whether + // or not that field is set. + + // Compute size and offsets. + int* offsets = new int[type->field_count()]; + + // Sort the fields of this message in descending order by size. We + // assume that if we then pack the fields tightly in this order, all fields + // will end up properly-aligned, since all field sizes are powers of two or + // are multiples of the system word size. + scoped_array ordered_fields( + new const FieldDescriptor*[type->field_count()]); + for (int i = 0; i < type->field_count(); i++) { + ordered_fields[i] = type->field(i); + } + stable_sort(&ordered_fields[0], &ordered_fields[type->field_count()], + DescendingFieldSizeOrder()); + + // Decide all field offsets by packing in order. + int current_offset = 0; + + for (int i = 0; i < type->field_count(); i++) { + offsets[ordered_fields[i]->index()] = current_offset; + current_offset += FieldSpaceUsed(ordered_fields[i]); + } + + // Allocate space for all fields plus has_bits. We'll stick has_bits on + // the end. + int size = current_offset + + DivideRoundingUp(type->field_count(), bitsizeof(uint32)) * sizeof(uint32); + + // Round size up to the nearest 64-bit boundary just to make sure no + // clever allocators think that alignment is not necessary. This also + // insures that has_bits is properly-aligned, since we'll always align + // has_bits with the end of the structure. + size = DivideRoundingUp(size, sizeof(uint64)) * sizeof(uint64); + uint8* base = reinterpret_cast(operator new(size)); + memset(base, 0, size); + + // Construct message. + DynamicMessage* result = + new DynamicMessage(type, base, base, size, offsets, pool_, this); + *target = result; + result->CrossLinkPrototypes(this); + + return result; +} + +} // namespace protobuf + +} // namespace google -- cgit v1.2.3