aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/dynamic_message.cc
diff options
context:
space:
mode:
authorGravatar temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d>2008-07-10 02:12:20 +0000
committerGravatar temporal <temporal@630680e5-0e50-0410-840e-4b1c322b438d>2008-07-10 02:12:20 +0000
commit40ee551715c3a784ea6132dbf604b0e665ca2def (patch)
tree6e3ea9674be5b0f59106f88f3afa1313854beebf /src/google/protobuf/dynamic_message.cc
Initial checkin.
Diffstat (limited to 'src/google/protobuf/dynamic_message.cc')
-rw-r--r--src/google/protobuf/dynamic_message.cc475
1 files changed, 475 insertions, 0 deletions
diff --git a/src/google/protobuf/dynamic_message.cc b/src/google/protobuf/dynamic_message.cc
new file mode 100644
index 00000000..43e2451e
--- /dev/null
+++ b/src/google/protobuf/dynamic_message.cc
@@ -0,0 +1,475 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// DynamicMessage is implemented by constructing a data structure which
+// has roughly the same memory layout as a generated message would have.
+// Then, we use GeneratedMessageReflection to implement our reflection
+// interface. All the other operations we need to implement (e.g.
+// parsing, copying, etc.) are already implemented in terms of
+// Message::Reflection, so the rest is easy.
+//
+// The up side of this strategy is that it's very efficient. We don't
+// need to use hash_maps or generic representations of fields. The
+// down side is that this is a low-level memory management hack which
+// can be tricky to get right.
+//
+// As mentioned in the header, we only expose a DynamicMessageFactory
+// publicly, not the DynamicMessage class itself. This is because
+// GeneratedMessageReflection wants to have a pointer to a "default"
+// copy of the class, with all fields initialized to their default
+// values. We only want to construct one of these per message type,
+// so DynamicMessageFactory stores a cache of default messages for
+// each type it sees (each unique Descriptor pointer). The code
+// refers to the "default" copy of the class as the "prototype".
+//
+// Note on memory allocation: This module often calls "operator new()"
+// to allocate untyped memory, rather than calling something like
+// "new uint8[]". This is because "operator new()" means "Give me some
+// space which I can use as I please." while "new uint8[]" means "Give
+// me an array of 8-bit integers.". In practice, the latter may return
+// a pointer that is not aligned correctly for general use. I believe
+// Item 8 of "More Effective C++" discusses this in more detail, though
+// I don't have the book on me right now so I'm not sure.
+
+#include <algorithm>
+#include <google/protobuf/stubs/hash.h>
+
+#include <google/protobuf/stubs/common.h>
+
+#include <google/protobuf/dynamic_message.h>
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/descriptor.pb.h>
+#include <google/protobuf/generated_message_reflection.h>
+#include <google/protobuf/reflection_ops.h>
+#include <google/protobuf/repeated_field.h>
+#include <google/protobuf/extension_set.h>
+#include <google/protobuf/wire_format.h>
+
+namespace google {
+namespace protobuf {
+
+using internal::WireFormat;
+using internal::ExtensionSet;
+using internal::GeneratedMessageReflection;
+using internal::GenericRepeatedField;
+
+
+// ===================================================================
+// Some helper tables and functions...
+
+namespace {
+
+// Returns the number of bytes that |field| occupies in the in-memory layout
+// of a DynamicMessage.  Repeated fields are sized as the RepeatedField<T> /
+// RepeatedPtrField<T> object itself; singular strings and embedded messages
+// are sized as a pointer; every other singular field is sized as its value.
+int FieldSpaceUsed(const FieldDescriptor* field) {
+  const bool repeated = (field->label() == FieldDescriptor::LABEL_REPEATED);
+
+  switch (field->cpp_type()) {
+    case FieldDescriptor::CPPTYPE_INT32:
+      return repeated ? sizeof(RepeatedField<int32>) : sizeof(int32);
+    case FieldDescriptor::CPPTYPE_INT64:
+      return repeated ? sizeof(RepeatedField<int64>) : sizeof(int64);
+    case FieldDescriptor::CPPTYPE_UINT32:
+      return repeated ? sizeof(RepeatedField<uint32>) : sizeof(uint32);
+    case FieldDescriptor::CPPTYPE_UINT64:
+      return repeated ? sizeof(RepeatedField<uint64>) : sizeof(uint64);
+    case FieldDescriptor::CPPTYPE_DOUBLE:
+      return repeated ? sizeof(RepeatedField<double>) : sizeof(double);
+    case FieldDescriptor::CPPTYPE_FLOAT:
+      return repeated ? sizeof(RepeatedField<float>) : sizeof(float);
+    case FieldDescriptor::CPPTYPE_BOOL:
+      return repeated ? sizeof(RepeatedField<bool>) : sizeof(bool);
+    case FieldDescriptor::CPPTYPE_ENUM:
+      // Enum values are held as plain ints.
+      return repeated ? sizeof(RepeatedField<int>) : sizeof(int);
+    case FieldDescriptor::CPPTYPE_MESSAGE:
+      return repeated ? sizeof(RepeatedPtrField<Message>) : sizeof(Message*);
+    case FieldDescriptor::CPPTYPE_STRING:
+      return repeated ? sizeof(RepeatedPtrField<string>) : sizeof(string*);
+  }
+
+  // Only reached if cpp_type() yields a value not listed above.
+  GOOGLE_LOG(DFATAL) << "Can't get here.";
+  return 0;
+}
+
+// Comparison functor that decides the order in which fields are packed into
+// a DynamicMessage's data block: every repeated field sorts ahead of every
+// singular field, repeated fields compare as equivalent to one another, and
+// singular fields sort from largest to smallest FieldSpaceUsed().
+struct DescendingFieldSizeOrder {
+  inline bool operator()(const FieldDescriptor* a,
+                         const FieldDescriptor* b) {
+    const bool a_repeated = a->is_repeated();
+    const bool b_repeated = b->is_repeated();
+
+    if (a_repeated || b_repeated) {
+      // A repeated field precedes a singular one.  Two repeated fields are
+      // not ordered with respect to each other, so neither is "less".
+      return a_repeated && !b_repeated;
+    }
+
+    // Both singular: the bigger field goes first.
+    return FieldSpaceUsed(a) > FieldSpaceUsed(b);
+  }
+};
+
+// Integer division of |i| by |j| that rounds the quotient up instead of
+// truncating, e.g. DivideRoundingUp(33, 32) == 2.
+inline int DivideRoundingUp(int i, int j) {
+  const int bias = j - 1;
+  return (i + bias) / j;
+}
+
+#define bitsizeof(T) (sizeof(T) * 8)  // Width of type T in bits, counting 8 bits per byte.
+
+} // namespace
+
+// ===================================================================
+
+// Message implementation whose fields are laid out at run time from a
+// Descriptor rather than by generated code.  Field values live in the untyped
+// block at base_, at the byte offsets listed in offsets_ (indexed by field
+// index), and the has-bits words sit at the tail of that block.  reflection_
+// is handed the block and the offsets and operates on them directly.
+//
+// The instance for which base_ == prototype_base_ is the "prototype" of its
+// type.  Instances created through New() share the prototype's
+// prototype_base_ and offsets_.
+class DynamicMessage : public Message {
+ public:
+  // |base| points to |size| bytes obtained from operator new(); the message
+  // releases it with operator delete() in its destructor.  |offsets| holds one
+  // byte offset per field of |descriptor|.  Pass |prototype_base| == |base|
+  // when constructing the prototype itself.  If |pool| is NULL, the pool of
+  // |descriptor|'s file is used instead.
+  DynamicMessage(const Descriptor* descriptor,
+                 uint8* base, const uint8* prototype_base,
+                 int size, const int offsets[],
+                 const DescriptorPool* pool, DynamicMessageFactory* factory);
+
+  // Destroys the field objects and frees the data block.  The prototype
+  // additionally frees the offsets table.
+  ~DynamicMessage();
+
+  // Called on the prototype after construction to initialize message fields.
+  void CrossLinkPrototypes(DynamicMessageFactory* factory);
+
+  // implements Message ----------------------------------------------
+
+  // Allocates a fresh zero-filled data block of the same size and constructs
+  // a new message of the same type in it.
+  Message* New() const;
+
+  int GetCachedSize() const;
+  void SetCachedSize(int size) const;
+
+  const Descriptor* GetDescriptor() const;
+  const Reflection* GetReflection() const;
+  Reflection* GetReflection();
+
+ private:
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage);
+
+  // True if this object is the prototype of its type.  Declared const because
+  // it only compares two pointers and never modifies the message.
+  inline bool is_prototype() const { return base_ == prototype_base_; }
+
+  // NOTE: the constructor's initializer list relies on this declaration
+  // order (reflection_ is built from extensions_), so do not reorder.
+  const Descriptor* descriptor_;
+  const DescriptorPool* descriptor_pool_;
+  DynamicMessageFactory* factory_;
+  scoped_ptr<ExtensionSet> extensions_;  // NULL when there are no extension ranges.
+  GeneratedMessageReflection reflection_;
+  uint8* base_;                    // Field data block, freed in the destructor.
+  const uint8* prototype_base_;    // Data block of this type's prototype.
+  const int* offsets_;             // Per-field byte offsets into base_.
+  int size_;                       // Size of the data block in bytes.
+
+  // TODO(kenton): Make this an atomic<int> when C++ supports it.
+  mutable int cached_byte_size_;
+};
+
+// Wires the reflection object up to the caller-supplied data block and then
+// constructs every field object inside that block.
+DynamicMessage::DynamicMessage(const Descriptor* descriptor,
+                               uint8* base, const uint8* prototype_base,
+                               int size, const int offsets[],
+                               const DescriptorPool* pool,
+                               DynamicMessageFactory* factory)
+  : descriptor_(descriptor),
+    descriptor_pool_((pool != NULL) ? pool : descriptor->file()->pool()),
+    factory_(factory),
+    extensions_(descriptor->extension_range_count() > 0 ?
+                new ExtensionSet(descriptor, descriptor_pool_, factory_) :
+                NULL),
+    reflection_(descriptor, base, prototype_base, offsets,
+                // has_bits: the last words of the block.
+                reinterpret_cast<uint32*>(base + size) -
+                DivideRoundingUp(descriptor->field_count(), bitsizeof(uint32)),
+                extensions_.get()),
+    base_(base),
+    prototype_base_(prototype_base),
+    offsets_(offsets),
+    size_(size),
+    cached_byte_size_(0) {
+  // The block is raw memory, so each field's constructor has to be run by
+  // hand with placement new, supplying the declared default where there is
+  // one.  Placement new is used for primitives as well, purely so that all
+  // field kinds are treated uniformly.
+  const bool constructing_prototype = is_prototype();
+  const int field_count = descriptor->field_count();
+
+  for (int index = 0; index < field_count; ++index) {
+    const FieldDescriptor* field = descriptor->field(index);
+    const bool repeated = field->is_repeated();
+    void* slot = base + offsets[index];
+
+    switch (field->cpp_type()) {
+#define CONSTRUCT_PRIMITIVE(CPPTYPE, TYPE)                                  \
+      case FieldDescriptor::CPPTYPE_##CPPTYPE:                              \
+        if (repeated) {                                                     \
+          new(slot) RepeatedField<TYPE>();                                  \
+        } else {                                                            \
+          new(slot) TYPE(field->default_value_##TYPE());                    \
+        }                                                                   \
+        break;
+
+      CONSTRUCT_PRIMITIVE(INT32 , int32 );
+      CONSTRUCT_PRIMITIVE(INT64 , int64 );
+      CONSTRUCT_PRIMITIVE(UINT32, uint32);
+      CONSTRUCT_PRIMITIVE(UINT64, uint64);
+      CONSTRUCT_PRIMITIVE(DOUBLE, double);
+      CONSTRUCT_PRIMITIVE(FLOAT , float );
+      CONSTRUCT_PRIMITIVE(BOOL  , bool  );
+#undef CONSTRUCT_PRIMITIVE
+
+      case FieldDescriptor::CPPTYPE_ENUM:
+        if (repeated) {
+          new(slot) RepeatedField<int>();
+        } else {
+          new(slot) int(field->default_value_enum()->number());
+        }
+        break;
+
+      case FieldDescriptor::CPPTYPE_STRING:
+        if (repeated) {
+          new(slot) RepeatedPtrField<string>();
+        } else if (constructing_prototype) {
+          // The prototype points straight at the descriptor's default string.
+          new(slot) const string*(&field->default_value_string());
+        } else {
+          // Other instances start out sharing whatever the prototype holds.
+          string* const* prototype_slot =
+            reinterpret_cast<string* const*>(prototype_base + offsets[index]);
+          new(slot) string*(*prototype_slot);
+        }
+        break;
+
+      case FieldDescriptor::CPPTYPE_MESSAGE: {
+        // The prototype's message-typed fields are filled in afterwards by
+        // CrossLinkPrototypes(), so they are left untouched here.
+        if (constructing_prototype) break;
+
+        if (repeated) {
+          const RepeatedPtrField<Message>* prototype_field =
+            reinterpret_cast<const RepeatedPtrField<Message>*>(
+              prototype_base + offsets[index]);
+          new(slot) RepeatedPtrField<Message>(prototype_field->prototype());
+        } else {
+          new(slot) Message*(NULL);
+        }
+        break;
+      }
+    }
+  }
+}
+
+// Tears down everything the constructor (and, for the prototype,
+// CrossLinkPrototypes()) built inside the data block, then releases the block.
+DynamicMessage::~DynamicMessage() {
+  // Decide this up front: is_prototype() compares base_, and base_ is freed
+  // below.  Inspecting a pointer after its storage has been released is not
+  // something we want to rely on.
+  const bool prototype = is_prototype();
+
+  // We need to manually run the destructors for repeated fields and strings,
+  // just as we ran their constructors in the DynamicMessage constructor.
+  // Additionally, if any singular embedded messages have been allocated, we
+  // need to delete them, UNLESS we are the prototype message of this type,
+  // in which case any embedded messages are other prototypes and shouldn't
+  // be touched.
+  for (int i = 0; i < descriptor_->field_count(); i++) {
+    const FieldDescriptor* field = descriptor_->field(i);
+    void* field_ptr = base_ + offsets_[i];
+
+    if (field->is_repeated()) {
+      // Named differently from |field| so the descriptor is not shadowed.
+      GenericRepeatedField* repeated_field =
+        reinterpret_cast<GenericRepeatedField*>(field_ptr);
+      repeated_field->~GenericRepeatedField();
+
+    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
+      // A pointer to the descriptor's default string is shared, not owned.
+      string* ptr = *reinterpret_cast<string**>(field_ptr);
+      if (ptr != &field->default_value_string()) {
+        delete ptr;
+      }
+
+    } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) &&
+               !prototype) {
+      // May be NULL if the sub-message was never allocated; deleting a null
+      // pointer is a no-op.
+      delete *reinterpret_cast<Message**>(field_ptr);
+    }
+  }
+
+  // OK, now we can delete our base pointer.
+  operator delete(base_);
+
+  // When the prototype is deleted, we also want to free the offsets table.
+  // (The prototype is only deleted when the factory that created it is
+  // deleted.)
+  if (prototype) {
+    delete [] offsets_;
+  }
+}
+
+// Second construction phase for a prototype: fills in every message-typed
+// field, which the constructor deliberately skipped, using prototypes
+// obtained from |factory|.
+void DynamicMessage::CrossLinkPrototypes(DynamicMessageFactory* factory) {
+  // Only meaningful for the prototype message.
+  GOOGLE_CHECK(is_prototype());
+
+  const int field_count = descriptor_->field_count();
+  for (int index = 0; index < field_count; ++index) {
+    const FieldDescriptor* field = descriptor_->field(index);
+    if (field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
+      continue;  // Already constructed by the constructor.
+    }
+
+    void* slot = base_ + offsets_[index];
+    const Message* nested_prototype =
+      factory->GetPrototype(field->message_type());
+
+    if (!field->is_repeated()) {
+      // A singular field is just a pointer, and in the prototype it points
+      // at the nested type's prototype.  The const_cast is acceptable
+      // because the prototype is only ever exposed as const to the outside.
+      new(slot) Message*(const_cast<Message*>(nested_prototype));
+    } else {
+      // Repeated message fields are the one kind of repeated field that is
+      // constructed here rather than in the constructor.
+      new(slot) RepeatedPtrField<Message>(nested_prototype);
+    }
+  }
+}
+
+// Creates an empty message of the same type.  The new message gets its own
+// zero-filled data block but shares this message's prototype block and
+// offsets table.
+Message* DynamicMessage::New() const {
+  void* storage = operator new(size_);
+  memset(storage, 0, size_);
+
+  uint8* new_base = reinterpret_cast<uint8*>(storage);
+  return new DynamicMessage(GetDescriptor(), new_base, prototype_base_,
+                            size_, offsets_, descriptor_pool_, factory_);
+}
+
+int DynamicMessage::GetCachedSize() const {
+ return cached_byte_size_;  // Last value given to SetCachedSize(); 0 until then.
+}
+
+void DynamicMessage::SetCachedSize(int size) const {
+ // This is theoretically not thread-compatible, but in practice it works
+ // because if multiple threads write this simultaneously, they will be
+ // writing the exact same value.
+ cached_byte_size_ = size;  // Permitted in a const method: the member is declared mutable.
+}
+
+const Descriptor* DynamicMessage::GetDescriptor() const {
+ return descriptor_;  // The Descriptor this message was constructed with.
+}
+
+const Message::Reflection* DynamicMessage::GetReflection() const {
+ return &reflection_;  // Read-only access to the reflection object built in the constructor.
+}
+
+Message::Reflection* DynamicMessage::GetReflection() {
+ return &reflection_;  // Mutable access to the same reflection object as the const overload.
+}
+
+// ===================================================================
+
+struct DynamicMessageFactory::PrototypeMap {
+ typedef hash_map<const Descriptor*, const Message*> Map;  // Descriptor -> prototype built for it.
+ Map map_;  // Cache read and filled by GetPrototype(); entries are deleted by the factory destructor.
+};
+
+DynamicMessageFactory::DynamicMessageFactory()
+ : pool_(NULL), prototypes_(new PrototypeMap) {  // NULL pool: each message falls back to its descriptor's file pool.
+}
+
+DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool)
+ : pool_(pool), prototypes_(new PrototypeMap) {  // |pool| is forwarded to every prototype this factory constructs.
+}
+
+// Deletes every prototype that GetPrototype() has stored in the cache.
+DynamicMessageFactory::~DynamicMessageFactory() {
+  PrototypeMap::Map& cache = prototypes_->map_;
+  PrototypeMap::Map::iterator entry = cache.begin();
+  while (entry != cache.end()) {
+    delete entry->second;
+    ++entry;
+  }
+}
+
+
+// Returns the prototype (default instance) for |type|, building and caching
+// it on first request.  The returned message is deleted by the factory's
+// destructor.
+const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
+  const Message** target = &prototypes_->map_[type];
+  if (*target != NULL) {
+    // Already exists.
+    return *target;
+  }
+
+  // We need to construct all the structures passed to
+  // GeneratedMessageReflection's constructor.  This includes:
+  // - A block of memory that contains space for all the message's fields.
+  // - An array of integers indicating the byte offset of each field within
+  //   this block.
+  // - A big bitfield containing a bit for each field indicating whether
+  //   or not that field is set.
+
+  const int field_count = type->field_count();
+
+  // Compute size and offsets.  The offsets table is handed to the prototype,
+  // which frees it in its destructor.
+  int* offsets = new int[field_count];
+
+  // Sort the fields of this message in descending order by size.  We
+  // assume that if we then pack the fields tightly in this order, all fields
+  // will end up properly-aligned, since all field sizes are powers of two or
+  // are multiples of the system word size.
+  scoped_array<const FieldDescriptor*> ordered_fields(
+    new const FieldDescriptor*[field_count]);
+  for (int i = 0; i < field_count; i++) {
+    ordered_fields[i] = type->field(i);
+  }
+  if (field_count > 1) {
+    // Build the end iterator by pointer arithmetic rather than by indexing
+    // element [field_count], which does not exist.  Nothing needs sorting
+    // (and element [0] may not exist) when there are fewer than two fields.
+    const FieldDescriptor** first = &ordered_fields[0];
+    stable_sort(first, first + field_count, DescendingFieldSizeOrder());
+  }
+
+  // Decide all field offsets by packing in order.
+  int current_offset = 0;
+
+  for (int i = 0; i < field_count; i++) {
+    offsets[ordered_fields[i]->index()] = current_offset;
+    current_offset += FieldSpaceUsed(ordered_fields[i]);
+  }
+
+  // Allocate space for all fields plus has_bits.  We'll stick has_bits on
+  // the end.
+  int size = current_offset +
+    DivideRoundingUp(field_count, bitsizeof(uint32)) * sizeof(uint32);
+
+  // Round size up to the nearest 64-bit boundary just to make sure no
+  // clever allocators think that alignment is not necessary.  This also
+  // ensures that has_bits is properly-aligned, since we'll always align
+  // has_bits with the end of the structure.
+  size = DivideRoundingUp(size, sizeof(uint64)) * sizeof(uint64);
+  uint8* base = reinterpret_cast<uint8*>(operator new(size));
+  memset(base, 0, size);
+
+  // Construct message.  The cache entry is filled in BEFORE cross-linking:
+  // CrossLinkPrototypes() calls back into GetPrototype() for every
+  // message-typed field, and a type that (directly or indirectly) contains
+  // itself must find this entry instead of recursing forever.
+  DynamicMessage* result =
+    new DynamicMessage(type, base, base, size, offsets, pool_, this);
+  *target = result;
+  result->CrossLinkPrototypes(this);
+
+  return result;
+}
+
+} // namespace protobuf
+
+} // namespace google