diff options
Diffstat (limited to 'ruby/ext')
-rw-r--r-- | ruby/ext/protobuf_c/defs.c | 1286 | ||||
-rw-r--r-- | ruby/ext/protobuf_c/encode_decode.c | 755 | ||||
-rw-r--r-- | ruby/ext/protobuf_c/extconf.rb | 23 | ||||
-rw-r--r-- | ruby/ext/protobuf_c/message.c | 463 | ||||
-rw-r--r-- | ruby/ext/protobuf_c/protobuf.c | 102 | ||||
-rw-r--r-- | ruby/ext/protobuf_c/protobuf.h | 404 | ||||
-rw-r--r-- | ruby/ext/protobuf_c/repeated_field.c | 597 | ||||
-rw-r--r-- | ruby/ext/protobuf_c/storage.c | 577 |
8 files changed, 4207 insertions, 0 deletions
diff --git a/ruby/ext/protobuf_c/defs.c b/ruby/ext/protobuf_c/defs.c new file mode 100644 index 00000000..bb6f10e1 --- /dev/null +++ b/ruby/ext/protobuf_c/defs.c @@ -0,0 +1,1286 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "protobuf.h" + +// ----------------------------------------------------------------------------- +// Common utilities. 
+// ----------------------------------------------------------------------------- + +const char* kDescriptorInstanceVar = "descriptor"; + +static const char* get_str(VALUE str) { + Check_Type(str, T_STRING); + return RSTRING_PTR(str); +} + +static VALUE rb_str_maybe_null(const char* s) { + if (s == NULL) { + s = ""; + } + return rb_str_new2(s); +} + +static upb_def* check_notfrozen(const upb_def* def) { + if (upb_def_isfrozen(def)) { + rb_raise(rb_eRuntimeError, + "Attempt to modify a frozen descriptor. Once descriptors are " + "added to the descriptor pool, they may not be modified."); + } + return (upb_def*)def; +} + +static upb_msgdef* check_msg_notfrozen(const upb_msgdef* def) { + return (upb_msgdef*)check_notfrozen((const upb_def*)def); +} + +static upb_fielddef* check_field_notfrozen(const upb_fielddef* def) { + return (upb_fielddef*)check_notfrozen((const upb_def*)def); +} + +static upb_enumdef* check_enum_notfrozen(const upb_enumdef* def) { + return (upb_enumdef*)check_notfrozen((const upb_def*)def); +} + +// ----------------------------------------------------------------------------- +// DescriptorPool. +// ----------------------------------------------------------------------------- + +#define DEFINE_CLASS(name, string_name) \ + VALUE c ## name; \ + const rb_data_type_t _ ## name ## _type = { \ + string_name, \ + { name ## _mark, name ## _free, NULL }, \ + }; \ + name* ruby_to_ ## name(VALUE val) { \ + name* ret; \ + TypedData_Get_Struct(val, name, &_ ## name ## _type, ret); \ + return ret; \ + } \ + +#define DEFINE_SELF(type, var, rb_var) \ + type* var = ruby_to_ ## type(rb_var); + +// Global singleton DescriptorPool. The user is free to create others, but this +// is used by generated code. 
+VALUE generated_pool; + +DEFINE_CLASS(DescriptorPool, "Google::Protobuf::DescriptorPool"); + +void DescriptorPool_mark(void* _self) { +} + +void DescriptorPool_free(void* _self) { + DescriptorPool* self = _self; + upb_symtab_unref(self->symtab, &self->symtab); + xfree(self); +} + +/* + * call-seq: + * DescriptorPool.new => pool + * + * Creates a new, empty, descriptor pool. + */ +VALUE DescriptorPool_alloc(VALUE klass) { + DescriptorPool* self = ALLOC(DescriptorPool); + self->symtab = upb_symtab_new(&self->symtab); + return TypedData_Wrap_Struct(klass, &_DescriptorPool_type, self); +} + +void DescriptorPool_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "DescriptorPool", rb_cObject); + rb_define_alloc_func(klass, DescriptorPool_alloc); + rb_define_method(klass, "add", DescriptorPool_add, 1); + rb_define_method(klass, "build", DescriptorPool_build, 0); + rb_define_method(klass, "lookup", DescriptorPool_lookup, 1); + rb_define_singleton_method(klass, "generated_pool", + DescriptorPool_generated_pool, 0); + cDescriptorPool = klass; + rb_gc_register_address(&cDescriptorPool); + + generated_pool = rb_class_new_instance(0, NULL, klass); + rb_gc_register_address(&generated_pool); +} + +static void add_descriptor_to_pool(DescriptorPool* self, + Descriptor* descriptor) { + CHECK_UPB( + upb_symtab_add(self->symtab, (upb_def**)&descriptor->msgdef, 1, + NULL, &status), + "Adding Descriptor to DescriptorPool failed"); +} + +static void add_enumdesc_to_pool(DescriptorPool* self, + EnumDescriptor* enumdesc) { + CHECK_UPB( + upb_symtab_add(self->symtab, (upb_def**)&enumdesc->enumdef, 1, + NULL, &status), + "Adding EnumDescriptor to DescriptorPool failed"); +} + +/* + * call-seq: + * DescriptorPool.add(descriptor) + * + * Adds the given Descriptor or EnumDescriptor to this pool. All references to + * other types in a Descriptor's fields must be resolvable within this pool or + * an exception will be raised. 
+ */ +VALUE DescriptorPool_add(VALUE _self, VALUE def) { + DEFINE_SELF(DescriptorPool, self, _self); + VALUE def_klass = rb_obj_class(def); + if (def_klass == cDescriptor) { + add_descriptor_to_pool(self, ruby_to_Descriptor(def)); + } else if (def_klass == cEnumDescriptor) { + add_enumdesc_to_pool(self, ruby_to_EnumDescriptor(def)); + } else { + rb_raise(rb_eArgError, + "Second argument must be a Descriptor or EnumDescriptor."); + } + return Qnil; +} + +/* + * call-seq: + * DescriptorPool.build(&block) + * + * Invokes the block with a Builder instance as self. All message and enum types + * added within the block are committed to the pool atomically, and may refer + * (co)recursively to each other. The user should call Builder#add_message and + * Builder#add_enum within the block as appropriate. This is the recommended, + * idiomatic way to define new message and enum types. + */ +VALUE DescriptorPool_build(VALUE _self) { + VALUE ctx = rb_class_new_instance(0, NULL, cBuilder); + VALUE block = rb_block_proc(); + rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); + rb_funcall(ctx, rb_intern("finalize_to_pool"), 1, _self); + return Qnil; +} + +/* + * call-seq: + * DescriptorPool.lookup(name) => descriptor + * + * Finds a Descriptor or EnumDescriptor by name and returns it, or nil if none + * exists with the given name. + */ +VALUE DescriptorPool_lookup(VALUE _self, VALUE name) { + DEFINE_SELF(DescriptorPool, self, _self); + const char* name_str = get_str(name); + const upb_def* def = upb_symtab_lookup(self->symtab, name_str); + if (!def) { + return Qnil; + } + return get_def_obj(def); +} + +/* + * call-seq: + * DescriptorPool.generated_pool => descriptor_pool + * + * Class method that returns the global DescriptorPool. This is a singleton into + * which generated-code message and enum types are registered. The user may also + * register types in this pool for convenience so that they do not have to hold + * a reference to a private pool instance. 
+ */ +VALUE DescriptorPool_generated_pool(VALUE _self) { + return generated_pool; +} + +// ----------------------------------------------------------------------------- +// Descriptor. +// ----------------------------------------------------------------------------- + +DEFINE_CLASS(Descriptor, "Google::Protobuf::Descriptor"); + +void Descriptor_mark(void* _self) { + Descriptor* self = _self; + rb_gc_mark(self->klass); +} + +void Descriptor_free(void* _self) { + Descriptor* self = _self; + upb_msgdef_unref(self->msgdef, &self->msgdef); + if (self->layout) { + free_layout(self->layout); + } + if (self->fill_handlers) { + upb_handlers_unref(self->fill_handlers, &self->fill_handlers); + } + if (self->fill_method) { + upb_pbdecodermethod_unref(self->fill_method, &self->fill_method); + } + if (self->pb_serialize_handlers) { + upb_handlers_unref(self->pb_serialize_handlers, + &self->pb_serialize_handlers); + } + if (self->json_serialize_handlers) { + upb_handlers_unref(self->pb_serialize_handlers, + &self->json_serialize_handlers); + } + xfree(self); +} + +/* + * call-seq: + * Descriptor.new => descriptor + * + * Creates a new, empty, message type descriptor. At a minimum, its name must be + * set before it is added to a pool. It cannot be used to create messages until + * it is added to a pool, after which it becomes immutable (as part of a + * finalization process). 
+ */ +VALUE Descriptor_alloc(VALUE klass) { + Descriptor* self = ALLOC(Descriptor); + VALUE ret = TypedData_Wrap_Struct(klass, &_Descriptor_type, self); + self->msgdef = upb_msgdef_new(&self->msgdef); + self->klass = Qnil; + self->layout = NULL; + self->fill_handlers = NULL; + self->fill_method = NULL; + self->pb_serialize_handlers = NULL; + self->json_serialize_handlers = NULL; + return ret; +} + +void Descriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "Descriptor", rb_cObject); + rb_define_alloc_func(klass, Descriptor_alloc); + rb_define_method(klass, "each", Descriptor_each, 0); + rb_define_method(klass, "lookup", Descriptor_lookup, 1); + rb_define_method(klass, "add_field", Descriptor_add_field, 1); + rb_define_method(klass, "msgclass", Descriptor_msgclass, 0); + rb_define_method(klass, "name", Descriptor_name, 0); + rb_define_method(klass, "name=", Descriptor_name_set, 1); + rb_include_module(klass, rb_mEnumerable); + cDescriptor = klass; + rb_gc_register_address(&cDescriptor); +} + +/* + * call-seq: + * Descriptor.name => name + * + * Returns the name of this message type as a fully-qualfied string (e.g., + * My.Package.MessageType). + */ +VALUE Descriptor_name(VALUE _self) { + DEFINE_SELF(Descriptor, self, _self); + return rb_str_maybe_null(upb_msgdef_fullname(self->msgdef)); +} + +/* + * call-seq: + * Descriptor.name = name + * + * Assigns a name to this message type. The descriptor must not have been added + * to a pool yet. + */ +VALUE Descriptor_name_set(VALUE _self, VALUE str) { + DEFINE_SELF(Descriptor, self, _self); + upb_msgdef* mut_def = check_msg_notfrozen(self->msgdef); + const char* name = get_str(str); + CHECK_UPB( + upb_msgdef_setfullname(mut_def, name, &status), + "Error setting Descriptor name"); + return Qnil; +} + +/* + * call-seq: + * Descriptor.each(&block) + * + * Iterates over fields in this message type, yielding to the block on each one. 
+ */ +VALUE Descriptor_each(VALUE _self) { + DEFINE_SELF(Descriptor, self, _self); + + upb_msg_iter it; + for (upb_msg_begin(&it, self->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + VALUE obj = get_def_obj(field); + rb_yield(obj); + } + return Qnil; +} + +/* + * call-seq: + * Descriptor.lookup(name) => FieldDescriptor + * + * Returns the field descriptor for the field with the given name, if present, + * or nil if none. + */ +VALUE Descriptor_lookup(VALUE _self, VALUE name) { + DEFINE_SELF(Descriptor, self, _self); + const char* s = get_str(name); + const upb_fielddef* field = upb_msgdef_ntofz(self->msgdef, s); + if (field == NULL) { + return Qnil; + } + return get_def_obj(field); +} + +/* + * call-seq: + * Descriptor.add_field(field) => nil + * + * Adds the given FieldDescriptor to this message type. The descriptor must not + * have been added to a pool yet. Raises an exception if a field with the same + * name or number already exists. Sub-type references (e.g. for fields of type + * message) are not resolved at this point. + */ +VALUE Descriptor_add_field(VALUE _self, VALUE obj) { + DEFINE_SELF(Descriptor, self, _self); + upb_msgdef* mut_def = check_msg_notfrozen(self->msgdef); + FieldDescriptor* def = ruby_to_FieldDescriptor(obj); + upb_fielddef* mut_field_def = check_field_notfrozen(def->fielddef); + CHECK_UPB( + upb_msgdef_addfield(mut_def, mut_field_def, NULL, &status), + "Adding field to Descriptor failed"); + add_def_obj(def->fielddef, obj); + return Qnil; +} + +/* + * call-seq: + * Descriptor.msgclass => message_klass + * + * Returns the Ruby class created for this message type. Valid only once the + * message type has been added to a pool. 
+ */ +VALUE Descriptor_msgclass(VALUE _self) { + DEFINE_SELF(Descriptor, self, _self); + if (!upb_def_isfrozen((const upb_def*)self->msgdef)) { + rb_raise(rb_eRuntimeError, + "Cannot fetch message class from a Descriptor not yet in a pool."); + } + if (self->klass == Qnil) { + self->klass = build_class_from_descriptor(self); + } + return self->klass; +} + +// ----------------------------------------------------------------------------- +// FieldDescriptor. +// ----------------------------------------------------------------------------- + +DEFINE_CLASS(FieldDescriptor, "Google::Protobuf::FieldDescriptor"); + +void FieldDescriptor_mark(void* _self) { +} + +void FieldDescriptor_free(void* _self) { + FieldDescriptor* self = _self; + upb_fielddef_unref(self->fielddef, &self->fielddef); + xfree(self); +} + +/* + * call-seq: + * FieldDescriptor.new => field + * + * Returns a new field descriptor. Its name, type, etc. must be set before it is + * added to a message type. + */ +VALUE FieldDescriptor_alloc(VALUE klass) { + FieldDescriptor* self = ALLOC(FieldDescriptor); + VALUE ret = TypedData_Wrap_Struct(klass, &_FieldDescriptor_type, self); + upb_fielddef* fielddef = upb_fielddef_new(&self->fielddef); + upb_fielddef_setpacked(fielddef, false); + self->fielddef = fielddef; + return ret; +} + +void FieldDescriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "FieldDescriptor", rb_cObject); + rb_define_alloc_func(klass, FieldDescriptor_alloc); + rb_define_method(klass, "name", FieldDescriptor_name, 0); + rb_define_method(klass, "name=", FieldDescriptor_name_set, 1); + rb_define_method(klass, "type", FieldDescriptor_type, 0); + rb_define_method(klass, "type=", FieldDescriptor_type_set, 1); + rb_define_method(klass, "label", FieldDescriptor_label, 0); + rb_define_method(klass, "label=", FieldDescriptor_label_set, 1); + rb_define_method(klass, "number", FieldDescriptor_number, 0); + rb_define_method(klass, "number=", FieldDescriptor_number_set, 1); 
+ rb_define_method(klass, "submsg_name", FieldDescriptor_submsg_name, 0); + rb_define_method(klass, "submsg_name=", FieldDescriptor_submsg_name_set, 1); + rb_define_method(klass, "subtype", FieldDescriptor_subtype, 0); + rb_define_method(klass, "get", FieldDescriptor_get, 1); + rb_define_method(klass, "set", FieldDescriptor_set, 2); + cFieldDescriptor = klass; + rb_gc_register_address(&cFieldDescriptor); +} + +/* + * call-seq: + * FieldDescriptor.name => name + * + * Returns the name of this field. + */ +VALUE FieldDescriptor_name(VALUE _self) { + DEFINE_SELF(FieldDescriptor, self, _self); + return rb_str_maybe_null(upb_fielddef_name(self->fielddef)); +} + +/* + * call-seq: + * FieldDescriptor.name = name + * + * Sets the name of this field. Cannot be called once the containing message + * type, if any, is added to a pool. + */ +VALUE FieldDescriptor_name_set(VALUE _self, VALUE str) { + DEFINE_SELF(FieldDescriptor, self, _self); + upb_fielddef* mut_def = check_field_notfrozen(self->fielddef); + const char* name = get_str(str); + CHECK_UPB(upb_fielddef_setname(mut_def, name, &status), + "Error setting FieldDescriptor name"); + return Qnil; +} + +upb_fieldtype_t ruby_to_fieldtype(VALUE type) { + if (TYPE(type) != T_SYMBOL) { + rb_raise(rb_eArgError, "Expected symbol for field type."); + } + + upb_fieldtype_t upb_type = -1; + +#define CONVERT(upb, ruby) \ + if (SYM2ID(type) == rb_intern( # ruby )) { \ + upb_type = UPB_TYPE_ ## upb; \ + } + + CONVERT(FLOAT, float); + CONVERT(DOUBLE, double); + CONVERT(BOOL, bool); + CONVERT(STRING, string); + CONVERT(BYTES, bytes); + CONVERT(MESSAGE, message); + CONVERT(ENUM, enum); + CONVERT(INT32, int32); + CONVERT(INT64, int64); + CONVERT(UINT32, uint32); + CONVERT(UINT64, uint64); + +#undef CONVERT + + if (upb_type == -1) { + rb_raise(rb_eArgError, "Unknown field type."); + } + + return upb_type; +} + +VALUE fieldtype_to_ruby(upb_fieldtype_t type) { + switch (type) { +#define CONVERT(upb, ruby) \ + case UPB_TYPE_ ## upb : return 
ID2SYM(rb_intern( # ruby )); + CONVERT(FLOAT, float); + CONVERT(DOUBLE, double); + CONVERT(BOOL, bool); + CONVERT(STRING, string); + CONVERT(BYTES, bytes); + CONVERT(MESSAGE, message); + CONVERT(ENUM, enum); + CONVERT(INT32, int32); + CONVERT(INT64, int64); + CONVERT(UINT32, uint32); + CONVERT(UINT64, uint64); +#undef CONVERT + } + return Qnil; +} + +/* + * call-seq: + * FieldDescriptor.type => type + * + * Returns this field's type, as a Ruby symbol, or nil if not yet set. + * + * Valid field types are: + * :int32, :int64, :uint32, :uint64, :float, :double, :bool, :string, + * :bytes, :message. + */ +VALUE FieldDescriptor_type(VALUE _self) { + DEFINE_SELF(FieldDescriptor, self, _self); + if (!upb_fielddef_typeisset(self->fielddef)) { + return Qnil; + } + return fieldtype_to_ruby(upb_fielddef_type(self->fielddef)); +} + +/* + * call-seq: + * FieldDescriptor.type = type + * + * Sets this field's type. Cannot be called if field is part of a message type + * already in a pool. + */ +VALUE FieldDescriptor_type_set(VALUE _self, VALUE type) { + DEFINE_SELF(FieldDescriptor, self, _self); + upb_fielddef* mut_def = check_field_notfrozen(self->fielddef); + upb_fielddef_settype(mut_def, ruby_to_fieldtype(type)); + return Qnil; +} + +/* + * call-seq: + * FieldDescriptor.label => label + * + * Returns this field's label (i.e., plurality), as a Ruby symbol. + * + * Valid field labels are: + * :optional, :repeated + */ +VALUE FieldDescriptor_label(VALUE _self) { + DEFINE_SELF(FieldDescriptor, self, _self); + switch (upb_fielddef_label(self->fielddef)) { +#define CONVERT(upb, ruby) \ + case UPB_LABEL_ ## upb : return ID2SYM(rb_intern( # ruby )); + + CONVERT(OPTIONAL, optional); + CONVERT(REQUIRED, required); + CONVERT(REPEATED, repeated); + +#undef CONVERT + } + + return Qnil; +} + +/* + * call-seq: + * FieldDescriptor.label = label + * + * Sets the label on this field. Cannot be called if field is part of a message + * type already in a pool. 
+ */ +VALUE FieldDescriptor_label_set(VALUE _self, VALUE label) { + DEFINE_SELF(FieldDescriptor, self, _self); + upb_fielddef* mut_def = check_field_notfrozen(self->fielddef); + if (TYPE(label) != T_SYMBOL) { + rb_raise(rb_eArgError, "Expected symbol for field label."); + } + + upb_label_t upb_label = -1; + +#define CONVERT(upb, ruby) \ + if (SYM2ID(label) == rb_intern( # ruby )) { \ + upb_label = UPB_LABEL_ ## upb; \ + } + + CONVERT(OPTIONAL, optional); + CONVERT(REQUIRED, required); + CONVERT(REPEATED, repeated); + +#undef CONVERT + + if (upb_label == -1) { + rb_raise(rb_eArgError, "Unknown field label."); + } + + upb_fielddef_setlabel(mut_def, upb_label); + + return Qnil; +} + +/* + * call-seq: + * FieldDescriptor.number => number + * + * Returns the tag number for this field. + */ +VALUE FieldDescriptor_number(VALUE _self) { + DEFINE_SELF(FieldDescriptor, self, _self); + return INT2NUM(upb_fielddef_number(self->fielddef)); +} + +/* + * call-seq: + * FieldDescriptor.number = number + * + * Sets the tag number for this field. Cannot be called if field is part of a + * message type already in a pool. + */ +VALUE FieldDescriptor_number_set(VALUE _self, VALUE number) { + DEFINE_SELF(FieldDescriptor, self, _self); + upb_fielddef* mut_def = check_field_notfrozen(self->fielddef); + CHECK_UPB(upb_fielddef_setnumber(mut_def, NUM2INT(number), &status), + "Error setting field number"); + return Qnil; +} + +/* + * call-seq: + * FieldDescriptor.submsg_name => submsg_name + * + * Returns the name of the message or enum type corresponding to this field, if + * it is a message or enum field (respectively), or nil otherwise. This type + * name will be resolved within the context of the pool to which the containing + * message type is added. 
+ */ +VALUE FieldDescriptor_submsg_name(VALUE _self) { + DEFINE_SELF(FieldDescriptor, self, _self); + if (!upb_fielddef_hassubdef(self->fielddef)) { + return Qnil; + } + return rb_str_maybe_null(upb_fielddef_subdefname(self->fielddef)); +} + +/* + * call-seq: + * FieldDescriptor.submsg_name = submsg_name + * + * Sets the name of the message or enum type corresponding to this field, if it + * is a message or enum field (respectively). This type name will be resolved + * within the context of the pool to which the containing message type is added. + * Cannot be called on field that are not of message or enum type, or on fields + * that are part of a message type already added to a pool. + */ +VALUE FieldDescriptor_submsg_name_set(VALUE _self, VALUE value) { + DEFINE_SELF(FieldDescriptor, self, _self); + upb_fielddef* mut_def = check_field_notfrozen(self->fielddef); + if (!upb_fielddef_hassubdef(self->fielddef)) { + rb_raise(rb_eTypeError, "FieldDescriptor does not have subdef."); + } + const char* str = get_str(value); + CHECK_UPB(upb_fielddef_setsubdefname(mut_def, str, &status), + "Error setting submessage name"); + return Qnil; +} + +/* + * call-seq: + * FieldDescriptor.subtype => message_or_enum_descriptor + * + * Returns the message or enum descriptor corresponding to this field's type if + * it is a message or enum field, respectively, or nil otherwise. Cannot be + * called *until* the containing message type is added to a pool (and thus + * resolved). + */ +VALUE FieldDescriptor_subtype(VALUE _self) { + DEFINE_SELF(FieldDescriptor, self, _self); + if (!upb_fielddef_hassubdef(self->fielddef)) { + return Qnil; + } + const upb_def* def = upb_fielddef_subdef(self->fielddef); + if (def == NULL) { + return Qnil; + } + return get_def_obj(def); +} + +/* + * call-seq: + * FieldDescriptor.get(message) => value + * + * Returns the value set for this field on the given message. Raises an + * exception if message is of the wrong type. 
+ */ +VALUE FieldDescriptor_get(VALUE _self, VALUE msg_rb) { + DEFINE_SELF(FieldDescriptor, self, _self); + MessageHeader* msg; + TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); + if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) { + rb_raise(rb_eTypeError, "get method called on wrong message type"); + } + return layout_get(msg->descriptor->layout, Message_data(msg), self->fielddef); +} + +/* + * call-seq: + * FieldDescriptor.set(message, value) + * + * Sets the value corresponding to this field to the given value on the given + * message. Raises an exception if message is of the wrong type. Performs the + * ordinary type-checks for field setting. + */ +VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value) { + DEFINE_SELF(FieldDescriptor, self, _self); + MessageHeader* msg; + TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); + if (msg->descriptor->msgdef != upb_fielddef_containingtype(self->fielddef)) { + rb_raise(rb_eTypeError, "set method called on wrong message type"); + } + layout_set(msg->descriptor->layout, Message_data(msg), self->fielddef, value); + return Qnil; +} + +// ----------------------------------------------------------------------------- +// EnumDescriptor. +// ----------------------------------------------------------------------------- + +DEFINE_CLASS(EnumDescriptor, "Google::Protobuf::EnumDescriptor"); + +void EnumDescriptor_mark(void* _self) { + EnumDescriptor* self = _self; + rb_gc_mark(self->module); +} + +void EnumDescriptor_free(void* _self) { + EnumDescriptor* self = _self; + upb_enumdef_unref(self->enumdef, &self->enumdef); + xfree(self); +} + +/* + * call-seq: + * EnumDescriptor.new => enum_descriptor + * + * Creates a new, empty, enum descriptor. Must be added to a pool before the + * enum type can be used. The enum type may only be modified prior to adding to + * a pool. 
+ */ +VALUE EnumDescriptor_alloc(VALUE klass) { + EnumDescriptor* self = ALLOC(EnumDescriptor); + VALUE ret = TypedData_Wrap_Struct(klass, &_EnumDescriptor_type, self); + self->enumdef = upb_enumdef_new(&self->enumdef); + self->module = Qnil; + return ret; +} + +void EnumDescriptor_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "EnumDescriptor", rb_cObject); + rb_define_alloc_func(klass, EnumDescriptor_alloc); + rb_define_method(klass, "name", EnumDescriptor_name, 0); + rb_define_method(klass, "name=", EnumDescriptor_name_set, 1); + rb_define_method(klass, "add_value", EnumDescriptor_add_value, 2); + rb_define_method(klass, "lookup_name", EnumDescriptor_lookup_name, 1); + rb_define_method(klass, "lookup_value", EnumDescriptor_lookup_value, 1); + rb_define_method(klass, "each", EnumDescriptor_each, 0); + rb_define_method(klass, "enummodule", EnumDescriptor_enummodule, 0); + rb_include_module(klass, rb_mEnumerable); + cEnumDescriptor = klass; + rb_gc_register_address(&cEnumDescriptor); +} + +/* + * call-seq: + * EnumDescriptor.name => name + * + * Returns the name of this enum type. + */ +VALUE EnumDescriptor_name(VALUE _self) { + DEFINE_SELF(EnumDescriptor, self, _self); + return rb_str_maybe_null(upb_enumdef_fullname(self->enumdef)); +} + +/* + * call-seq: + * EnumDescriptor.name = name + * + * Sets the name of this enum type. Cannot be called if the enum type has + * already been added to a pool. + */ +VALUE EnumDescriptor_name_set(VALUE _self, VALUE str) { + DEFINE_SELF(EnumDescriptor, self, _self); + upb_enumdef* mut_def = check_enum_notfrozen(self->enumdef); + const char* name = get_str(str); + CHECK_UPB(upb_enumdef_setfullname(mut_def, name, &status), + "Error setting EnumDescriptor name"); + return Qnil; +} + +/* + * call-seq: + * EnumDescriptor.add_value(key, value) + * + * Adds a new key => value mapping to this enum type. Key must be given as a + * Ruby symbol. 
Cannot be called if the enum type has already been added to a + * pool. Will raise an exception if the key or value is already in use. + */ +VALUE EnumDescriptor_add_value(VALUE _self, VALUE name, VALUE number) { + DEFINE_SELF(EnumDescriptor, self, _self); + upb_enumdef* mut_def = check_enum_notfrozen(self->enumdef); + const char* name_str = rb_id2name(SYM2ID(name)); + int32_t val = NUM2INT(number); + CHECK_UPB(upb_enumdef_addval(mut_def, name_str, val, &status), + "Error adding value to enum"); + return Qnil; +} + +/* + * call-seq: + * EnumDescriptor.lookup_name(name) => value + * + * Returns the numeric value corresponding to the given key name (as a Ruby + * symbol), or nil if none. + */ +VALUE EnumDescriptor_lookup_name(VALUE _self, VALUE name) { + DEFINE_SELF(EnumDescriptor, self, _self); + const char* name_str= rb_id2name(SYM2ID(name)); + int32_t val = 0; + if (upb_enumdef_ntoiz(self->enumdef, name_str, &val)) { + return INT2NUM(val); + } else { + return Qnil; + } +} + +/* + * call-seq: + * EnumDescriptor.lookup_value(name) => value + * + * Returns the key name (as a Ruby symbol) corresponding to the integer value, + * or nil if none. + */ +VALUE EnumDescriptor_lookup_value(VALUE _self, VALUE number) { + DEFINE_SELF(EnumDescriptor, self, _self); + int32_t val = NUM2INT(number); + const char* name = upb_enumdef_iton(self->enumdef, val); + if (name != NULL) { + return ID2SYM(rb_intern(name)); + } else { + return Qnil; + } +} + +/* + * call-seq: + * EnumDescriptor.each(&block) + * + * Iterates over key => value mappings in this enum's definition, yielding to + * the block with (key, value) arguments for each one. 
+ */ +VALUE EnumDescriptor_each(VALUE _self) { + DEFINE_SELF(EnumDescriptor, self, _self); + + upb_enum_iter it; + for (upb_enum_begin(&it, self->enumdef); + !upb_enum_done(&it); + upb_enum_next(&it)) { + VALUE key = ID2SYM(rb_intern(upb_enum_iter_name(&it))); + VALUE number = INT2NUM(upb_enum_iter_number(&it)); + rb_yield_values(2, key, number); + } + + return Qnil; +} + +/* + * call-seq: + * EnumDescriptor.enummodule => module + * + * Returns the Ruby module corresponding to this enum type. Cannot be called + * until the enum descriptor has been added to a pool. + */ +VALUE EnumDescriptor_enummodule(VALUE _self) { + DEFINE_SELF(EnumDescriptor, self, _self); + if (!upb_def_isfrozen((const upb_def*)self->enumdef)) { + rb_raise(rb_eRuntimeError, + "Cannot fetch enum module from an EnumDescriptor not yet " + "in a pool."); + } + if (self->module == Qnil) { + self->module = build_module_from_enumdesc(self); + } + return self->module; +} + +// ----------------------------------------------------------------------------- +// MessageBuilderContext. 
+// ----------------------------------------------------------------------------- + +DEFINE_CLASS(MessageBuilderContext, + "Google::Protobuf::Internal::MessageBuilderContext"); + +void MessageBuilderContext_mark(void* _self) { + MessageBuilderContext* self = _self; + rb_gc_mark(self->descriptor); +} + +void MessageBuilderContext_free(void* _self) { + MessageBuilderContext* self = _self; + xfree(self); +} + +VALUE MessageBuilderContext_alloc(VALUE klass) { + MessageBuilderContext* self = ALLOC(MessageBuilderContext); + VALUE ret = TypedData_Wrap_Struct( + klass, &_MessageBuilderContext_type, self); + self->descriptor = Qnil; + return ret; +} + +void MessageBuilderContext_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "MessageBuilderContext", rb_cObject); + rb_define_alloc_func(klass, MessageBuilderContext_alloc); + rb_define_method(klass, "initialize", + MessageBuilderContext_initialize, 1); + rb_define_method(klass, "optional", MessageBuilderContext_optional, -1); + rb_define_method(klass, "required", MessageBuilderContext_required, -1); + rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1); + cMessageBuilderContext = klass; + rb_gc_register_address(&cMessageBuilderContext); +} + +/* + * call-seq: + * MessageBuilderContext.new(desc) => context + * + * Create a new builder context around the given message descriptor. This class + * is intended to serve as a DSL context to be used with #instance_eval. 
+ */ +VALUE MessageBuilderContext_initialize(VALUE _self, VALUE msgdef) { + DEFINE_SELF(MessageBuilderContext, self, _self); + self->descriptor = msgdef; + return Qnil; +} + +static VALUE msgdef_add_field(VALUE msgdef, + const char* label, VALUE name, + VALUE type, VALUE number, + VALUE type_class) { + VALUE fielddef = rb_class_new_instance(0, NULL, cFieldDescriptor); + VALUE name_str = rb_str_new2(rb_id2name(SYM2ID(name))); + + rb_funcall(fielddef, rb_intern("label="), 1, ID2SYM(rb_intern(label))); + rb_funcall(fielddef, rb_intern("name="), 1, name_str); + rb_funcall(fielddef, rb_intern("type="), 1, type); + rb_funcall(fielddef, rb_intern("number="), 1, number); + + if (type_class != Qnil) { + if (TYPE(type_class) != T_STRING) { + rb_raise(rb_eArgError, "Expected string for type class"); + } + // Make it an absolute type name by prepending a dot. + type_class = rb_str_append(rb_str_new2("."), type_class); + rb_funcall(fielddef, rb_intern("submsg_name="), 1, type_class); + } + + rb_funcall(msgdef, rb_intern("add_field"), 1, fielddef); + return fielddef; +} + +/* + * call-seq: + * MessageBuilderContext.optional(name, type, number, type_class = nil) + * + * Defines a new optional field on this message type with the given type, tag + * number, and type class (for message and enum fields). The type must be a Ruby + * symbol (as accepted by FieldDescriptor#type=) and the type_class must be a + * string, if present (as accepted by FieldDescriptor#submsg_name=). + */ +VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self) { + DEFINE_SELF(MessageBuilderContext, self, _self); + + if (argc < 3) { + rb_raise(rb_eArgError, "Expected at least 3 arguments."); + } + VALUE name = argv[0]; + VALUE type = argv[1]; + VALUE number = argv[2]; + VALUE type_class = (argc > 3) ? 
argv[3] : Qnil; + + return msgdef_add_field(self->descriptor, "optional", + name, type, number, type_class); +} + +/* + * call-seq: + * MessageBuilderContext.required(name, type, number, type_class = nil) + * + * Defines a new required field on this message type with the given type, tag + * number, and type class (for message and enum fields). The type must be a Ruby + * symbol (as accepted by FieldDescriptor#type=) and the type_class must be a + * string, if present (as accepted by FieldDescriptor#submsg_name=). + * + * Proto3 does not have required fields, but this method exists for + * completeness. Any attempt to add a message type with required fields to a + * pool will currently result in an error. + */ +VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self) { + DEFINE_SELF(MessageBuilderContext, self, _self); + + if (argc < 3) { + rb_raise(rb_eArgError, "Expected at least 3 arguments."); + } + VALUE name = argv[0]; + VALUE type = argv[1]; + VALUE number = argv[2]; + VALUE type_class = (argc > 3) ? argv[3] : Qnil; + + return msgdef_add_field(self->descriptor, "required", + name, type, number, type_class); +} + +/* + * call-seq: + * MessageBuilderContext.repeated(name, type, number, type_class = nil) + * + * Defines a new repeated field on this message type with the given type, tag + * number, and type class (for message and enum fields). The type must be a Ruby + * symbol (as accepted by FieldDescriptor#type=) and the type_class must be a + * string, if present (as accepted by FieldDescriptor#submsg_name=). + */ +VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) { + DEFINE_SELF(MessageBuilderContext, self, _self); + + if (argc < 3) { + rb_raise(rb_eArgError, "Expected at least 3 arguments."); + } + VALUE name = argv[0]; + VALUE type = argv[1]; + VALUE number = argv[2]; + VALUE type_class = (argc > 3) ? 
argv[3] : Qnil; + + return msgdef_add_field(self->descriptor, "repeated", + name, type, number, type_class); +} + +// ----------------------------------------------------------------------------- +// EnumBuilderContext. +// ----------------------------------------------------------------------------- + +DEFINE_CLASS(EnumBuilderContext, + "Google::Protobuf::Internal::EnumBuilderContext"); + +void EnumBuilderContext_mark(void* _self) { + EnumBuilderContext* self = _self; + rb_gc_mark(self->enumdesc); +} + +void EnumBuilderContext_free(void* _self) { + EnumBuilderContext* self = _self; + xfree(self); +} + +VALUE EnumBuilderContext_alloc(VALUE klass) { + EnumBuilderContext* self = ALLOC(EnumBuilderContext); + VALUE ret = TypedData_Wrap_Struct( + klass, &_EnumBuilderContext_type, self); + self->enumdesc = Qnil; + return ret; +} + +void EnumBuilderContext_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "EnumBuilderContext", rb_cObject); + rb_define_alloc_func(klass, EnumBuilderContext_alloc); + rb_define_method(klass, "initialize", + EnumBuilderContext_initialize, 1); + rb_define_method(klass, "value", EnumBuilderContext_value, 2); + cEnumBuilderContext = klass; + rb_gc_register_address(&cEnumBuilderContext); +} + +/* + * call-seq: + * EnumBuilderContext.new(enumdesc) => context + * + * Create a new builder context around the given enum descriptor. This class is + * intended to serve as a DSL context to be used with #instance_eval. + */ +VALUE EnumBuilderContext_initialize(VALUE _self, VALUE enumdef) { + DEFINE_SELF(EnumBuilderContext, self, _self); + self->enumdesc = enumdef; + return Qnil; +} + +static VALUE enumdef_add_value(VALUE enumdef, + VALUE name, VALUE number) { + rb_funcall(enumdef, rb_intern("add_value"), 2, name, number); + return Qnil; +} + +/* + * call-seq: + * EnumBuilder.add_value(name, number) + * + * Adds the given name => number mapping to the enum type. Name must be a Ruby + * symbol. 
+ */ +VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number) { + DEFINE_SELF(EnumBuilderContext, self, _self); + return enumdef_add_value(self->enumdesc, name, number); +} + +// ----------------------------------------------------------------------------- +// Builder. +// ----------------------------------------------------------------------------- + +DEFINE_CLASS(Builder, "Google::Protobuf::Internal::Builder"); + +void Builder_mark(void* _self) { + Builder* self = _self; + rb_gc_mark(self->pending_list); +} + +void Builder_free(void* _self) { + Builder* self = _self; + xfree(self->defs); + xfree(self); +} + +/* + * call-seq: + * Builder.new => builder + * + * Creates a new Builder. A Builder can accumulate a set of new message and enum + * descriptors and atomically register them into a pool in a way that allows for + * (co)recursive type references. + */ +VALUE Builder_alloc(VALUE klass) { + Builder* self = ALLOC(Builder); + VALUE ret = TypedData_Wrap_Struct( + klass, &_Builder_type, self); + self->pending_list = rb_ary_new(); + self->defs = NULL; + return ret; +} + +void Builder_register(VALUE module) { + VALUE klass = rb_define_class_under(module, "Builder", rb_cObject); + rb_define_alloc_func(klass, Builder_alloc); + rb_define_method(klass, "add_message", Builder_add_message, 1); + rb_define_method(klass, "add_enum", Builder_add_enum, 1); + rb_define_method(klass, "finalize_to_pool", Builder_finalize_to_pool, 1); + cBuilder = klass; + rb_gc_register_address(&cBuilder); +} + +/* + * call-seq: + * Builder.add_message(name, &block) + * + * Creates a new, empty descriptor with the given name, and invokes the block in + * the context of a MessageBuilderContext on that descriptor. The block can then + * call, e.g., MessageBuilderContext#optional and MessageBuilderContext#repeated + * methods to define the message fields. + * + * This is the recommended, idiomatic way to build message definitions. 
+ */ +VALUE Builder_add_message(VALUE _self, VALUE name) { + DEFINE_SELF(Builder, self, _self); + VALUE msgdef = rb_class_new_instance(0, NULL, cDescriptor); + VALUE ctx = rb_class_new_instance(1, &msgdef, cMessageBuilderContext); + VALUE block = rb_block_proc(); + rb_funcall(msgdef, rb_intern("name="), 1, name); + rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); + rb_ary_push(self->pending_list, msgdef); + return Qnil; +} + +/* + * call-seq: + * Builder.add_enum(name, &block) + * + * Creates a new, empty enum descriptor with the given name, and invokes the block in + * the context of an EnumBuilderContext on that descriptor. The block can then + * call EnumBuilderContext#add_value to define the enum values. + * + * This is the recommended, idiomatic way to build enum definitions. + */ +VALUE Builder_add_enum(VALUE _self, VALUE name) { + DEFINE_SELF(Builder, self, _self); + VALUE enumdef = rb_class_new_instance(0, NULL, cEnumDescriptor); + VALUE ctx = rb_class_new_instance(1, &enumdef, cEnumBuilderContext); + VALUE block = rb_block_proc(); + rb_funcall(enumdef, rb_intern("name="), 1, name); + rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block); + rb_ary_push(self->pending_list, enumdef); + return Qnil; +} + +static void validate_msgdef(const upb_msgdef* msgdef) { + // Verify that no required fields exist. proto3 does not support these. + upb_msg_iter it; + for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + if (upb_fielddef_label(field) == UPB_LABEL_REQUIRED) { + rb_raise(rb_eTypeError, "Required fields are unsupported in proto3."); + } + } +} + +static void validate_enumdef(const upb_enumdef* enumdef) { + // Verify that an entry exists with integer value 0. (This is the default + // value.) 
+ const char* lookup = upb_enumdef_iton(enumdef, 0); + if (lookup == NULL) { + rb_raise(rb_eTypeError, + "Enum definition does not contain a value for '0'."); + } +} + +/* + * call-seq: + * Builder.finalize_to_pool(pool) + * + * Adds all accumulated message and enum descriptors created in this builder + * context to the given pool. The operation occurs atomically, and all + * descriptors can refer to each other (including in cycles). This is the only + * way to build (co)recursive message definitions. + * + * This method is usually called automatically by DescriptorPool#build after it + * invokes the given user block in the context of the builder. The user should + * not normally need to call this manually because a Builder is not normally + * created manually. + */ +VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb) { + DEFINE_SELF(Builder, self, _self); + + DescriptorPool* pool = ruby_to_DescriptorPool(pool_rb); + + REALLOC_N(self->defs, upb_def*, RARRAY_LEN(self->pending_list)); + + for (int i = 0; i < RARRAY_LEN(self->pending_list); i++) { + VALUE def_rb = rb_ary_entry(self->pending_list, i); + if (CLASS_OF(def_rb) == cDescriptor) { + self->defs[i] = (upb_def*)ruby_to_Descriptor(def_rb)->msgdef; + validate_msgdef((const upb_msgdef*)self->defs[i]); + } else if (CLASS_OF(def_rb) == cEnumDescriptor) { + self->defs[i] = (upb_def*)ruby_to_EnumDescriptor(def_rb)->enumdef; + validate_enumdef((const upb_enumdef*)self->defs[i]); + } + } + + CHECK_UPB(upb_symtab_add(pool->symtab, (upb_def**)self->defs, + RARRAY_LEN(self->pending_list), NULL, &status), + "Unable to add defs to DescriptorPool"); + + for (int i = 0; i < RARRAY_LEN(self->pending_list); i++) { + VALUE def_rb = rb_ary_entry(self->pending_list, i); + add_def_obj(self->defs[i], def_rb); + } + + self->pending_list = rb_ary_new(); + return Qnil; +} diff --git a/ruby/ext/protobuf_c/encode_decode.c b/ruby/ext/protobuf_c/encode_decode.c new file mode 100644 index 00000000..8aba3c9e --- /dev/null +++ 
b/ruby/ext/protobuf_c/encode_decode.c @@ -0,0 +1,755 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "protobuf.h" + +// ----------------------------------------------------------------------------- +// Parsing. 
+// ----------------------------------------------------------------------------- + +#define DEREF(msg, ofs, type) *(type*)(((uint8_t *)msg) + ofs) + +// Creates a handlerdata that simply contains the offset for this field. +static const void* newhandlerdata(upb_handlers* h, uint32_t ofs) { + size_t* hd_ofs = ALLOC(size_t); + *hd_ofs = ofs; + upb_handlers_addcleanup(h, hd_ofs, free); + return hd_ofs; +} + +typedef struct { + size_t ofs; + const upb_msgdef *md; +} submsg_handlerdata_t; + +// Creates a handlerdata that contains offset and submessage type information. +static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs, + const upb_fielddef* f) { + submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t); + hd->ofs = ofs; + hd->md = upb_fielddef_msgsubdef(f); + upb_handlers_addcleanup(h, hd, free); + return hd; +} + +// A handler that starts a repeated field. Gets the Repeated*Field instance for +// this field (such an instance always exists even in an empty message). +static void *startseq_handler(void* closure, const void* hd) { + MessageHeader* msg = closure; + const size_t *ofs = hd; + return (void*)DEREF(Message_data(msg), *ofs, VALUE); +} + +// Handlers that append primitive values to a repeated field (a regular Ruby +// array for now). +#define DEFINE_APPEND_HANDLER(type, ctype) \ + static bool append##type##_handler(void *closure, const void *hd, \ + ctype val) { \ + VALUE ary = (VALUE)closure; \ + RepeatedField_push_native(ary, &val); \ + return true; \ + } + +DEFINE_APPEND_HANDLER(bool, bool) +DEFINE_APPEND_HANDLER(int32, int32_t) +DEFINE_APPEND_HANDLER(uint32, uint32_t) +DEFINE_APPEND_HANDLER(float, float) +DEFINE_APPEND_HANDLER(int64, int64_t) +DEFINE_APPEND_HANDLER(uint64, uint64_t) +DEFINE_APPEND_HANDLER(double, double) + +// Appends a string to a repeated field (a regular Ruby array for now). 
+static void* appendstr_handler(void *closure, + const void *hd, + size_t size_hint) { + VALUE ary = (VALUE)closure; + VALUE str = rb_str_new2(""); + rb_enc_associate(str, kRubyStringUtf8Encoding); + RepeatedField_push(ary, str); + return (void*)str; +} + +// Appends a 'bytes' string to a repeated field (a regular Ruby array for now). +static void* appendbytes_handler(void *closure, + const void *hd, + size_t size_hint) { + VALUE ary = (VALUE)closure; + VALUE str = rb_str_new2(""); + rb_enc_associate(str, kRubyString8bitEncoding); + RepeatedField_push(ary, str); + return (void*)str; +} + +// Sets a non-repeated string field in a message. +static void* str_handler(void *closure, + const void *hd, + size_t size_hint) { + MessageHeader* msg = closure; + const size_t *ofs = hd; + VALUE str = rb_str_new2(""); + rb_enc_associate(str, kRubyStringUtf8Encoding); + DEREF(Message_data(msg), *ofs, VALUE) = str; + return (void*)str; +} + +// Sets a non-repeated 'bytes' field in a message. +static void* bytes_handler(void *closure, + const void *hd, + size_t size_hint) { + MessageHeader* msg = closure; + const size_t *ofs = hd; + VALUE str = rb_str_new2(""); + rb_enc_associate(str, kRubyString8bitEncoding); + DEREF(Message_data(msg), *ofs, VALUE) = str; + return (void*)str; +} + +static size_t stringdata_handler(void* closure, const void* hd, + const char* str, size_t len, + const upb_bufhandle* handle) { + VALUE rb_str = (VALUE)closure; + rb_str_cat(rb_str, str, len); + return len; +} + +// Appends a submessage to a repeated field (a regular Ruby array for now). 
+static void *appendsubmsg_handler(void *closure, const void *hd) { + VALUE ary = (VALUE)closure; + const submsg_handlerdata_t *submsgdata = hd; + VALUE subdesc = + get_def_obj((void*)submsgdata->md); + VALUE subklass = Descriptor_msgclass(subdesc); + + VALUE submsg_rb = rb_class_new_instance(0, NULL, subklass); + RepeatedField_push(ary, submsg_rb); + + MessageHeader* submsg; + TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); + return submsg; +} + +// Sets a non-repeated submessage field in a message. +static void *submsg_handler(void *closure, const void *hd) { + MessageHeader* msg = closure; + const submsg_handlerdata_t* submsgdata = hd; + VALUE subdesc = + get_def_obj((void*)submsgdata->md); + VALUE subklass = Descriptor_msgclass(subdesc); + + if (DEREF(Message_data(msg), submsgdata->ofs, VALUE) == Qnil) { + DEREF(Message_data(msg), submsgdata->ofs, VALUE) = + rb_class_new_instance(0, NULL, subklass); + } + + VALUE submsg_rb = DEREF(Message_data(msg), submsgdata->ofs, VALUE); + MessageHeader* submsg; + TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg); + return submsg; +} + +static void add_handlers_for_message(const void *closure, upb_handlers *h) { + Descriptor* desc = ruby_to_Descriptor( + get_def_obj((void*)upb_handlers_msgdef(h))); + // Ensure layout exists. We may be invoked to create handlers for a given + // message if we are included as a submsg of another message type before our + // class is actually built, so to work around this, we just create the layout + // (and handlers, in the class-building function) on-demand. 
+ if (desc->layout == NULL) { + desc->layout = create_layout(desc->msgdef); + } + + upb_msg_iter i; + + for (upb_msg_begin(&i, desc->msgdef); + !upb_msg_done(&i); + upb_msg_next(&i)) { + const upb_fielddef *f = upb_msg_iter_field(&i); + size_t offset = desc->layout->offsets[upb_fielddef_index(f)]; + + if (upb_fielddef_isseq(f)) { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset)); + upb_handlers_setstartseq(h, f, startseq_handler, &attr); + upb_handlerattr_uninit(&attr); + + switch (upb_fielddef_type(f)) { + +#define SET_HANDLER(utype, ltype) \ + case utype: \ + upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \ + break; + + SET_HANDLER(UPB_TYPE_BOOL, bool); + SET_HANDLER(UPB_TYPE_INT32, int32); + SET_HANDLER(UPB_TYPE_UINT32, uint32); + SET_HANDLER(UPB_TYPE_ENUM, int32); + SET_HANDLER(UPB_TYPE_FLOAT, float); + SET_HANDLER(UPB_TYPE_INT64, int64); + SET_HANDLER(UPB_TYPE_UINT64, uint64); + SET_HANDLER(UPB_TYPE_DOUBLE, double); + +#undef SET_HANDLER + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; + upb_handlers_setstartstr(h, f, is_bytes ? + appendbytes_handler : appendstr_handler, + NULL); + upb_handlers_setstring(h, f, stringdata_handler, NULL); + } + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f)); + upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } + } + + switch (upb_fielddef_type(f)) { + case UPB_TYPE_BOOL: + case UPB_TYPE_INT32: + case UPB_TYPE_UINT32: + case UPB_TYPE_ENUM: + case UPB_TYPE_FLOAT: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT64: + case UPB_TYPE_DOUBLE: + // The shim writes directly at the given offset (instead of using + // DEREF()) so we need to add the msg overhead. 
+ upb_shim_set(h, f, offset + sizeof(MessageHeader), -1); + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES; + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset)); + upb_handlers_setstartstr(h, f, + is_bytes ? bytes_handler : str_handler, + &attr); + upb_handlers_setstring(h, f, stringdata_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + case UPB_TYPE_MESSAGE: { + upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; + upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f)); + upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr); + upb_handlerattr_uninit(&attr); + break; + } + } + } +} + +// Creates upb handlers for populating a message. +static const upb_handlers *new_fill_handlers(Descriptor* desc, + const void* owner) { + // TODO(cfallin, haberman): once upb gets a caching/memoization layer for + // handlers, reuse subdef handlers so that e.g. if we already parse + // B-with-field-of-type-C, we don't have to rebuild the whole hierarchy to + // parse A-with-field-of-type-B-with-field-of-type-C. + return upb_handlers_newfrozen(desc->msgdef, owner, + add_handlers_for_message, NULL); +} + +// Constructs the handlers for filling a message's data into an in-memory +// object. +const upb_handlers* get_fill_handlers(Descriptor* desc) { + if (!desc->fill_handlers) { + desc->fill_handlers = + new_fill_handlers(desc, &desc->fill_handlers); + } + return desc->fill_handlers; +} + +// Constructs the upb decoder method for parsing messages of this type. +// This is called from the message class creation code. 
+const upb_pbdecodermethod *new_fillmsg_decodermethod(Descriptor* desc, + const void* owner) { + const upb_handlers* handlers = get_fill_handlers(desc); + upb_pbdecodermethodopts opts; + upb_pbdecodermethodopts_init(&opts, handlers); + + const upb_pbdecodermethod *ret = upb_pbdecodermethod_new(&opts, owner); + return ret; +} + +static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) { + if (desc->fill_method == NULL) { + desc->fill_method = new_fillmsg_decodermethod( + desc, &desc->fill_method); + } + return desc->fill_method; +} + +/* + * call-seq: + * MessageClass.decode(data) => message + * + * Decodes the given data (as a string containing bytes in protocol buffers wire + * format) under the interpretration given by this message class's definition + * and returns a message object with the corresponding field values. + */ +VALUE Message_decode(VALUE klass, VALUE data) { + VALUE descriptor = rb_iv_get(klass, kDescriptorInstanceVar); + Descriptor* desc = ruby_to_Descriptor(descriptor); + VALUE msgklass = Descriptor_msgclass(descriptor); + + if (TYPE(data) != T_STRING) { + rb_raise(rb_eArgError, "Expected string for binary protobuf data."); + } + + VALUE msg_rb = rb_class_new_instance(0, NULL, msgklass); + MessageHeader* msg; + TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); + + const upb_pbdecodermethod* method = msgdef_decodermethod(desc); + const upb_handlers* h = upb_pbdecodermethod_desthandlers(method); + upb_pbdecoder decoder; + upb_sink sink; + upb_status status = UPB_STATUS_INIT; + + upb_pbdecoder_init(&decoder, method, &status); + upb_sink_reset(&sink, h, msg); + upb_pbdecoder_resetoutput(&decoder, &sink); + upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data), + upb_pbdecoder_input(&decoder)); + + upb_pbdecoder_uninit(&decoder); + if (!upb_ok(&status)) { + rb_raise(rb_eRuntimeError, "Error occurred during parsing: %s.", + upb_status_errmsg(&status)); + } + + return msg_rb; +} + +/* + * call-seq: + * 
MessageClass.decode_json(data) => message + * + * Decodes the given data (as a string containing bytes in protocol buffers wire + * format) under the interpretration given by this message class's definition + * and returns a message object with the corresponding field values. + */ +VALUE Message_decode_json(VALUE klass, VALUE data) { + VALUE descriptor = rb_iv_get(klass, kDescriptorInstanceVar); + Descriptor* desc = ruby_to_Descriptor(descriptor); + VALUE msgklass = Descriptor_msgclass(descriptor); + + if (TYPE(data) != T_STRING) { + rb_raise(rb_eArgError, "Expected string for JSON data."); + } + // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to + // convert, because string handlers pass data directly to message string + // fields. + + VALUE msg_rb = rb_class_new_instance(0, NULL, msgklass); + MessageHeader* msg; + TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); + + upb_status status = UPB_STATUS_INIT; + upb_json_parser parser; + upb_json_parser_init(&parser, &status); + + upb_sink sink; + upb_sink_reset(&sink, get_fill_handlers(desc), msg); + upb_json_parser_resetoutput(&parser, &sink); + upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data), + upb_json_parser_input(&parser)); + + upb_json_parser_uninit(&parser); + if (!upb_ok(&status)) { + rb_raise(rb_eRuntimeError, "Error occurred during parsing: %s.", + upb_status_errmsg(&status)); + } + + return msg_rb; +} + +// ----------------------------------------------------------------------------- +// Serializing. +// ----------------------------------------------------------------------------- +// +// The code below also comes from upb's prototype Ruby binding, developed by +// haberman@. + +/* stringsink *****************************************************************/ + +// This should probably be factored into a common upb component. 
+ +typedef struct { + upb_byteshandler handler; + upb_bytessink sink; + char *ptr; + size_t len, size; +} stringsink; + +static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { + stringsink *sink = _sink; + sink->len = 0; + return sink; +} + +static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, + size_t len, const upb_bufhandle *handle) { + UPB_UNUSED(hd); + UPB_UNUSED(handle); + + stringsink *sink = _sink; + size_t new_size = sink->size; + + while (sink->len + len > new_size) { + new_size *= 2; + } + + if (new_size != sink->size) { + sink->ptr = realloc(sink->ptr, new_size); + sink->size = new_size; + } + + memcpy(sink->ptr + sink->len, ptr, len); + sink->len += len; + + return len; +} + +void stringsink_init(stringsink *sink) { + upb_byteshandler_init(&sink->handler); + upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); + upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); + + upb_bytessink_reset(&sink->sink, &sink->handler, sink); + + sink->size = 32; + sink->ptr = malloc(sink->size); + sink->len = 0; +} + +void stringsink_uninit(stringsink *sink) { + free(sink->ptr); +} + +/* msgvisitor *****************************************************************/ + +// TODO: If/when we support proto2 semantics in addition to the current proto3 +// semantics, which means that we have true field presence, we will want to +// modify msgvisitor so that it emits all present fields rather than all +// non-default-value fields. +// +// Likewise, when implementing JSON serialization, we may need to have a +// 'verbose' mode that outputs all fields and a 'concise' mode that outputs only +// those with non-default values. 
+ +static void putmsg(VALUE msg, const Descriptor* desc, + upb_sink *sink, int depth); + +static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { + upb_selector_t ret; + bool ok = upb_handlers_getselector(f, type, &ret); + UPB_ASSERT_VAR(ok, ok); + return ret; +} + +static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) { + if (str == Qnil) return; + + assert(BUILTIN_TYPE(str) == RUBY_T_STRING); + upb_sink subsink; + + // Ensure that the string has the correct encoding. We also check at field-set + // time, but the user may have mutated the string object since then. + native_slot_validate_string_encoding(upb_fielddef_type(f), str); + + upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str), + &subsink); + upb_sink_putstring(&subsink, getsel(f, UPB_HANDLER_STRING), RSTRING_PTR(str), + RSTRING_LEN(str), NULL); + upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR)); +} + +static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink, + int depth) { + if (submsg == Qnil) return; + + upb_sink subsink; + VALUE descriptor = rb_iv_get(submsg, kDescriptorInstanceVar); + Descriptor* subdesc = ruby_to_Descriptor(descriptor); + + upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink); + putmsg(submsg, subdesc, &subsink, depth + 1); + upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG)); +} + +static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink, + int depth) { + if (ary == Qnil) return; + + upb_sink subsink; + + upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); + + upb_fieldtype_t type = upb_fielddef_type(f); + upb_selector_t sel = 0; + if (upb_fielddef_isprimitive(f)) { + sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + } + + int size = NUM2INT(RepeatedField_length(ary)); + for (int i = 0; i < size; i++) { + void* memory = RepeatedField_index_native(ary, i); + switch (type) { +#define T(upbtypeconst, upbtype, ctype) \ + case upbtypeconst: \ + 
upb_sink_put##upbtype(&subsink, sel, *((ctype *)memory)); \ + break; + + T(UPB_TYPE_FLOAT, float, float) + T(UPB_TYPE_DOUBLE, double, double) + T(UPB_TYPE_BOOL, bool, int8_t) + case UPB_TYPE_ENUM: + T(UPB_TYPE_INT32, int32, int32_t) + T(UPB_TYPE_UINT32, uint32, uint32_t) + T(UPB_TYPE_INT64, int64, int64_t) + T(UPB_TYPE_UINT64, uint64, uint64_t) + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + putstr(*((VALUE *)memory), f, &subsink); + break; + case UPB_TYPE_MESSAGE: + putsubmsg(*((VALUE *)memory), f, &subsink, depth); + break; + +#undef T + + } + } + upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ)); +} + +static void putmsg(VALUE msg_rb, const Descriptor* desc, + upb_sink *sink, int depth) { + upb_sink_startmsg(sink); + + // Protect against cycles (possible because users may freely reassign message + // and repeated fields) by imposing a maximum recursion depth. + if (depth > UPB_SINK_MAX_NESTING) { + rb_raise(rb_eRuntimeError, + "Maximum recursion depth exceeded during encoding."); + } + + MessageHeader* msg; + TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); + void* msg_data = Message_data(msg); + + upb_msg_iter i; + for (upb_msg_begin(&i, desc->msgdef); + !upb_msg_done(&i); + upb_msg_next(&i)) { + upb_fielddef *f = upb_msg_iter_field(&i); + uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)]; + + if (upb_fielddef_isseq(f)) { + VALUE ary = DEREF(msg_data, offset, VALUE); + if (ary != Qnil) { + putary(ary, f, sink, depth); + } + } else if (upb_fielddef_isstring(f)) { + VALUE str = DEREF(msg_data, offset, VALUE); + if (RSTRING_LEN(str) > 0) { + putstr(str, f, sink); + } + } else if (upb_fielddef_issubmsg(f)) { + putsubmsg(DEREF(msg_data, offset, VALUE), f, sink, depth); + } else { + upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); + +#define T(upbtypeconst, upbtype, ctype, default_value) \ + case upbtypeconst: { \ + ctype value = DEREF(msg_data, offset, ctype); \ + if (value != default_value) { \ + 
upb_sink_put##upbtype(sink, sel, value); \ + } \ + } \ + break; + + switch (upb_fielddef_type(f)) { + T(UPB_TYPE_FLOAT, float, float, 0.0) + T(UPB_TYPE_DOUBLE, double, double, 0.0) + T(UPB_TYPE_BOOL, bool, uint8_t, 0) + case UPB_TYPE_ENUM: + T(UPB_TYPE_INT32, int32, int32_t, 0) + T(UPB_TYPE_UINT32, uint32, uint32_t, 0) + T(UPB_TYPE_INT64, int64, int64_t, 0) + T(UPB_TYPE_UINT64, uint64, uint64_t, 0) + + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error."); + } + +#undef T + + } + } + + upb_status status; + upb_sink_endmsg(sink, &status); +} + +static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) { + if (desc->pb_serialize_handlers == NULL) { + desc->pb_serialize_handlers = + upb_pb_encoder_newhandlers(desc->msgdef, &desc->pb_serialize_handlers); + } + return desc->pb_serialize_handlers; +} + +static const upb_handlers* msgdef_json_serialize_handlers(Descriptor* desc) { + if (desc->json_serialize_handlers == NULL) { + desc->json_serialize_handlers = + upb_json_printer_newhandlers( + desc->msgdef, &desc->json_serialize_handlers); + } + return desc->json_serialize_handlers; +} + +/* + * call-seq: + * MessageClass.encode(msg) => bytes + * + * Encodes the given message object to its serialized form in protocol buffers + * wire format. 
+ */ +VALUE Message_encode(VALUE klass, VALUE msg_rb) { + VALUE descriptor = rb_iv_get(klass, kDescriptorInstanceVar); + Descriptor* desc = ruby_to_Descriptor(descriptor); + + stringsink sink; + stringsink_init(&sink); + + const upb_handlers* serialize_handlers = + msgdef_pb_serialize_handlers(desc); + + upb_pb_encoder encoder; + upb_pb_encoder_init(&encoder, serialize_handlers); + upb_pb_encoder_resetoutput(&encoder, &sink.sink); + + putmsg(msg_rb, desc, upb_pb_encoder_input(&encoder), 0); + + VALUE ret = rb_str_new(sink.ptr, sink.len); + + upb_pb_encoder_uninit(&encoder); + stringsink_uninit(&sink); + + return ret; +} + +/* + * call-seq: + * MessageClass.encode_json(msg) => json_string + * + * Encodes the given message object into its serialized JSON representation. + */ +VALUE Message_encode_json(VALUE klass, VALUE msg_rb) { + VALUE descriptor = rb_iv_get(klass, kDescriptorInstanceVar); + Descriptor* desc = ruby_to_Descriptor(descriptor); + + stringsink sink; + stringsink_init(&sink); + + const upb_handlers* serialize_handlers = + msgdef_json_serialize_handlers(desc); + + upb_json_printer printer; + upb_json_printer_init(&printer, serialize_handlers); + upb_json_printer_resetoutput(&printer, &sink.sink); + + putmsg(msg_rb, desc, upb_json_printer_input(&printer), 0); + + VALUE ret = rb_str_new(sink.ptr, sink.len); + + upb_json_printer_uninit(&printer); + stringsink_uninit(&sink); + + return ret; +} + +/* + * call-seq: + * Google::Protobuf.encode(msg) => bytes + * + * Encodes the given message object to protocol buffers wire format. This is an + * alternative to the #encode method on msg's class. + */ +VALUE Google_Protobuf_encode(VALUE self, VALUE msg_rb) { + VALUE klass = CLASS_OF(msg_rb); + return Message_encode(klass, msg_rb); +} + +/* + * call-seq: + * Google::Protobuf.encode_json(msg) => json_string + * + * Encodes the given message object to its JSON representation. This is an + * alternative to the #encode_json method on msg's class. 
+ */
+VALUE Google_Protobuf_encode_json(VALUE self, VALUE msg_rb) {
+  VALUE klass = CLASS_OF(msg_rb);
+  return Message_encode_json(klass, msg_rb);
+}
+
+/*
+ * call-seq:
+ *     Google::Protobuf.decode(class, bytes) => msg
+ *
+ * Decodes the given bytes as protocol buffers wire format under the
+ * interpretation given by the given class's message definition. This is an
+ * alternative to the #decode method on the given class.
+ */
+VALUE Google_Protobuf_decode(VALUE self, VALUE klass, VALUE msg_rb) {
+  return Message_decode(klass, msg_rb);
+}
+
+/*
+ * call-seq:
+ *     Google::Protobuf.decode_json(class, json_string) => msg
+ *
+ * Decodes the given JSON string under the interpretation given by the given
+ * class's message definition. This is an alternative to the #decode_json method
+ * on the given class.
+ */
+VALUE Google_Protobuf_decode_json(VALUE self, VALUE klass, VALUE msg_rb) {
+  return Message_decode_json(klass, msg_rb);
+}
diff --git a/ruby/ext/protobuf_c/extconf.rb b/ruby/ext/protobuf_c/extconf.rb
new file mode 100644
index 00000000..7f23b1a8
--- /dev/null
+++ b/ruby/ext/protobuf_c/extconf.rb
@@ -0,0 +1,23 @@
+#!/usr/bin/ruby
+
+require 'mkmf'
+
+upb_path = File.absolute_path(File.dirname($0)) + "/../../../upb"
+libs = ["upb_pic", "upb.pb_pic", "upb.json_pic"]
+system("cd #{upb_path}; make " + libs.map{|l| "lib/lib#{l}.a"}.join(" "))
+
+$CFLAGS += " -O3 -std=c99 -Wno-unused-function -DNDEBUG"
+
+find_header("upb/upb.h", upb_path) or
+  raise "Can't find upb headers"
+find_library("upb_pic", "upb_msgdef_new", upb_path + "/lib") or
+  raise "Can't find upb lib"
+find_library("upb.pb_pic", "upb_pbdecoder_init", upb_path + "/lib") or
+  raise "Can't find upb.pb lib"
+find_library("upb.json_pic", "upb_json_printer_init", upb_path + "/lib") or
+  raise "Can't find upb.json lib"
+
+$objs = ["protobuf.o", "defs.o", "storage.o", "message.o",
+         "repeated_field.o", "encode_decode.o"]
+
+create_makefile("protobuf_c")
diff --git a/ruby/ext/protobuf_c/message.c
b/ruby/ext/protobuf_c/message.c new file mode 100644 index 00000000..105b7807 --- /dev/null +++ b/ruby/ext/protobuf_c/message.c @@ -0,0 +1,463 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "protobuf.h" + +// ----------------------------------------------------------------------------- +// Class/module creation from msgdefs and enumdefs, respectively. 
+// -----------------------------------------------------------------------------
+
+// Returns a pointer to the field storage of a message, which sits immediately
+// after the MessageHeader in the same allocation.
+void* Message_data(void* msg) {
+  return ((uint8_t *)msg) + sizeof(MessageHeader);
+}
+
+// GC mark callback: delegates to layout_mark() for the message's field storage.
+void Message_mark(void* _self) {
+  MessageHeader* self = (MessageHeader *)_self;
+  layout_mark(self->descriptor->layout, Message_data(self));
+}
+
+// GC free callback: header and field storage are one allocation, so a single
+// xfree() releases both.
+void Message_free(void* self) {
+  xfree(self);
+}
+
+// TypedData descriptor shared by all generated message classes.
+rb_data_type_t Message_type = {
+  "Message",
+  { Message_mark, Message_free, NULL },
+};
+
+// Allocator installed on every generated message class. Reads the class's
+// descriptor, allocates header + layout->size bytes, and returns the wrapped
+// object with its fields initialized by layout_init().
+VALUE Message_alloc(VALUE klass) {
+  VALUE descriptor = rb_iv_get(klass, kDescriptorInstanceVar);
+  Descriptor* desc = ruby_to_Descriptor(descriptor);
+  MessageHeader* msg = (MessageHeader*)ALLOC_N(
+      uint8_t, sizeof(MessageHeader) + desc->layout->size);
+  // Zero the field storage so it is in a known state before layout_init().
+  memset(Message_data(msg), 0, desc->layout->size);
+
+  // We wrap first so that everything in the message object is GC-rooted in case
+  // a collection happens during object creation in layout_init().
+  VALUE ret = TypedData_Wrap_Struct(klass, &Message_type, msg);
+  msg->descriptor = desc;
+  // The instance also carries a reference to the descriptor object.
+  rb_iv_set(ret, kDescriptorInstanceVar, descriptor);
+
+  layout_init(desc->layout, Message_data(msg));
+
+  return ret;
+}
+
+/*
+ * call-seq:
+ *     Message.method_missing(*args)
+ *
+ * Provides accessors and setters for message fields according to their field
+ * names. For any field whose name does not conflict with a built-in method, an
+ * accessor is provided with the same name as the field, and a setter is
+ * provided with the name of the field plus the '=' suffix.
Thus, given a + * message instance 'msg' with field 'foo', the following code is valid: + * + * msg.foo = 42 + * puts msg.foo + */ +VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + if (argc < 1) { + rb_raise(rb_eArgError, "Expected method name as first argument."); + } + VALUE method_name = argv[0]; + if (!SYMBOL_P(method_name)) { + rb_raise(rb_eArgError, "Expected symbol as method name."); + } + VALUE method_str = rb_id2str(SYM2ID(method_name)); + char* name = RSTRING_PTR(method_str); + size_t name_len = RSTRING_LEN(method_str); + bool setter = false; + + // Setters have names that end in '='. + if (name[name_len - 1] == '=') { + setter = true; + name_len--; + } + + const upb_fielddef* f = upb_msgdef_ntof(self->descriptor->msgdef, + name, name_len); + + if (f == NULL) { + rb_raise(rb_eArgError, "Unknown field"); + } + + if (setter) { + if (argc < 2) { + rb_raise(rb_eArgError, "No value provided to setter."); + } + layout_set(self->descriptor->layout, Message_data(self), f, argv[1]); + return Qnil; + } else { + return layout_get(self->descriptor->layout, Message_data(self), f); + } +} + +int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + + if (!SYMBOL_P(key)) { + rb_raise(rb_eArgError, + "Expected symbols as hash keys in initialization map."); + } + + VALUE method_str = rb_id2str(SYM2ID(key)); + char* name = RSTRING_PTR(method_str); + const upb_fielddef* f = upb_msgdef_ntofz(self->descriptor->msgdef, name); + if (f == NULL) { + rb_raise(rb_eArgError, + "Unknown field name in initialization map entry."); + } + + if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) { + if (TYPE(val) != T_ARRAY) { + rb_raise(rb_eArgError, + "Expected array as initializer value for repeated field."); + } + VALUE ary = layout_get(self->descriptor->layout, Message_data(self), 
f); + for (int i = 0; i < RARRAY_LEN(val); i++) { + RepeatedField_push(ary, rb_ary_entry(val, i)); + } + } else { + layout_set(self->descriptor->layout, Message_data(self), f, val); + } + return 0; +} + +/* + * call-seq: + * Message.new(kwargs) => new_message + * + * Creates a new instance of the given message class. Keyword arguments may be + * provided with keywords corresponding to field names. + * + * Note that no literal Message class exists. Only concrete classes per message + * type exist, as provided by the #msgclass method on Descriptors after they + * have been added to a pool. The method definitions described here on the + * Message class are provided on each concrete message class. + */ +VALUE Message_initialize(int argc, VALUE* argv, VALUE _self) { + if (argc == 0) { + return Qnil; + } + if (argc != 1) { + rb_raise(rb_eArgError, "Expected 0 or 1 arguments."); + } + VALUE hash_args = argv[0]; + if (TYPE(hash_args) != T_HASH) { + rb_raise(rb_eArgError, "Expected hash arguments."); + } + + rb_hash_foreach(hash_args, Message_initialize_kwarg, _self); + return Qnil; +} + +/* + * call-seq: + * Message.dup => new_message + * + * Performs a shallow copy of this message and returns the new copy. + */ +VALUE Message_dup(VALUE _self) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + + VALUE new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self)); + MessageHeader* new_msg_self; + TypedData_Get_Struct(new_msg, MessageHeader, &Message_type, new_msg_self); + + layout_dup(self->descriptor->layout, + Message_data(new_msg_self), + Message_data(self)); + + return new_msg; +} + +// Internal only; used by Google::Protobuf.deep_copy. 
+VALUE Message_deep_copy(VALUE _self) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + + VALUE new_msg = rb_class_new_instance(0, NULL, CLASS_OF(_self)); + MessageHeader* new_msg_self; + TypedData_Get_Struct(new_msg, MessageHeader, &Message_type, new_msg_self); + + layout_deep_copy(self->descriptor->layout, + Message_data(new_msg_self), + Message_data(self)); + + return new_msg; +} + +/* + * call-seq: + * Message.==(other) => boolean + * + * Performs a deep comparison of this message with another. Messages are equal + * if they have the same type and if each field is equal according to the :== + * method's semantics (a more efficient comparison may actually be done if the + * field is of a primitive type). + */ +VALUE Message_eq(VALUE _self, VALUE _other) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + + MessageHeader* other; + TypedData_Get_Struct(_other, MessageHeader, &Message_type, other); + + if (self->descriptor != other->descriptor) { + return Qfalse; + } + + return layout_eq(self->descriptor->layout, + Message_data(self), + Message_data(other)); +} + +/* + * call-seq: + * Message.hash => hash_value + * + * Returns a hash value that represents this message's field values. + */ +VALUE Message_hash(VALUE _self) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + + return layout_hash(self->descriptor->layout, Message_data(self)); +} + +/* + * call-seq: + * Message.inspect => string + * + * Returns a human-readable string representing this message. It will be + * formatted as "<MessageType: field1: value1, field2: value2, ...>". Each + * field's value is represented according to its own #inspect method. 
+ */ +VALUE Message_inspect(VALUE _self) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + + VALUE str = rb_str_new2("<"); + str = rb_str_append(str, rb_str_new2(rb_class2name(CLASS_OF(_self)))); + str = rb_str_cat2(str, ": "); + str = rb_str_append(str, layout_inspect( + self->descriptor->layout, Message_data(self))); + str = rb_str_cat2(str, ">"); + return str; +} + +/* + * call-seq: + * Message.[](index) => value + * + * Accesses a field's value by field name. The provided field name should be a + * string. + */ +VALUE Message_index(VALUE _self, VALUE field_name) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + Check_Type(field_name, T_STRING); + const upb_fielddef* field = + upb_msgdef_ntofz(self->descriptor->msgdef, RSTRING_PTR(field_name)); + if (field == NULL) { + return Qnil; + } + return layout_get(self->descriptor->layout, Message_data(self), field); +} + +/* + * call-seq: + * Message.[]=(index, value) + * + * Sets a field's value by field name. The provided field name should be a + * string. + */ +VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value) { + MessageHeader* self; + TypedData_Get_Struct(_self, MessageHeader, &Message_type, self); + Check_Type(field_name, T_STRING); + const upb_fielddef* field = + upb_msgdef_ntofz(self->descriptor->msgdef, RSTRING_PTR(field_name)); + if (field == NULL) { + rb_raise(rb_eArgError, "Unknown field: %s", RSTRING_PTR(field_name)); + } + layout_set(self->descriptor->layout, Message_data(self), field, value); + return Qnil; +} + +/* + * call-seq: + * Message.descriptor => descriptor + * + * Class method that returns the Descriptor instance corresponding to this + * message class's type. 
+ */
+VALUE Message_descriptor(VALUE klass) {
+  return rb_iv_get(klass, kDescriptorInstanceVar);
+}
+
+// Builds and returns the Ruby class for the message type described by |desc|.
+// The message layout and the decoder method are created on first use and
+// cached on the descriptor. Raises RuntimeError if the msgdef has no name.
+VALUE build_class_from_descriptor(Descriptor* desc) {
+  if (desc->layout == NULL) {
+    desc->layout = create_layout(desc->msgdef);
+  }
+  if (desc->fill_method == NULL) {
+    desc->fill_method = new_fillmsg_decodermethod(desc, &desc->fill_method);
+  }
+
+  // |name| is fetched only to verify that the descriptor has been named.
+  const char* name = upb_msgdef_fullname(desc->msgdef);
+  if (name == NULL) {
+    rb_raise(rb_eRuntimeError, "Descriptor does not have assigned name.");
+  }
+
+  VALUE klass = rb_define_class_id(
+      // Docs say this parameter is ignored. User will assign return value to
+      // their own toplevel constant class name.
+      rb_intern("Message"),
+      rb_cObject);
+  // The class keeps a reference to its Descriptor object; Message_alloc and
+  // the encode methods read it back from this instance variable.
+  rb_iv_set(klass, kDescriptorInstanceVar, get_def_obj(desc->msgdef));
+  rb_define_alloc_func(klass, Message_alloc);
+  rb_define_method(klass, "method_missing",
+                   Message_method_missing, -1);
+  rb_define_method(klass, "initialize", Message_initialize, -1);
+  rb_define_method(klass, "dup", Message_dup, 0);
+  // Also define #clone so that we don't inherit Object#clone.
+  rb_define_method(klass, "clone", Message_dup, 0);
+  rb_define_method(klass, "==", Message_eq, 1);
+  rb_define_method(klass, "hash", Message_hash, 0);
+  rb_define_method(klass, "inspect", Message_inspect, 0);
+  rb_define_method(klass, "[]", Message_index, 1);
+  rb_define_method(klass, "[]=", Message_index_set, 2);
+  rb_define_singleton_method(klass, "decode", Message_decode, 1);
+  rb_define_singleton_method(klass, "encode", Message_encode, 1);
+  rb_define_singleton_method(klass, "decode_json", Message_decode_json, 1);
+  rb_define_singleton_method(klass, "encode_json", Message_encode_json, 1);
+  rb_define_singleton_method(klass, "descriptor", Message_descriptor, 0);
+  return klass;
+}
+
+/*
+ * call-seq:
+ *     Enum.lookup(number) => name
+ *
+ * This module method, provided on each generated enum module, looks up an enum
+ * value by number and returns its name as a Ruby symbol, or nil if not found.
+ */
+VALUE enum_lookup(VALUE self, VALUE number) {
+  int32_t num = NUM2INT(number);
+  VALUE desc = rb_iv_get(self, kDescriptorInstanceVar);
+  EnumDescriptor* enumdesc = ruby_to_EnumDescriptor(desc);
+
+  const char* name = upb_enumdef_iton(enumdesc->enumdef, num);
+  if (name == NULL) {
+    return Qnil;
+  } else {
+    return ID2SYM(rb_intern(name));
+  }
+}
+
+/*
+ * call-seq:
+ *     Enum.resolve(name) => number
+ *
+ * This module method, provided on each generated enum module, looks up an enum
+ * value by name (as a Ruby symbol) and returns its number, or nil if not found.
+ * Raises TypeError if the given name is not a symbol.
+ */
+VALUE enum_resolve(VALUE self, VALUE sym) {
+  // SYM2ID is only meaningful for symbols; reject anything else up front.
+  Check_Type(sym, T_SYMBOL);
+  const char* name = rb_id2name(SYM2ID(sym));
+  VALUE desc = rb_iv_get(self, kDescriptorInstanceVar);
+  EnumDescriptor* enumdesc = ruby_to_EnumDescriptor(desc);
+
+  int32_t num = 0;
+  bool found = upb_enumdef_ntoiz(enumdesc->enumdef, name, &num);
+  if (!found) {
+    return Qnil;
+  } else {
+    return INT2NUM(num);
+  }
+}
+
+/*
+ * call-seq:
+ *     Enum.descriptor
+ *
+ * This module method, provided on each generated enum module, returns the
+ * EnumDescriptor corresponding to this enum type.
+ */ +VALUE enum_descriptor(VALUE self) { + return rb_iv_get(self, kDescriptorInstanceVar); +} + +VALUE build_module_from_enumdesc(EnumDescriptor* enumdesc) { + VALUE mod = rb_define_module_id( + rb_intern(upb_enumdef_fullname(enumdesc->enumdef))); + + upb_enum_iter it; + for (upb_enum_begin(&it, enumdesc->enumdef); + !upb_enum_done(&it); + upb_enum_next(&it)) { + const char* name = upb_enum_iter_name(&it); + int32_t value = upb_enum_iter_number(&it); + if (name[0] < 'A' || name[0] > 'Z') { + rb_raise(rb_eTypeError, + "Enum value '%s' does not start with an uppercase letter " + "as is required for Ruby constants.", + name); + } + rb_define_const(mod, name, INT2NUM(value)); + } + + rb_define_singleton_method(mod, "lookup", enum_lookup, 1); + rb_define_singleton_method(mod, "resolve", enum_resolve, 1); + rb_define_singleton_method(mod, "descriptor", enum_descriptor, 0); + rb_iv_set(mod, kDescriptorInstanceVar, get_def_obj(enumdesc->enumdef)); + + return mod; +} + +/* + * call-seq: + * Google::Protobuf.deep_copy(obj) => copy_of_obj + * + * Performs a deep copy of either a RepeatedField instance or a message object, + * recursively copying its members. + */ +VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) { + VALUE klass = CLASS_OF(obj); + if (klass == cRepeatedField) { + return RepeatedField_deep_copy(obj); + } else { + return Message_deep_copy(obj); + } +} diff --git a/ruby/ext/protobuf_c/protobuf.c b/ruby/ext/protobuf_c/protobuf.c new file mode 100644 index 00000000..d5862284 --- /dev/null +++ b/ruby/ext/protobuf_c/protobuf.c @@ -0,0 +1,102 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. 
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "protobuf.h" + +// ----------------------------------------------------------------------------- +// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor +// instances. +// ----------------------------------------------------------------------------- + +// This is a hash table from def objects (encoded by converting pointers to +// Ruby integers) to MessageDef/EnumDef instances (as Ruby values). 
+VALUE upb_def_to_ruby_obj_map;
+
+// Records |value| as the Ruby object associated with the upb def at |def|.
+// The pointer itself, converted to a Ruby integer, is the hash key.
+void add_def_obj(const void* def, VALUE value) {
+  VALUE key = ULL2NUM((intptr_t)def);
+  rb_hash_aset(upb_def_to_ruby_obj_map, key, value);
+}
+
+// Returns the result of looking |def| up in the map, using the same
+// pointer-derived key as add_def_obj.
+VALUE get_def_obj(const void* def) {
+  VALUE key = ULL2NUM((intptr_t)def);
+  return rb_hash_aref(upb_def_to_ruby_obj_map, key);
+}
+
+// -----------------------------------------------------------------------------
+// Utilities.
+// -----------------------------------------------------------------------------
+
+// Does nothing when |status| is OK; otherwise raises a Ruby RuntimeError whose
+// message is |msg| followed by the status's own error message.
+void check_upb_status(const upb_status* status, const char* msg) {
+  if (upb_ok(status)) {
+    return;
+  }
+  rb_raise(rb_eRuntimeError, "%s: %s\n", msg, upb_status_errmsg(status));
+}
+
+// String encodings: we look these up once, at load time, and then cache them
+// here.
+rb_encoding* kRubyStringUtf8Encoding;
+rb_encoding* kRubyStringASCIIEncoding;
+rb_encoding* kRubyString8bitEncoding;
+
+// -----------------------------------------------------------------------------
+// Initialization/entry point.
+// -----------------------------------------------------------------------------
+
+// This must be named "Init_protobuf_c" because the Ruby module is named
+// "protobuf_c" -- the VM looks for this symbol in our .so.
+void Init_protobuf_c() { + VALUE google = rb_define_module("Google"); + VALUE protobuf = rb_define_module_under(google, "Protobuf"); + VALUE internal = rb_define_module_under(protobuf, "Internal"); + DescriptorPool_register(protobuf); + Descriptor_register(protobuf); + FieldDescriptor_register(protobuf); + EnumDescriptor_register(protobuf); + MessageBuilderContext_register(internal); + EnumBuilderContext_register(internal); + Builder_register(internal); + RepeatedField_register(protobuf); + + rb_define_singleton_method(protobuf, "encode", Google_Protobuf_encode, 1); + rb_define_singleton_method(protobuf, "decode", Google_Protobuf_decode, 2); + rb_define_singleton_method(protobuf, "encode_json", + Google_Protobuf_encode_json, 1); + rb_define_singleton_method(protobuf, "decode_json", + Google_Protobuf_decode_json, 2); + + rb_define_singleton_method(protobuf, "deep_copy", + Google_Protobuf_deep_copy, 1); + + kRubyStringUtf8Encoding = rb_utf8_encoding(); + kRubyStringASCIIEncoding = rb_usascii_encoding(); + kRubyString8bitEncoding = rb_ascii8bit_encoding(); + + upb_def_to_ruby_obj_map = rb_hash_new(); + rb_gc_register_address(&upb_def_to_ruby_obj_map); +} diff --git a/ruby/ext/protobuf_c/protobuf.h b/ruby/ext/protobuf_c/protobuf.h new file mode 100644 index 00000000..a7f6f539 --- /dev/null +++ b/ruby/ext/protobuf_c/protobuf.h @@ -0,0 +1,404 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef __GOOGLE_PROTOBUF_RUBY_PROTOBUF_H__ +#define __GOOGLE_PROTOBUF_RUBY_PROTOBUF_H__ + +#include <ruby/ruby.h> +#include <ruby/vm.h> +#include <ruby/encoding.h> + +#include "upb/def.h" +#include "upb/handlers.h" +#include "upb/pb/decoder.h" +#include "upb/pb/encoder.h" +#include "upb/pb/glue.h" +#include "upb/json/parser.h" +#include "upb/json/printer.h" +#include "upb/shim/shim.h" +#include "upb/symtab.h" + +// Forward decls. 
+struct DescriptorPool; +struct Descriptor; +struct FieldDescriptor; +struct EnumDescriptor; +struct MessageLayout; +struct MessageHeader; +struct MessageBuilderContext; +struct EnumBuilderContext; +struct Builder; + +typedef struct DescriptorPool DescriptorPool; +typedef struct Descriptor Descriptor; +typedef struct FieldDescriptor FieldDescriptor; +typedef struct EnumDescriptor EnumDescriptor; +typedef struct MessageLayout MessageLayout; +typedef struct MessageHeader MessageHeader; +typedef struct MessageBuilderContext MessageBuilderContext; +typedef struct EnumBuilderContext EnumBuilderContext; +typedef struct Builder Builder; + +/* + It can be a bit confusing how the C structs defined below and the Ruby + objects interact and hold references to each other. First, a few principles: + + - Ruby's "TypedData" abstraction lets a Ruby VALUE hold a pointer to a C + struct (or arbitrary memory chunk), own it, and free it when collected. + Thus, each struct below will have a corresponding Ruby object + wrapping/owning it. + + - To get back from an underlying upb {msg,enum}def to the Ruby object, we + keep a global hashmap, accessed by get_def_obj/add_def_obj below. 
+ + The in-memory structure is then something like: + + Ruby | upb + | + DescriptorPool ------------|-----------> upb_symtab____________________ + | | (message types) \ + | v \ + Descriptor ---------------|-----------> upb_msgdef (enum types)| + |--> msgclass | | ^ | + | (dynamically built) | | | (submsg fields) | + |--> MessageLayout | | | / + |--------------------------|> decoder method| | / + \--------------------------|> serialize | | / + | handlers v | / + FieldDescriptor -----------|-----------> upb_fielddef / + | | / + | v (enum fields) / + EnumDescriptor ------------|-----------> upb_enumdef <----------' + | + | + ^ | \___/ + `---------------|-----------------' (get_def_obj map) + */ + +// ----------------------------------------------------------------------------- +// Ruby class structure definitions. +// ----------------------------------------------------------------------------- + +struct DescriptorPool { + upb_symtab* symtab; +}; + +struct Descriptor { + const upb_msgdef* msgdef; + MessageLayout* layout; + VALUE klass; // begins as nil + const upb_handlers* fill_handlers; + const upb_pbdecodermethod* fill_method; + const upb_handlers* pb_serialize_handlers; + const upb_handlers* json_serialize_handlers; +}; + +struct FieldDescriptor { + const upb_fielddef* fielddef; +}; + +struct EnumDescriptor { + const upb_enumdef* enumdef; + VALUE module; // begins as nil +}; + +struct MessageBuilderContext { + VALUE descriptor; +}; + +struct EnumBuilderContext { + VALUE enumdesc; +}; + +struct Builder { + VALUE pending_list; + upb_def** defs; // used only while finalizing +}; + +extern VALUE cDescriptorPool; +extern VALUE cDescriptor; +extern VALUE cFieldDescriptor; +extern VALUE cEnumDescriptor; +extern VALUE cMessageBuilderContext; +extern VALUE cEnumBuilderContext; +extern VALUE cBuilder; + +extern const char* kDescriptorInstanceVar; + +// We forward-declare all of the Ruby method implementations here because we +// sometimes call the methods directly across .c 
files, rather than going +// through Ruby's method dispatching (e.g. during message parse). It's cleaner +// to keep the list of object methods together than to split them between +// static-in-file definitions and header declarations. + +void DescriptorPool_mark(void* _self); +void DescriptorPool_free(void* _self); +VALUE DescriptorPool_alloc(VALUE klass); +void DescriptorPool_register(VALUE module); +DescriptorPool* ruby_to_DescriptorPool(VALUE value); +VALUE DescriptorPool_add(VALUE _self, VALUE def); +VALUE DescriptorPool_build(VALUE _self); +VALUE DescriptorPool_lookup(VALUE _self, VALUE name); +VALUE DescriptorPool_generated_pool(VALUE _self); + +void Descriptor_mark(void* _self); +void Descriptor_free(void* _self); +VALUE Descriptor_alloc(VALUE klass); +void Descriptor_register(VALUE module); +Descriptor* ruby_to_Descriptor(VALUE value); +VALUE Descriptor_name(VALUE _self); +VALUE Descriptor_name_set(VALUE _self, VALUE str); +VALUE Descriptor_each(VALUE _self); +VALUE Descriptor_lookup(VALUE _self, VALUE name); +VALUE Descriptor_add_field(VALUE _self, VALUE obj); +VALUE Descriptor_msgclass(VALUE _self); +extern const rb_data_type_t _Descriptor_type; + +void FieldDescriptor_mark(void* _self); +void FieldDescriptor_free(void* _self); +VALUE FieldDescriptor_alloc(VALUE klass); +void FieldDescriptor_register(VALUE module); +FieldDescriptor* ruby_to_FieldDescriptor(VALUE value); +VALUE FieldDescriptor_name(VALUE _self); +VALUE FieldDescriptor_name_set(VALUE _self, VALUE str); +VALUE FieldDescriptor_type(VALUE _self); +VALUE FieldDescriptor_type_set(VALUE _self, VALUE type); +VALUE FieldDescriptor_label(VALUE _self); +VALUE FieldDescriptor_label_set(VALUE _self, VALUE label); +VALUE FieldDescriptor_number(VALUE _self); +VALUE FieldDescriptor_number_set(VALUE _self, VALUE number); +VALUE FieldDescriptor_submsg_name(VALUE _self); +VALUE FieldDescriptor_submsg_name_set(VALUE _self, VALUE value); +VALUE FieldDescriptor_subtype(VALUE _self); +VALUE 
FieldDescriptor_get(VALUE _self, VALUE msg_rb); +VALUE FieldDescriptor_set(VALUE _self, VALUE msg_rb, VALUE value); +upb_fieldtype_t ruby_to_fieldtype(VALUE type); +VALUE fieldtype_to_ruby(upb_fieldtype_t type); + +void EnumDescriptor_mark(void* _self); +void EnumDescriptor_free(void* _self); +VALUE EnumDescriptor_alloc(VALUE klass); +void EnumDescriptor_register(VALUE module); +EnumDescriptor* ruby_to_EnumDescriptor(VALUE value); +VALUE EnumDescriptor_name(VALUE _self); +VALUE EnumDescriptor_name_set(VALUE _self, VALUE str); +VALUE EnumDescriptor_add_value(VALUE _self, VALUE name, VALUE number); +VALUE EnumDescriptor_lookup_name(VALUE _self, VALUE name); +VALUE EnumDescriptor_lookup_value(VALUE _self, VALUE number); +VALUE EnumDescriptor_each(VALUE _self); +VALUE EnumDescriptor_enummodule(VALUE _self); +extern const rb_data_type_t _EnumDescriptor_type; + +void MessageBuilderContext_mark(void* _self); +void MessageBuilderContext_free(void* _self); +VALUE MessageBuilderContext_alloc(VALUE klass); +void MessageBuilderContext_register(VALUE module); +MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE value); +VALUE MessageBuilderContext_initialize(VALUE _self, VALUE descriptor); +VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self); +VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self); +VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self); + +void EnumBuilderContext_mark(void* _self); +void EnumBuilderContext_free(void* _self); +VALUE EnumBuilderContext_alloc(VALUE klass); +void EnumBuilderContext_register(VALUE module); +EnumBuilderContext* ruby_to_EnumBuilderContext(VALUE value); +VALUE EnumBuilderContext_initialize(VALUE _self, VALUE enumdesc); +VALUE EnumBuilderContext_value(VALUE _self, VALUE name, VALUE number); + +void Builder_mark(void* _self); +void Builder_free(void* _self); +VALUE Builder_alloc(VALUE klass); +void Builder_register(VALUE module); +Builder* ruby_to_Builder(VALUE value); 
+VALUE Builder_add_message(VALUE _self, VALUE name); +VALUE Builder_add_enum(VALUE _self, VALUE name); +VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb); + +// ----------------------------------------------------------------------------- +// Native slot storage abstraction. +// ----------------------------------------------------------------------------- + +size_t native_slot_size(upb_fieldtype_t type); +void native_slot_set(upb_fieldtype_t type, + VALUE type_class, + void* memory, + VALUE value); +VALUE native_slot_get(upb_fieldtype_t type, + VALUE type_class, + void* memory); +void native_slot_init(upb_fieldtype_t type, void* memory); +void native_slot_mark(upb_fieldtype_t type, void* memory); +void native_slot_dup(upb_fieldtype_t type, void* to, void* from); +void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from); +bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2); + +void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value); + +extern rb_encoding* kRubyStringUtf8Encoding; +extern rb_encoding* kRubyStringASCIIEncoding; +extern rb_encoding* kRubyString8bitEncoding; + +// ----------------------------------------------------------------------------- +// Repeated field container type. 
+// ----------------------------------------------------------------------------- + +typedef struct { + upb_fieldtype_t field_type; + VALUE field_type_class; + void* elements; + int size; + int capacity; +} RepeatedField; + +void RepeatedField_mark(void* self); +void RepeatedField_free(void* self); +VALUE RepeatedField_alloc(VALUE klass); +VALUE RepeatedField_init(int argc, VALUE* argv, VALUE self); +void RepeatedField_register(VALUE module); + +extern const rb_data_type_t RepeatedField_type; +extern VALUE cRepeatedField; + +RepeatedField* ruby_to_RepeatedField(VALUE value); + +void RepeatedField_register(VALUE module); +VALUE RepeatedField_each(VALUE _self); +VALUE RepeatedField_index(VALUE _self, VALUE _index); +void* RepeatedField_index_native(VALUE _self, int index); +VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val); +void RepeatedField_reserve(RepeatedField* self, int new_size); +VALUE RepeatedField_push(VALUE _self, VALUE val); +void RepeatedField_push_native(VALUE _self, void* data); +VALUE RepeatedField_pop(VALUE _self); +VALUE RepeatedField_insert(int argc, VALUE* argv, VALUE _self); +VALUE RepeatedField_replace(VALUE _self, VALUE list); +VALUE RepeatedField_clear(VALUE _self); +VALUE RepeatedField_length(VALUE _self); +VALUE RepeatedField_dup(VALUE _self); +VALUE RepeatedField_deep_copy(VALUE _self); +VALUE RepeatedField_eq(VALUE _self, VALUE _other); +VALUE RepeatedField_hash(VALUE _self); +VALUE RepeatedField_inspect(VALUE _self); +VALUE RepeatedField_plus(VALUE _self, VALUE list); + +// ----------------------------------------------------------------------------- +// Message layout / storage. 
+// ----------------------------------------------------------------------------- + +struct MessageLayout { + const upb_msgdef* msgdef; + size_t* offsets; + size_t size; +}; + +MessageLayout* create_layout(const upb_msgdef* msgdef); +void free_layout(MessageLayout* layout); +VALUE layout_get(MessageLayout* layout, + void* storage, + const upb_fielddef* field); +void layout_set(MessageLayout* layout, + void* storage, + const upb_fielddef* field, + VALUE val); +void layout_init(MessageLayout* layout, void* storage); +void layout_mark(MessageLayout* layout, void* storage); +void layout_dup(MessageLayout* layout, void* to, void* from); +void layout_deep_copy(MessageLayout* layout, void* to, void* from); +VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2); +VALUE layout_hash(MessageLayout* layout, void* storage); +VALUE layout_inspect(MessageLayout* layout, void* storage); + +// ----------------------------------------------------------------------------- +// Message class creation. +// ----------------------------------------------------------------------------- + +struct MessageHeader { + Descriptor* descriptor; // kept alive by self.class.descriptor reference. + // Data comes after this. 
+}; + +extern rb_data_type_t Message_type; + +VALUE build_class_from_descriptor(Descriptor* descriptor); +void* Message_data(void* msg); +void Message_mark(void* self); +void Message_free(void* self); +VALUE Message_alloc(VALUE klass); +VALUE Message_method_missing(int argc, VALUE* argv, VALUE _self); +VALUE Message_initialize(int argc, VALUE* argv, VALUE _self); +VALUE Message_dup(VALUE _self); +VALUE Message_deep_copy(VALUE _self); +VALUE Message_eq(VALUE _self, VALUE _other); +VALUE Message_hash(VALUE _self); +VALUE Message_inspect(VALUE _self); +VALUE Message_index(VALUE _self, VALUE field_name); +VALUE Message_index_set(VALUE _self, VALUE field_name, VALUE value); +VALUE Message_descriptor(VALUE klass); +VALUE Message_decode(VALUE klass, VALUE data); +VALUE Message_encode(VALUE klass, VALUE msg_rb); +VALUE Message_decode_json(VALUE klass, VALUE data); +VALUE Message_encode_json(VALUE klass, VALUE msg_rb); + +VALUE Google_Protobuf_encode(VALUE self, VALUE msg_rb); +VALUE Google_Protobuf_decode(VALUE self, VALUE klass, VALUE msg_rb); +VALUE Google_Protobuf_encode_json(VALUE self, VALUE msg_rb); +VALUE Google_Protobuf_decode_json(VALUE self, VALUE klass, VALUE msg_rb); + +VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj); + +VALUE build_module_from_enumdesc(EnumDescriptor* enumdef); +VALUE enum_lookup(VALUE self, VALUE number); +VALUE enum_resolve(VALUE self, VALUE sym); + +const upb_pbdecodermethod *new_fillmsg_decodermethod( + Descriptor* descriptor, const void *owner); + +// ----------------------------------------------------------------------------- +// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor +// instances. +// ----------------------------------------------------------------------------- +void add_def_obj(const void* def, VALUE value); +VALUE get_def_obj(const void* def); + +// ----------------------------------------------------------------------------- +// Utilities. 
+// ----------------------------------------------------------------------------- + +void check_upb_status(const upb_status* status, const char* msg); + +#define CHECK_UPB(code, msg) do { \ + upb_status status = UPB_STATUS_INIT; \ + code; \ + check_upb_status(&status, msg); \ +} while (0) + +#endif // __GOOGLE_PROTOBUF_RUBY_PROTOBUF_H__ diff --git a/ruby/ext/protobuf_c/repeated_field.c b/ruby/ext/protobuf_c/repeated_field.c new file mode 100644 index 00000000..6bd13b07 --- /dev/null +++ b/ruby/ext/protobuf_c/repeated_field.c @@ -0,0 +1,597 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "protobuf.h" + +// ----------------------------------------------------------------------------- +// Repeated field container type. +// ----------------------------------------------------------------------------- + +const rb_data_type_t RepeatedField_type = { + "Google::Protobuf::RepeatedField", + { RepeatedField_mark, RepeatedField_free, NULL }, +}; + +VALUE cRepeatedField; + +RepeatedField* ruby_to_RepeatedField(VALUE _self) { + RepeatedField* self; + TypedData_Get_Struct(_self, RepeatedField, &RepeatedField_type, self); + return self; +} + +/* + * call-seq: + * RepeatedField.each(&block) + * + * Invokes the block once for each element of the repeated field. RepeatedField + * also includes Enumerable; combined with this method, the repeated field thus + * acts like an ordinary Ruby sequence. + */ +VALUE RepeatedField_each(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + upb_fieldtype_t field_type = self->field_type; + VALUE field_type_class = self->field_type_class; + int element_size = native_slot_size(field_type); + + size_t off = 0; + for (int i = 0; i < self->size; i++, off += element_size) { + void* memory = (void *) (((uint8_t *)self->elements) + off); + VALUE val = native_slot_get(field_type, field_type_class, memory); + rb_yield(val); + } + return Qnil; +} + +/* + * call-seq: + * RepeatedField.[](index) => value + * + * Accesses the element at the given index. 
Throws an exception on out-of-bounds + * errors. + */ +VALUE RepeatedField_index(VALUE _self, VALUE _index) { + RepeatedField* self = ruby_to_RepeatedField(_self); + int element_size = native_slot_size(self->field_type); + upb_fieldtype_t field_type = self->field_type; + VALUE field_type_class = self->field_type_class; + + int index = NUM2INT(_index); + if (index < 0 || index >= self->size) { + rb_raise(rb_eRangeError, "Index out of range"); + } + + void* memory = (void *) (((uint8_t *)self->elements) + index * element_size); + return native_slot_get(field_type, field_type_class, memory); +} + +/* + * call-seq: + * RepeatedField.[]=(index, value) + * + * Sets the element at the given index. On out-of-bounds assignments, extends + * the array and fills the hole (if any) with default values. + */ +VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val) { + RepeatedField* self = ruby_to_RepeatedField(_self); + upb_fieldtype_t field_type = self->field_type; + VALUE field_type_class = self->field_type_class; + int element_size = native_slot_size(field_type); + + int index = NUM2INT(_index); + if (index < 0 || index >= (INT_MAX - 1)) { + rb_raise(rb_eRangeError, "Index out of range"); + } + if (index >= self->size) { + RepeatedField_reserve(self, index + 1); + upb_fieldtype_t field_type = self->field_type; + int element_size = native_slot_size(field_type); + for (int i = self->size; i <= index; i++) { + void* elem = (void *)(((uint8_t *)self->elements) + i * element_size); + native_slot_init(field_type, elem); + } + self->size = index + 1; + } + + void* memory = (void *) (((uint8_t *)self->elements) + index * element_size); + native_slot_set(field_type, field_type_class, memory, val); + return Qnil; +} + +static int kInitialSize = 8; + +void RepeatedField_reserve(RepeatedField* self, int new_size) { + if (new_size <= self->capacity) { + return; + } + if (self->capacity == 0) { + self->capacity = kInitialSize; + } + while (self->capacity < new_size) { + 
self->capacity *= 2; + } + void* old_elems = self->elements; + int elem_size = native_slot_size(self->field_type); + self->elements = ALLOC_N(uint8_t, elem_size * self->capacity); + if (old_elems != NULL) { + memcpy(self->elements, old_elems, self->size * elem_size); + xfree(old_elems); + } +} + +/* + * call-seq: + * RepeatedField.push(value) + * + * Adds a new element to the repeated field. + */ +VALUE RepeatedField_push(VALUE _self, VALUE val) { + RepeatedField* self = ruby_to_RepeatedField(_self); + upb_fieldtype_t field_type = self->field_type; + int element_size = native_slot_size(field_type); + RepeatedField_reserve(self, self->size + 1); + int index = self->size; + void* memory = (void *) (((uint8_t *)self->elements) + index * element_size); + native_slot_set(field_type, self->field_type_class, memory, val); + // native_slot_set may raise an error; bump index only after set. + self->size++; + return _self; +} + +// Used by parsing handlers. +void RepeatedField_push_native(VALUE _self, void* data) { + RepeatedField* self = ruby_to_RepeatedField(_self); + upb_fieldtype_t field_type = self->field_type; + int element_size = native_slot_size(field_type); + RepeatedField_reserve(self, self->size + 1); + int index = self->size; + void* memory = (void *) (((uint8_t *)self->elements) + index * element_size); + memcpy(memory, data, element_size); + self->size++; +} + +void* RepeatedField_index_native(VALUE _self, int index) { + RepeatedField* self = ruby_to_RepeatedField(_self); + upb_fieldtype_t field_type = self->field_type; + int element_size = native_slot_size(field_type); + return ((uint8_t *)self->elements) + index * element_size; +} + +/* + * call-seq: + * RepeatedField.pop => value + * + * Removes the last element and returns it. Throws an exception if the repeated + * field is empty. 
+ */ +VALUE RepeatedField_pop(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + upb_fieldtype_t field_type = self->field_type; + VALUE field_type_class = self->field_type_class; + int element_size = native_slot_size(field_type); + if (self->size == 0) { + rb_raise(rb_eRangeError, "Pop from empty repeated field is not allowed."); + } + int index = self->size - 1; + void* memory = (void *) (((uint8_t *)self->elements) + index * element_size); + VALUE ret = native_slot_get(field_type, field_type_class, memory); + self->size--; + return ret; +} + +/* + * call-seq: + * RepeatedField.insert(*args) + * + * Pushes each arg in turn onto the end of the repeated field. + */ +VALUE RepeatedField_insert(int argc, VALUE* argv, VALUE _self) { + for (int i = 0; i < argc; i++) { + RepeatedField_push(_self, argv[i]); + } + return Qnil; +} + +/* + * call-seq: + * RepeatedField.replace(list) + * + * Replaces the contents of the repeated field with the given list of elements. + */ +VALUE RepeatedField_replace(VALUE _self, VALUE list) { + RepeatedField* self = ruby_to_RepeatedField(_self); + Check_Type(list, T_ARRAY); + self->size = 0; + for (int i = 0; i < RARRAY_LEN(list); i++) { + RepeatedField_push(_self, rb_ary_entry(list, i)); + } + return Qnil; +} + +/* + * call-seq: + * RepeatedField.clear + * + * Clears (removes all elements from) this repeated field. + */ +VALUE RepeatedField_clear(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + self->size = 0; + return Qnil; +} + +/* + * call-seq: + * RepeatedField.length + * + * Returns the length of this repeated field. 
+ */ +VALUE RepeatedField_length(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + return INT2NUM(self->size); +} + +static VALUE RepeatedField_new_this_type(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + VALUE new_rptfield = Qnil; + VALUE element_type = fieldtype_to_ruby(self->field_type); + if (self->field_type_class != Qnil) { + new_rptfield = rb_funcall(CLASS_OF(_self), rb_intern("new"), 2, + element_type, self->field_type_class); + } else { + new_rptfield = rb_funcall(CLASS_OF(_self), rb_intern("new"), 1, + element_type); + } + return new_rptfield; +} + +/* + * call-seq: + * RepeatedField.dup => repeated_field + * + * Duplicates this repeated field with a shallow copy. References to all + * non-primitive element objects (e.g., submessages) are shared. + */ +VALUE RepeatedField_dup(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + VALUE new_rptfield = RepeatedField_new_this_type(_self); + RepeatedField* new_rptfield_self = ruby_to_RepeatedField(new_rptfield); + RepeatedField_reserve(new_rptfield_self, self->size); + upb_fieldtype_t field_type = self->field_type; + size_t elem_size = native_slot_size(field_type); + size_t off = 0; + for (int i = 0; i < self->size; i++, off += elem_size) { + void* to_mem = (uint8_t *)new_rptfield_self->elements + off; + void* from_mem = (uint8_t *)self->elements + off; + native_slot_dup(field_type, to_mem, from_mem); + new_rptfield_self->size++; + } + + return new_rptfield; +} + +// Internal only: used by Google::Protobuf.deep_copy. 
+VALUE RepeatedField_deep_copy(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + VALUE new_rptfield = RepeatedField_new_this_type(_self); + RepeatedField* new_rptfield_self = ruby_to_RepeatedField(new_rptfield); + RepeatedField_reserve(new_rptfield_self, self->size); + upb_fieldtype_t field_type = self->field_type; + size_t elem_size = native_slot_size(field_type); + size_t off = 0; + for (int i = 0; i < self->size; i++, off += elem_size) { + void* to_mem = (uint8_t *)new_rptfield_self->elements + off; + void* from_mem = (uint8_t *)self->elements + off; + native_slot_deep_copy(field_type, to_mem, from_mem); + new_rptfield_self->size++; + } + + return new_rptfield; +} + +/* + * call-seq: + * RepeatedField.==(other) => boolean + * + * Compares this repeated field to another. Repeated fields are equal if their + * element types are equal, their lengths are equal, and each element is equal. + * Elements are compared as per normal Ruby semantics, by calling their :== + * methods (or performing a more efficient comparison for primitive types). + */ +VALUE RepeatedField_eq(VALUE _self, VALUE _other) { + if (_self == _other) { + return Qtrue; + } + RepeatedField* self = ruby_to_RepeatedField(_self); + + // Inefficient but workable: to support comparison to a generic array, we + // build a temporary RepeatedField of our type. 
+ if (TYPE(_other) == T_ARRAY) { + VALUE new_rptfield = RepeatedField_new_this_type(_self); + for (int i = 0; i < RARRAY_LEN(_other); i++) { + VALUE elem = rb_ary_entry(_other, i); + RepeatedField_push(new_rptfield, elem); + } + _other = new_rptfield; + } + + RepeatedField* other = ruby_to_RepeatedField(_other); + if (self->field_type != other->field_type || + self->field_type_class != other->field_type_class || + self->size != other->size) { + return Qfalse; + } + + upb_fieldtype_t field_type = self->field_type; + size_t elem_size = native_slot_size(field_type); + size_t off = 0; + for (int i = 0; i < self->size; i++, off += elem_size) { + void* self_mem = ((uint8_t *)self->elements) + off; + void* other_mem = ((uint8_t *)other->elements) + off; + if (!native_slot_eq(field_type, self_mem, other_mem)) { + return Qfalse; + } + } + return Qtrue; +} + +/* + * call-seq: + * RepeatedField.hash => hash_value + * + * Returns a hash value computed from this repeated field's elements. + */ +VALUE RepeatedField_hash(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + + VALUE hash = LL2NUM(0); + + upb_fieldtype_t field_type = self->field_type; + VALUE field_type_class = self->field_type_class; + size_t elem_size = native_slot_size(field_type); + size_t off = 0; + for (int i = 0; i < self->size; i++, off += elem_size) { + void* mem = ((uint8_t *)self->elements) + off; + VALUE elem = native_slot_get(field_type, field_type_class, mem); + hash = rb_funcall(hash, rb_intern("<<"), 1, INT2NUM(2)); + hash = rb_funcall(hash, rb_intern("^"), 1, + rb_funcall(elem, rb_intern("hash"), 0)); + } + + return hash; +} + +/* + * call-seq: + * RepeatedField.inspect => string + * + * Returns a string representing this repeated field's elements. It will be + * formated as "[<element>, <element>, ...]", with each element's string + * representation computed by its own #inspect method. 
+ */ +VALUE RepeatedField_inspect(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + + VALUE str = rb_str_new2("["); + + bool first = true; + + upb_fieldtype_t field_type = self->field_type; + VALUE field_type_class = self->field_type_class; + size_t elem_size = native_slot_size(field_type); + size_t off = 0; + for (int i = 0; i < self->size; i++, off += elem_size) { + void* mem = ((uint8_t *)self->elements) + off; + VALUE elem = native_slot_get(field_type, field_type_class, mem); + if (!first) { + str = rb_str_cat2(str, ", "); + } else { + first = false; + } + str = rb_str_append(str, rb_funcall(elem, rb_intern("inspect"), 0)); + } + + str = rb_str_cat2(str, "]"); + return str; +} + +/* + * call-seq: + * RepeatedField.+(other) => repeated field + * + * Returns a new repeated field that contains the concatenated list of this + * repeated field's elements and other's elements. The other (second) list may + * be either another repeated field or a Ruby array. + */ +VALUE RepeatedField_plus(VALUE _self, VALUE list) { + VALUE dupped = RepeatedField_dup(_self); + + if (TYPE(list) == T_ARRAY) { + for (int i = 0; i < RARRAY_LEN(list); i++) { + VALUE elem = rb_ary_entry(list, i); + RepeatedField_push(dupped, elem); + } + } else if (RB_TYPE_P(list, T_DATA) && RTYPEDDATA_P(list) && + RTYPEDDATA_TYPE(list) == &RepeatedField_type) { + RepeatedField* self = ruby_to_RepeatedField(_self); + RepeatedField* list_rptfield = ruby_to_RepeatedField(list); + if (self->field_type != list_rptfield->field_type || + self->field_type_class != list_rptfield->field_type_class) { + rb_raise(rb_eArgError, + "Attempt to append RepeatedField with different element type."); + } + for (int i = 0; i < list_rptfield->size; i++) { + void* mem = RepeatedField_index_native(list, i); + RepeatedField_push_native(dupped, mem); + } + } else { + rb_raise(rb_eArgError, "Unknown type appending to RepeatedField"); + } + + return dupped; +} + +static void validate_type_class(upb_fieldtype_t 
type, VALUE klass) { + if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) { + rb_raise(rb_eArgError, + "Type class has no descriptor. Please pass a " + "class or enum as returned by the DescriptorPool."); + } + if (type == UPB_TYPE_MESSAGE) { + VALUE desc = rb_iv_get(klass, kDescriptorInstanceVar); + if (!RB_TYPE_P(desc, T_DATA) || !RTYPEDDATA_P(desc) || + RTYPEDDATA_TYPE(desc) != &_Descriptor_type) { + rb_raise(rb_eArgError, "Descriptor has an incorrect type."); + } + if (rb_get_alloc_func(klass) != &Message_alloc) { + rb_raise(rb_eArgError, + "Message class was not returned by the DescriptorPool."); + } + } else if (type == UPB_TYPE_ENUM) { + VALUE enumdesc = rb_iv_get(klass, kDescriptorInstanceVar); + if (!RB_TYPE_P(enumdesc, T_DATA) || !RTYPEDDATA_P(enumdesc) || + RTYPEDDATA_TYPE(enumdesc) != &_EnumDescriptor_type) { + rb_raise(rb_eArgError, "Descriptor has an incorrect type."); + } + } +} + +void RepeatedField_init_args(int argc, VALUE* argv, + VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + VALUE ary = Qnil; + if (argc < 1) { + rb_raise(rb_eArgError, "Expected at least 1 argument."); + } + self->field_type = ruby_to_fieldtype(argv[0]); + + if (self->field_type == UPB_TYPE_MESSAGE || + self->field_type == UPB_TYPE_ENUM) { + if (argc < 2) { + rb_raise(rb_eArgError, "Expected at least 2 arguments for message/enum."); + } + self->field_type_class = argv[1]; + if (argc > 2) { + ary = argv[2]; + } + validate_type_class(self->field_type, self->field_type_class); + } else { + if (argc > 2) { + rb_raise(rb_eArgError, "Too many arguments: expected 1 or 2."); + } + if (argc > 1) { + ary = argv[1]; + } + } + + if (ary != Qnil) { + if (!RB_TYPE_P(ary, T_ARRAY)) { + rb_raise(rb_eArgError, "Expected array as initialize argument"); + } + for (int i = 0; i < RARRAY_LEN(ary); i++) { + RepeatedField_push(_self, rb_ary_entry(ary, i)); + } + } +} + +// Mark, free, alloc, init and class setup functions. 
+ +void RepeatedField_mark(void* _self) { + RepeatedField* self = (RepeatedField*)_self; + rb_gc_mark(self->field_type_class); + upb_fieldtype_t field_type = self->field_type; + int element_size = native_slot_size(field_type); + for (int i = 0; i < self->size; i++) { + void* memory = (((uint8_t *)self->elements) + i * element_size); + native_slot_mark(self->field_type, memory); + } +} + +void RepeatedField_free(void* _self) { + RepeatedField* self = (RepeatedField*)_self; + xfree(self->elements); + xfree(self); +} + +/* + * call-seq: + * RepeatedField.new(type, type_class = nil, initial_elems = []) + * + * Creates a new repeated field. The provided type must be a Ruby symbol, and + * can take on the same values as those accepted by FieldDescriptor#type=. If + * the type is :message or :enum, type_class must be non-nil, and must be the + * Ruby class or module returned by Descriptor#msgclass or + * EnumDescriptor#enummodule, respectively. An initial list of elements may also + * be provided. 
+ */ +VALUE RepeatedField_alloc(VALUE klass) { + RepeatedField* self = ALLOC(RepeatedField); + self->elements = NULL; + self->size = 0; + self->capacity = 0; + self->field_type = -1; + self->field_type_class = Qnil; + VALUE ret = TypedData_Wrap_Struct(klass, &RepeatedField_type, self); + return ret; +} + +VALUE RepeatedField_init(int argc, VALUE* argv, VALUE self) { + RepeatedField_init_args(argc, argv, self); + return Qnil; +} + +void RepeatedField_register(VALUE module) { + VALUE klass = rb_define_class_under( + module, "RepeatedField", rb_cObject); + rb_define_alloc_func(klass, RepeatedField_alloc); + cRepeatedField = klass; + rb_gc_register_address(&cRepeatedField); + + rb_define_method(klass, "initialize", + RepeatedField_init, -1); + rb_define_method(klass, "each", RepeatedField_each, 0); + rb_define_method(klass, "[]", RepeatedField_index, 1); + rb_define_method(klass, "[]=", RepeatedField_index_set, 2); + rb_define_method(klass, "push", RepeatedField_push, 1); + rb_define_method(klass, "<<", RepeatedField_push, 1); + rb_define_method(klass, "pop", RepeatedField_pop, 0); + rb_define_method(klass, "insert", RepeatedField_insert, -1); + rb_define_method(klass, "replace", RepeatedField_replace, 1); + rb_define_method(klass, "clear", RepeatedField_clear, 0); + rb_define_method(klass, "length", RepeatedField_length, 0); + rb_define_method(klass, "dup", RepeatedField_dup, 0); + // Also define #clone so that we don't inherit Object#clone. 
+ rb_define_method(klass, "clone", RepeatedField_dup, 0); + rb_define_method(klass, "==", RepeatedField_eq, 1); + rb_define_method(klass, "hash", RepeatedField_hash, 0); + rb_define_method(klass, "inspect", RepeatedField_inspect, 0); + rb_define_method(klass, "+", RepeatedField_plus, 1); + rb_include_module(klass, rb_mEnumerable); +} diff --git a/ruby/ext/protobuf_c/storage.c b/ruby/ext/protobuf_c/storage.c new file mode 100644 index 00000000..c4d801af --- /dev/null +++ b/ruby/ext/protobuf_c/storage.c @@ -0,0 +1,577 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2014 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "protobuf.h" + +#include <math.h> + +#include <ruby/encoding.h> + +// ----------------------------------------------------------------------------- +// Ruby <-> native slot management. +// ----------------------------------------------------------------------------- + +#define DEREF(memory, type) *(type*)(memory) + +size_t native_slot_size(upb_fieldtype_t type) { + switch (type) { + case UPB_TYPE_FLOAT: return 4; + case UPB_TYPE_DOUBLE: return 8; + case UPB_TYPE_BOOL: return 1; + case UPB_TYPE_STRING: return sizeof(VALUE); + case UPB_TYPE_BYTES: return sizeof(VALUE); + case UPB_TYPE_MESSAGE: return sizeof(VALUE); + case UPB_TYPE_ENUM: return 4; + case UPB_TYPE_INT32: return 4; + case UPB_TYPE_INT64: return 8; + case UPB_TYPE_UINT32: return 4; + case UPB_TYPE_UINT64: return 8; + default: return 0; + } +} + +static void check_int_range_precision(upb_fieldtype_t type, VALUE val) { + // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper + // bound; we just need to do precision checks (i.e., disallow rounding) and + // check for < 0 on unsigned types. 
+ if (TYPE(val) == T_FLOAT) { + double dbl_val = NUM2DBL(val); + if (floor(dbl_val) != dbl_val) { + rb_raise(rb_eRangeError, + "Non-integral floating point value assigned to integer field."); + } + } + if (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64) { + if (NUM2DBL(val) < 0) { + rb_raise(rb_eRangeError, + "Assigning negative value to unsigned integer field."); + } + } +} + +static bool is_ruby_num(VALUE value) { + return (TYPE(value) == T_FLOAT || + TYPE(value) == T_FIXNUM || + TYPE(value) == T_BIGNUM); +} + +void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) { + bool bad_encoding = false; + rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value)); + if (type == UPB_TYPE_STRING) { + bad_encoding = + string_encoding != kRubyStringUtf8Encoding && + string_encoding != kRubyStringASCIIEncoding; + } else { + bad_encoding = + string_encoding != kRubyString8bitEncoding; + } + // Check that encoding is UTF-8 or ASCII (for string fields) or ASCII-8BIT + // (for bytes fields). + if (bad_encoding) { + rb_raise(rb_eTypeError, "Encoding for '%s' fields must be %s (was %s)", + (type == UPB_TYPE_STRING) ? "string" : "bytes", + (type == UPB_TYPE_STRING) ? 
"UTF-8 or ASCII" : "ASCII-8BIT", + rb_enc_name(string_encoding)); + } +} + +void native_slot_set(upb_fieldtype_t type, VALUE type_class, + void* memory, VALUE value) { + switch (type) { + case UPB_TYPE_FLOAT: + if (!is_ruby_num(value)) { + rb_raise(rb_eTypeError, "Expected number type for float field."); + } + DEREF(memory, float) = NUM2DBL(value); + break; + case UPB_TYPE_DOUBLE: + if (!is_ruby_num(value)) { + rb_raise(rb_eTypeError, "Expected number type for double field."); + } + DEREF(memory, double) = NUM2DBL(value); + break; + case UPB_TYPE_BOOL: { + int8_t val = -1; + if (value == Qtrue) { + val = 1; + } else if (value == Qfalse) { + val = 0; + } else { + rb_raise(rb_eTypeError, "Invalid argument for boolean field."); + } + DEREF(memory, int8_t) = val; + break; + } + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + if (CLASS_OF(value) != rb_cString) { + rb_raise(rb_eTypeError, "Invalid argument for string field."); + } + native_slot_validate_string_encoding(type, value); + DEREF(memory, VALUE) = value; + break; + } + case UPB_TYPE_MESSAGE: { + if (CLASS_OF(value) != type_class) { + rb_raise(rb_eTypeError, + "Invalid type %s to assign to submessage field.", + rb_class2name(CLASS_OF(value))); + } + DEREF(memory, VALUE) = value; + break; + } + case UPB_TYPE_ENUM: { + if (!is_ruby_num(value) && TYPE(value) != T_SYMBOL) { + rb_raise(rb_eTypeError, + "Expected number or symbol type for enum field."); + } + int32_t int_val = 0; + if (TYPE(value) == T_SYMBOL) { + // Ensure that the given symbol exists in the enum module. 
+ VALUE lookup = rb_const_get(type_class, SYM2ID(value)); + if (lookup == Qnil) { + rb_raise(rb_eRangeError, "Unknown symbol value for enum field."); + } else { + int_val = NUM2INT(lookup); + } + } else { + check_int_range_precision(UPB_TYPE_INT32, value); + int_val = NUM2INT(value); + } + DEREF(memory, int32_t) = int_val; + break; + } + case UPB_TYPE_INT32: + case UPB_TYPE_INT64: + case UPB_TYPE_UINT32: + case UPB_TYPE_UINT64: + if (!is_ruby_num(value)) { + rb_raise(rb_eTypeError, "Expected number type for integral field."); + } + check_int_range_precision(type, value); + switch (type) { + case UPB_TYPE_INT32: + DEREF(memory, int32_t) = NUM2INT(value); + break; + case UPB_TYPE_INT64: + DEREF(memory, int64_t) = NUM2LL(value); + break; + case UPB_TYPE_UINT32: + DEREF(memory, uint32_t) = NUM2UINT(value); + break; + case UPB_TYPE_UINT64: + DEREF(memory, uint64_t) = NUM2ULL(value); + break; + default: + break; + } + break; + default: + break; + } +} + +VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) { + switch (type) { + case UPB_TYPE_FLOAT: + return DBL2NUM(DEREF(memory, float)); + case UPB_TYPE_DOUBLE: + return DBL2NUM(DEREF(memory, double)); + case UPB_TYPE_BOOL: + return DEREF(memory, int8_t) ? 
Qtrue : Qfalse; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: + return *((VALUE *)memory); + case UPB_TYPE_ENUM: { + int32_t val = DEREF(memory, int32_t); + VALUE symbol = enum_lookup(type_class, INT2NUM(val)); + if (symbol == Qnil) { + return INT2NUM(val); + } else { + return symbol; + } + } + case UPB_TYPE_INT32: + return INT2NUM(DEREF(memory, int32_t)); + case UPB_TYPE_INT64: + return LL2NUM(DEREF(memory, int64_t)); + case UPB_TYPE_UINT32: + return UINT2NUM(DEREF(memory, uint32_t)); + case UPB_TYPE_UINT64: + return ULL2NUM(DEREF(memory, uint64_t)); + default: + return Qnil; + } +} + +void native_slot_init(upb_fieldtype_t type, void* memory) { + switch (type) { + case UPB_TYPE_FLOAT: + DEREF(memory, float) = 0.0; + break; + case UPB_TYPE_DOUBLE: + DEREF(memory, double) = 0.0; + break; + case UPB_TYPE_BOOL: + DEREF(memory, int8_t) = 0; + break; + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + // TODO(cfallin): set encoding appropriately + DEREF(memory, VALUE) = rb_str_new2(""); + break; + case UPB_TYPE_MESSAGE: + DEREF(memory, VALUE) = Qnil; + break; + case UPB_TYPE_ENUM: + case UPB_TYPE_INT32: + DEREF(memory, int32_t) = 0; + break; + case UPB_TYPE_INT64: + DEREF(memory, int64_t) = 0; + break; + case UPB_TYPE_UINT32: + DEREF(memory, uint32_t) = 0; + break; + case UPB_TYPE_UINT64: + DEREF(memory, uint64_t) = 0; + break; + default: + break; + } +} + +void native_slot_mark(upb_fieldtype_t type, void* memory) { + switch (type) { + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: + rb_gc_mark(DEREF(memory, VALUE)); + break; + default: + break; + } +} + +void native_slot_dup(upb_fieldtype_t type, void* to, void* from) { + memcpy(to, from, native_slot_size(type)); +} + +void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from) { + switch (type) { + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: { + VALUE from_val = DEREF(from, VALUE); + DEREF(to, VALUE) = (from_val != Qnil) ? 
+ rb_funcall(from_val, rb_intern("dup"), 0) : Qnil; + break; + } + case UPB_TYPE_MESSAGE: { + VALUE from_val = DEREF(from, VALUE); + DEREF(to, VALUE) = (from_val != Qnil) ? + Message_deep_copy(from_val) : Qnil; + break; + } + default: + memcpy(to, from, native_slot_size(type)); + } +} + +bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2) { + switch (type) { + case UPB_TYPE_STRING: + case UPB_TYPE_BYTES: + case UPB_TYPE_MESSAGE: { + VALUE val1 = DEREF(mem1, VALUE); + VALUE val2 = DEREF(mem2, VALUE); + VALUE ret = rb_funcall(val1, rb_intern("=="), 1, val2); + return ret == Qtrue; + } + default: + return !memcmp(mem1, mem2, native_slot_size(type)); + } +} + +// ----------------------------------------------------------------------------- +// Memory layout management. +// ----------------------------------------------------------------------------- + +MessageLayout* create_layout(const upb_msgdef* msgdef) { + MessageLayout* layout = ALLOC(MessageLayout); + int nfields = upb_msgdef_numfields(msgdef); + layout->offsets = ALLOC_N(size_t, nfields); + + upb_msg_iter it; + size_t off = 0; + for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + size_t field_size = + (upb_fielddef_label(field) == UPB_LABEL_REPEATED) ? 
+ sizeof(VALUE) : native_slot_size(upb_fielddef_type(field)); + // align current offset + off = (off + field_size - 1) & ~(field_size - 1); + layout->offsets[upb_fielddef_index(field)] = off; + off += field_size; + } + + layout->size = off; + + layout->msgdef = msgdef; + upb_msgdef_ref(layout->msgdef, &layout->msgdef); + + return layout; +} + +void free_layout(MessageLayout* layout) { + xfree(layout->offsets); + upb_msgdef_unref(layout->msgdef, &layout->msgdef); + xfree(layout); +} + +static VALUE get_type_class(const upb_fielddef* field) { + VALUE type_class = Qnil; + if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) { + VALUE submsgdesc = + get_def_obj(upb_fielddef_subdef(field)); + type_class = Descriptor_msgclass(submsgdesc); + } else if (upb_fielddef_type(field) == UPB_TYPE_ENUM) { + VALUE subenumdesc = + get_def_obj(upb_fielddef_subdef(field)); + type_class = EnumDescriptor_enummodule(subenumdesc); + } + return type_class; +} + +VALUE layout_get(MessageLayout* layout, + void* storage, + const upb_fielddef* field) { + void* memory = ((uint8_t *)storage) + + layout->offsets[upb_fielddef_index(field)]; + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + return *((VALUE *)memory); + } else { + return native_slot_get(upb_fielddef_type(field), + get_type_class(field), + memory); + } +} + +static void check_repeated_field_type(VALUE val, const upb_fielddef* field) { + assert(upb_fielddef_label(field) == UPB_LABEL_REPEATED); + + if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) || + RTYPEDDATA_TYPE(val) != &RepeatedField_type) { + rb_raise(rb_eTypeError, "Expected repeated field array"); + } + + RepeatedField* self = ruby_to_RepeatedField(val); + if (self->field_type != upb_fielddef_type(field)) { + rb_raise(rb_eTypeError, "Repeated field array has wrong element type"); + } + + if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE || + upb_fielddef_type(field) == UPB_TYPE_ENUM) { + RepeatedField* self = ruby_to_RepeatedField(val); + if (self->field_type_class != + 
get_def_obj(upb_fielddef_subdef(field))) { + rb_raise(rb_eTypeError, + "Repeated field array has wrong message/enum class"); + } + } +} + +void layout_set(MessageLayout* layout, + void* storage, + const upb_fielddef* field, + VALUE val) { + void* memory = ((uint8_t *)storage) + + layout->offsets[upb_fielddef_index(field)]; + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + check_repeated_field_type(val, field); + *((VALUE *)memory) = val; + } else { + native_slot_set(upb_fielddef_type(field), get_type_class(field), + memory, val); + } +} + +void layout_init(MessageLayout* layout, + void* storage) { + upb_msg_iter it; + for (upb_msg_begin(&it, layout->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + void* memory = ((uint8_t *)storage) + + layout->offsets[upb_fielddef_index(field)]; + + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + VALUE ary = Qnil; + VALUE type_class = get_type_class(field); + if (type_class != Qnil) { + VALUE args[2] = { + fieldtype_to_ruby(upb_fielddef_type(field)), + type_class, + }; + ary = rb_class_new_instance(2, args, cRepeatedField); + } else { + VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) }; + ary = rb_class_new_instance(1, args, cRepeatedField); + } + *((VALUE *)memory) = ary; + } else { + native_slot_init(upb_fielddef_type(field), memory); + } + } +} + +void layout_mark(MessageLayout* layout, void* storage) { + upb_msg_iter it; + for (upb_msg_begin(&it, layout->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + void* memory = ((uint8_t *)storage) + + layout->offsets[upb_fielddef_index(field)]; + + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + rb_gc_mark(*((VALUE *)memory)); + } else { + native_slot_mark(upb_fielddef_type(field), memory); + } + } +} + +void layout_dup(MessageLayout* layout, void* to, void* from) { + upb_msg_iter it; + for (upb_msg_begin(&it, 
layout->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + void* to_memory = ((uint8_t *)to) + + layout->offsets[upb_fielddef_index(field)]; + void* from_memory = ((uint8_t *)from) + + layout->offsets[upb_fielddef_index(field)]; + + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + *((VALUE *)to_memory) = RepeatedField_dup(*((VALUE *)from_memory)); + } else { + native_slot_dup(upb_fielddef_type(field), to_memory, from_memory); + } + } +} + +void layout_deep_copy(MessageLayout* layout, void* to, void* from) { + upb_msg_iter it; + for (upb_msg_begin(&it, layout->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + void* to_memory = ((uint8_t *)to) + + layout->offsets[upb_fielddef_index(field)]; + void* from_memory = ((uint8_t *)from) + + layout->offsets[upb_fielddef_index(field)]; + + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + *((VALUE *)to_memory) = RepeatedField_deep_copy(*((VALUE *)from_memory)); + } else { + native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory); + } + } +} + +VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) { + upb_msg_iter it; + for (upb_msg_begin(&it, layout->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + void* msg1_memory = ((uint8_t *)msg1) + + layout->offsets[upb_fielddef_index(field)]; + void* msg2_memory = ((uint8_t *)msg2) + + layout->offsets[upb_fielddef_index(field)]; + + if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { + if (RepeatedField_eq(*((VALUE *)msg1_memory), + *((VALUE *)msg2_memory)) == Qfalse) { + return Qfalse; + } + } else { + if (!native_slot_eq(upb_fielddef_type(field), + msg1_memory, msg2_memory)) { + return Qfalse; + } + } + } + return Qtrue; +} + +VALUE layout_hash(MessageLayout* layout, void* storage) { + upb_msg_iter it; + st_index_t h = rb_hash_start(0); + VALUE hash_sym = 
rb_intern("hash"); + for (upb_msg_begin(&it, layout->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + VALUE field_val = layout_get(layout, storage, field); + h = rb_hash_uint(h, NUM2LONG(rb_funcall(field_val, hash_sym, 0))); + } + h = rb_hash_end(h); + + return INT2FIX(h); +} + +VALUE layout_inspect(MessageLayout* layout, void* storage) { + VALUE str = rb_str_new2(""); + + upb_msg_iter it; + bool first = true; + for (upb_msg_begin(&it, layout->msgdef); + !upb_msg_done(&it); + upb_msg_next(&it)) { + const upb_fielddef* field = upb_msg_iter_field(&it); + VALUE field_val = layout_get(layout, storage, field); + + if (!first) { + str = rb_str_cat2(str, ", "); + } else { + first = false; + } + str = rb_str_cat2(str, upb_fielddef_name(field)); + str = rb_str_cat2(str, ": "); + + str = rb_str_append(str, rb_funcall(field_val, rb_intern("inspect"), 0)); + } + + return str; +} |