aboutsummaryrefslogtreecommitdiffhomepage
path: root/ruby/ext
diff options
context:
space:
mode:
authorGravatar Joshua Haberman <jhaberman@gmail.com>2015-01-13 13:50:11 -0800
committerGravatar Joshua Haberman <jhaberman@gmail.com>2015-01-13 13:50:11 -0800
commit5446deaea7ffc29f6e09368cb6238da083969123 (patch)
tree994ba61d6441558edbff629b3da99d31552600bb /ruby/ext
parent052e0205a76717f39fc65e303fd2b92ab1df3028 (diff)
parentace4212e60bf1abd46181c9ddb9fe31b6d9fac45 (diff)
Merge pull request #155 from cfallin/ruby-maps
Support for maps in the MRI C Ruby extension.
Diffstat (limited to 'ruby/ext')
-rw-r--r--ruby/ext/google/protobuf_c/defs.c113
-rw-r--r--ruby/ext/google/protobuf_c/encode_decode.c417
-rw-r--r--ruby/ext/google/protobuf_c/extconf.rb2
-rw-r--r--ruby/ext/google/protobuf_c/map.c805
-rw-r--r--ruby/ext/google/protobuf_c/message.c15
-rw-r--r--ruby/ext/google/protobuf_c/protobuf.c1
-rw-r--r--ruby/ext/google/protobuf_c/protobuf.h86
-rw-r--r--ruby/ext/google/protobuf_c/repeated_field.c6
-rw-r--r--ruby/ext/google/protobuf_c/storage.c189
-rw-r--r--ruby/ext/google/protobuf_c/upb.c1199
-rw-r--r--ruby/ext/google/protobuf_c/upb.h83
11 files changed, 2337 insertions, 579 deletions
diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c
index bb6f10e1..a18aaac4 100644
--- a/ruby/ext/google/protobuf_c/defs.c
+++ b/ruby/ext/google/protobuf_c/defs.c
@@ -226,6 +226,7 @@ DEFINE_CLASS(Descriptor, "Google::Protobuf::Descriptor");
void Descriptor_mark(void* _self) {
Descriptor* self = _self;
rb_gc_mark(self->klass);
+ rb_gc_mark(self->typeclass_references);
}
void Descriptor_free(void* _self) {
@@ -270,6 +271,7 @@ VALUE Descriptor_alloc(VALUE klass) {
self->fill_method = NULL;
self->pb_serialize_handlers = NULL;
self->json_serialize_handlers = NULL;
+ self->typeclass_references = rb_ary_new();
return ret;
}
@@ -923,6 +925,7 @@ DEFINE_CLASS(MessageBuilderContext,
void MessageBuilderContext_mark(void* _self) {
MessageBuilderContext* self = _self;
rb_gc_mark(self->descriptor);
+ rb_gc_mark(self->builder);
}
void MessageBuilderContext_free(void* _self) {
@@ -935,6 +938,7 @@ VALUE MessageBuilderContext_alloc(VALUE klass) {
VALUE ret = TypedData_Wrap_Struct(
klass, &_MessageBuilderContext_type, self);
self->descriptor = Qnil;
+ self->builder = Qnil;
return ret;
}
@@ -943,24 +947,29 @@ void MessageBuilderContext_register(VALUE module) {
module, "MessageBuilderContext", rb_cObject);
rb_define_alloc_func(klass, MessageBuilderContext_alloc);
rb_define_method(klass, "initialize",
- MessageBuilderContext_initialize, 1);
+ MessageBuilderContext_initialize, 2);
rb_define_method(klass, "optional", MessageBuilderContext_optional, -1);
rb_define_method(klass, "required", MessageBuilderContext_required, -1);
rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1);
+ rb_define_method(klass, "map", MessageBuilderContext_map, -1);
cMessageBuilderContext = klass;
rb_gc_register_address(&cMessageBuilderContext);
}
/*
* call-seq:
- * MessageBuilderContext.new(desc) => context
+ * MessageBuilderContext.new(desc, builder) => context
*
- * Create a new builder context around the given message descriptor. This class
- * is intended to serve as a DSL context to be used with #instance_eval.
+ * Create a new message builder context around the given message descriptor and
+ * builder context. This class is intended to serve as a DSL context to be used
+ * with #instance_eval.
*/
-VALUE MessageBuilderContext_initialize(VALUE _self, VALUE msgdef) {
+VALUE MessageBuilderContext_initialize(VALUE _self,
+ VALUE msgdef,
+ VALUE builder) {
DEFINE_SELF(MessageBuilderContext, self, _self);
self->descriptor = msgdef;
+ self->builder = builder;
return Qnil;
}
@@ -1065,6 +1074,97 @@ VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self) {
name, type, number, type_class);
}
+/*
+ * call-seq:
+ * MessageBuilderContext.map(name, key_type, value_type, number,
+ * value_type_class = nil)
+ *
+ * Defines a new map field on this message type with the given key and value
+ * types, tag number, and type class (for message and enum value types). The key
+ * type must be :int32/:uint32/:int64/:uint64, :bool, or :string. The value type
+ * type must be a Ruby symbol (as accepted by FieldDescriptor#type=) and the
+ * type_class must be a string, if present (as accepted by
+ * FieldDescriptor#submsg_name=).
+ */
+VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self) {
+ DEFINE_SELF(MessageBuilderContext, self, _self);
+
+ if (argc < 4) {
+ rb_raise(rb_eArgError, "Expected at least 4 arguments.");
+ }
+ VALUE name = argv[0];
+ VALUE key_type = argv[1];
+ VALUE value_type = argv[2];
+ VALUE number = argv[3];
+ VALUE type_class = (argc > 4) ? argv[4] : Qnil;
+
+ // Validate the key type. We can't accept enums, messages, or floats/doubles
+ // as map keys. (We exclude these explicitly, and the field-descriptor setter
+ // below then ensures that the type is one of the remaining valid options.)
+ if (SYM2ID(key_type) == rb_intern("float") ||
+ SYM2ID(key_type) == rb_intern("double") ||
+ SYM2ID(key_type) == rb_intern("enum") ||
+ SYM2ID(key_type) == rb_intern("message")) {
+ rb_raise(rb_eArgError,
+ "Cannot add a map field with a float, double, enum, or message "
+ "type.");
+ }
+
+ // Create a new message descriptor for the map entry message, and create a
+ // repeated submessage field here with that type.
+ VALUE mapentry_desc = rb_class_new_instance(0, NULL, cDescriptor);
+ VALUE mapentry_desc_name = rb_funcall(self->descriptor, rb_intern("name"), 0);
+ mapentry_desc_name = rb_str_cat2(mapentry_desc_name, "_MapEntry_");
+ mapentry_desc_name = rb_str_cat2(mapentry_desc_name,
+ rb_id2name(SYM2ID(name)));
+ Descriptor_name_set(mapentry_desc, mapentry_desc_name);
+
+ // The 'mapentry' attribute has no Ruby setter because we do not want the user
+ // attempting to DIY the setup below; we want to ensure that the fields are
+ // correct. So we reach into the msgdef here to set the bit manually.
+ Descriptor* mapentry_desc_self = ruby_to_Descriptor(mapentry_desc);
+ upb_msgdef_setmapentry((upb_msgdef*)mapentry_desc_self->msgdef, true);
+
+ // optional <type> key = 1;
+ VALUE key_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
+ FieldDescriptor_name_set(key_field, rb_str_new2("key"));
+ FieldDescriptor_label_set(key_field, ID2SYM(rb_intern("optional")));
+ FieldDescriptor_number_set(key_field, INT2NUM(1));
+ FieldDescriptor_type_set(key_field, key_type);
+ Descriptor_add_field(mapentry_desc, key_field);
+
+ // optional <type> value = 2;
+ VALUE value_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
+ FieldDescriptor_name_set(value_field, rb_str_new2("value"));
+ FieldDescriptor_label_set(value_field, ID2SYM(rb_intern("optional")));
+ FieldDescriptor_number_set(value_field, INT2NUM(2));
+ FieldDescriptor_type_set(value_field, value_type);
+ if (type_class != Qnil) {
+ VALUE submsg_name = rb_str_new2("."); // prepend '.' to make name absolute.
+ submsg_name = rb_str_append(submsg_name, type_class);
+ FieldDescriptor_submsg_name_set(value_field, submsg_name);
+ }
+ Descriptor_add_field(mapentry_desc, value_field);
+
+ // Add the map-entry message type to the current builder, and use the type to
+ // create the map field itself.
+ Builder* builder_self = ruby_to_Builder(self->builder);
+ rb_ary_push(builder_self->pending_list, mapentry_desc);
+
+ VALUE map_field = rb_class_new_instance(0, NULL, cFieldDescriptor);
+ VALUE name_str = rb_str_new2(rb_id2name(SYM2ID(name)));
+ FieldDescriptor_name_set(map_field, name_str);
+ FieldDescriptor_number_set(map_field, number);
+ FieldDescriptor_label_set(map_field, ID2SYM(rb_intern("repeated")));
+ FieldDescriptor_type_set(map_field, ID2SYM(rb_intern("message")));
+ VALUE submsg_name = rb_str_new2("."); // prepend '.' to make name absolute.
+ submsg_name = rb_str_append(submsg_name, mapentry_desc_name);
+ FieldDescriptor_submsg_name_set(map_field, submsg_name);
+ Descriptor_add_field(self->descriptor, map_field);
+
+ return Qnil;
+}
+
// -----------------------------------------------------------------------------
// EnumBuilderContext.
// -----------------------------------------------------------------------------
@@ -1190,7 +1290,8 @@ void Builder_register(VALUE module) {
VALUE Builder_add_message(VALUE _self, VALUE name) {
DEFINE_SELF(Builder, self, _self);
VALUE msgdef = rb_class_new_instance(0, NULL, cDescriptor);
- VALUE ctx = rb_class_new_instance(1, &msgdef, cMessageBuilderContext);
+ VALUE args[2] = { msgdef, _self };
+ VALUE ctx = rb_class_new_instance(2, args, cMessageBuilderContext);
VALUE block = rb_block_proc();
rb_funcall(msgdef, rb_intern("name="), 1, name);
rb_funcall_with_block(ctx, rb_intern("instance_eval"), 0, NULL, block);
diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c
index 8aba3c9e..e5e1514b 100644
--- a/ruby/ext/google/protobuf_c/encode_decode.c
+++ b/ruby/ext/google/protobuf_c/encode_decode.c
@@ -64,7 +64,7 @@ static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs,
static void *startseq_handler(void* closure, const void* hd) {
MessageHeader* msg = closure;
const size_t *ofs = hd;
- return (void*)DEREF(Message_data(msg), *ofs, VALUE);
+ return (void*)DEREF(msg, *ofs, VALUE);
}
// Handlers that append primitive values to a repeated field (a regular Ruby
@@ -115,7 +115,7 @@ static void* str_handler(void *closure,
const size_t *ofs = hd;
VALUE str = rb_str_new2("");
rb_enc_associate(str, kRubyStringUtf8Encoding);
- DEREF(Message_data(msg), *ofs, VALUE) = str;
+ DEREF(msg, *ofs, VALUE) = str;
return (void*)str;
}
@@ -127,7 +127,7 @@ static void* bytes_handler(void *closure,
const size_t *ofs = hd;
VALUE str = rb_str_new2("");
rb_enc_associate(str, kRubyString8bitEncoding);
- DEREF(Message_data(msg), *ofs, VALUE) = str;
+ DEREF(msg, *ofs, VALUE) = str;
return (void*)str;
}
@@ -163,20 +163,237 @@ static void *submsg_handler(void *closure, const void *hd) {
get_def_obj((void*)submsgdata->md);
VALUE subklass = Descriptor_msgclass(subdesc);
- if (DEREF(Message_data(msg), submsgdata->ofs, VALUE) == Qnil) {
- DEREF(Message_data(msg), submsgdata->ofs, VALUE) =
+ if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
+ DEREF(msg, submsgdata->ofs, VALUE) =
rb_class_new_instance(0, NULL, subklass);
}
- VALUE submsg_rb = DEREF(Message_data(msg), submsgdata->ofs, VALUE);
+ VALUE submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
MessageHeader* submsg;
TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
return submsg;
}
+// Handler data for startmap/endmap handlers.
+typedef struct {
+ size_t ofs;
+ upb_fieldtype_t key_field_type;
+ upb_fieldtype_t value_field_type;
+ VALUE value_field_typeclass;
+} map_handlerdata_t;
+
+// Temporary frame for map parsing: at the beginning of a map entry message, a
+// submsg handler allocates a frame to hold (i) a reference to the Map object
+// into which this message will be inserted and (ii) storage slots to
+// temporarily hold the key and value for this map entry until the end of the
+// submessage. When the submessage ends, another handler is called to insert the
+// value into the map.
+typedef struct {
+ VALUE map;
+ char key_storage[NATIVE_SLOT_MAX_SIZE];
+ char value_storage[NATIVE_SLOT_MAX_SIZE];
+} map_parse_frame_t;
+
+// Handler to begin a map entry: allocates a temporary frame. This is the
+// 'startsubmsg' handler on the msgdef that contains the map field.
+static void *startmapentry_handler(void *closure, const void *hd) {
+ MessageHeader* msg = closure;
+ const map_handlerdata_t* mapdata = hd;
+ VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE);
+
+ map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
+ frame->map = map_rb;
+
+ native_slot_init(mapdata->key_field_type, &frame->key_storage);
+ native_slot_init(mapdata->value_field_type, &frame->value_storage);
+
+ return frame;
+}
+
+// Handler to end a map entry: inserts the value defined during the message into
+// the map. This is the 'endmsg' handler on the map entry msgdef.
+static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
+ map_parse_frame_t* frame = closure;
+ const map_handlerdata_t* mapdata = hd;
+
+ VALUE key = native_slot_get(
+ mapdata->key_field_type, Qnil,
+ &frame->key_storage);
+ VALUE value = native_slot_get(
+ mapdata->value_field_type, mapdata->value_field_typeclass,
+ &frame->value_storage);
+
+ Map_index_set(frame->map, key, value);
+ free(frame);
+
+ return true;
+}
+
+// Allocates a new map_handlerdata_t given the map entry message definition. If
+// the offset of the field within the parent message is also given, that is
+// added to the handler data as well. Note that this is called *twice* per map
+// field: once in the parent message handler setup when setting the startsubmsg
+// handler and once in the map entry message handler setup when setting the
+// key/value and endmsg handlers. The reason is that there is no easy way to
+// pass the handlerdata down to the sub-message handler setup.
+static map_handlerdata_t* new_map_handlerdata(
+ size_t ofs,
+ const upb_msgdef* mapentry_def,
+ Descriptor* desc) {
+
+ map_handlerdata_t* hd = ALLOC(map_handlerdata_t);
+ hd->ofs = ofs;
+ const upb_fielddef* key_field = upb_msgdef_itof(mapentry_def,
+ MAP_KEY_FIELD);
+ assert(key_field != NULL);
+ hd->key_field_type = upb_fielddef_type(key_field);
+ const upb_fielddef* value_field = upb_msgdef_itof(mapentry_def,
+ MAP_VALUE_FIELD);
+ assert(value_field != NULL);
+ hd->value_field_type = upb_fielddef_type(value_field);
+ hd->value_field_typeclass = field_type_class(value_field);
+
+ // Ensure that value_field_typeclass is properly GC-rooted. We must do this
+ // because we hold a reference to the Ruby class in the handlerdata, which is
+ // owned by the handlers. The handlers are owned by *this* message's Ruby
+ // object, but each Ruby object is rooted independently at the def -> Ruby
+ // object map. So we have to ensure that the Ruby objects we depend on will
+ // stick around as long as we're around.
+ if (hd->value_field_typeclass != Qnil) {
+ rb_ary_push(desc->typeclass_references, hd->value_field_typeclass);
+ }
+
+ return hd;
+}
+
+// Set up handlers for a repeated field.
+static void add_handlers_for_repeated_field(upb_handlers *h,
+ const upb_fielddef *f,
+ size_t offset) {
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
+ upb_handlers_setstartseq(h, f, startseq_handler, &attr);
+ upb_handlerattr_uninit(&attr);
+
+ switch (upb_fielddef_type(f)) {
+
+#define SET_HANDLER(utype, ltype) \
+ case utype: \
+ upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
+ break;
+
+ SET_HANDLER(UPB_TYPE_BOOL, bool);
+ SET_HANDLER(UPB_TYPE_INT32, int32);
+ SET_HANDLER(UPB_TYPE_UINT32, uint32);
+ SET_HANDLER(UPB_TYPE_ENUM, int32);
+ SET_HANDLER(UPB_TYPE_FLOAT, float);
+ SET_HANDLER(UPB_TYPE_INT64, int64);
+ SET_HANDLER(UPB_TYPE_UINT64, uint64);
+ SET_HANDLER(UPB_TYPE_DOUBLE, double);
+
+#undef SET_HANDLER
+
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES: {
+ bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
+ upb_handlers_setstartstr(h, f, is_bytes ?
+ appendbytes_handler : appendstr_handler,
+ NULL);
+ upb_handlers_setstring(h, f, stringdata_handler, NULL);
+ break;
+ }
+ case UPB_TYPE_MESSAGE: {
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
+ upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
+ upb_handlerattr_uninit(&attr);
+ break;
+ }
+ }
+}
+
+// Set up handlers for a singular field.
+static void add_handlers_for_singular_field(upb_handlers *h,
+ const upb_fielddef *f,
+ size_t offset) {
+ switch (upb_fielddef_type(f)) {
+ case UPB_TYPE_BOOL:
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_ENUM:
+ case UPB_TYPE_FLOAT:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT64:
+ case UPB_TYPE_DOUBLE:
+ upb_shim_set(h, f, offset, -1);
+ break;
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES: {
+ bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
+ upb_handlers_setstartstr(h, f,
+ is_bytes ? bytes_handler : str_handler,
+ &attr);
+ upb_handlers_setstring(h, f, stringdata_handler, &attr);
+ upb_handlerattr_uninit(&attr);
+ break;
+ }
+ case UPB_TYPE_MESSAGE: {
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
+ upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
+ upb_handlerattr_uninit(&attr);
+ break;
+ }
+ }
+}
+
+// Adds handlers to a map field.
+static void add_handlers_for_mapfield(upb_handlers* h,
+ const upb_fielddef* fielddef,
+ size_t offset,
+ Descriptor* desc) {
+ const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
+ map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc);
+ upb_handlers_addcleanup(h, hd, free);
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, hd);
+ upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr);
+ upb_handlerattr_uninit(&attr);
+}
+
+// Adds handlers to a map-entry msgdef.
+static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
+ upb_handlers* h,
+ Descriptor* desc) {
+ const upb_fielddef* key_field = map_entry_key(msgdef);
+ const upb_fielddef* value_field = map_entry_value(msgdef);
+ map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
+ upb_handlers_addcleanup(h, hd, free);
+ upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+ upb_handlerattr_sethandlerdata(&attr, hd);
+ upb_handlers_setendmsg(h, endmap_handler, &attr);
+
+ add_handlers_for_singular_field(
+ h, key_field,
+ offsetof(map_parse_frame_t, key_storage));
+ add_handlers_for_singular_field(
+ h, value_field,
+ offsetof(map_parse_frame_t, value_storage));
+}
+
static void add_handlers_for_message(const void *closure, upb_handlers *h) {
- Descriptor* desc = ruby_to_Descriptor(
- get_def_obj((void*)upb_handlers_msgdef(h)));
+ const upb_msgdef* msgdef = upb_handlers_msgdef(h);
+ Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef));
+
+ // If this is a mapentry message type, set up a special set of handlers and
+ // bail out of the normal (user-defined) message type handling.
+ if (upb_msgdef_mapentry(msgdef)) {
+ add_handlers_for_mapentry(msgdef, h, desc);
+ return;
+ }
+
// Ensure layout exists. We may be invoked to create handlers for a given
// message if we are included as a submsg of another message type before our
// class is actually built, so to work around this, we just create the layout
@@ -191,82 +408,15 @@ static void add_handlers_for_message(const void *closure, upb_handlers *h) {
!upb_msg_done(&i);
upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
- size_t offset = desc->layout->offsets[upb_fielddef_index(f)];
-
- if (upb_fielddef_isseq(f)) {
- upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
- upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
- upb_handlers_setstartseq(h, f, startseq_handler, &attr);
- upb_handlerattr_uninit(&attr);
-
- switch (upb_fielddef_type(f)) {
+ size_t offset = desc->layout->offsets[upb_fielddef_index(f)] +
+ sizeof(MessageHeader);
-#define SET_HANDLER(utype, ltype) \
- case utype: \
- upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
- break;
-
- SET_HANDLER(UPB_TYPE_BOOL, bool);
- SET_HANDLER(UPB_TYPE_INT32, int32);
- SET_HANDLER(UPB_TYPE_UINT32, uint32);
- SET_HANDLER(UPB_TYPE_ENUM, int32);
- SET_HANDLER(UPB_TYPE_FLOAT, float);
- SET_HANDLER(UPB_TYPE_INT64, int64);
- SET_HANDLER(UPB_TYPE_UINT64, uint64);
- SET_HANDLER(UPB_TYPE_DOUBLE, double);
-
-#undef SET_HANDLER
-
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES: {
- bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
- upb_handlers_setstartstr(h, f, is_bytes ?
- appendbytes_handler : appendstr_handler,
- NULL);
- upb_handlers_setstring(h, f, stringdata_handler, NULL);
- }
- case UPB_TYPE_MESSAGE: {
- upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
- upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
- upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
- upb_handlerattr_uninit(&attr);
- break;
- }
- }
- }
-
- switch (upb_fielddef_type(f)) {
- case UPB_TYPE_BOOL:
- case UPB_TYPE_INT32:
- case UPB_TYPE_UINT32:
- case UPB_TYPE_ENUM:
- case UPB_TYPE_FLOAT:
- case UPB_TYPE_INT64:
- case UPB_TYPE_UINT64:
- case UPB_TYPE_DOUBLE:
- // The shim writes directly at the given offset (instead of using
- // DEREF()) so we need to add the msg overhead.
- upb_shim_set(h, f, offset + sizeof(MessageHeader), -1);
- break;
- case UPB_TYPE_STRING:
- case UPB_TYPE_BYTES: {
- bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
- upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
- upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
- upb_handlers_setstartstr(h, f,
- is_bytes ? bytes_handler : str_handler,
- &attr);
- upb_handlers_setstring(h, f, stringdata_handler, &attr);
- upb_handlerattr_uninit(&attr);
- break;
- }
- case UPB_TYPE_MESSAGE: {
- upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
- upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
- upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
- upb_handlerattr_uninit(&attr);
- break;
- }
+ if (is_map_field(f)) {
+ add_handlers_for_mapfield(h, f, offset, desc);
+ } else if (upb_fielddef_isseq(f)) {
+ add_handlers_for_repeated_field(h, f, offset);
+ } else {
+ add_handlers_for_singular_field(h, f, offset);
}
}
}
@@ -558,6 +708,88 @@ static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
}
+static void put_ruby_value(VALUE value,
+ const upb_fielddef *f,
+ VALUE type_class,
+ int depth,
+ upb_sink *sink) {
+ upb_selector_t sel = 0;
+ if (upb_fielddef_isprimitive(f)) {
+ sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+ }
+
+ switch (upb_fielddef_type(f)) {
+ case UPB_TYPE_INT32:
+ upb_sink_putint32(sink, sel, NUM2INT(value));
+ break;
+ case UPB_TYPE_INT64:
+ upb_sink_putint64(sink, sel, NUM2LL(value));
+ break;
+ case UPB_TYPE_UINT32:
+ upb_sink_putuint32(sink, sel, NUM2UINT(value));
+ break;
+ case UPB_TYPE_UINT64:
+ upb_sink_putuint64(sink, sel, NUM2ULL(value));
+ break;
+ case UPB_TYPE_FLOAT:
+ upb_sink_putfloat(sink, sel, NUM2DBL(value));
+ break;
+ case UPB_TYPE_DOUBLE:
+ upb_sink_putdouble(sink, sel, NUM2DBL(value));
+ break;
+ case UPB_TYPE_ENUM: {
+ if (TYPE(value) == T_SYMBOL) {
+ value = rb_funcall(type_class, rb_intern("resolve"), 1, value);
+ }
+ upb_sink_putint32(sink, sel, NUM2INT(value));
+ break;
+ }
+ case UPB_TYPE_BOOL:
+ upb_sink_putbool(sink, sel, value == Qtrue);
+ break;
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES:
+ putstr(value, f, sink);
+ break;
+ case UPB_TYPE_MESSAGE:
+ putsubmsg(value, f, sink, depth);
+ }
+}
+
+static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
+ int depth) {
+ if (map == Qnil) return;
+ Map* self = ruby_to_Map(map);
+
+ upb_sink subsink;
+
+ upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
+
+ assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
+ const upb_fielddef* key_field = map_field_key(f);
+ const upb_fielddef* value_field = map_field_value(f);
+
+ Map_iter it;
+ for (Map_begin(map, &it); !Map_done(&it); Map_next(&it)) {
+ VALUE key = Map_iter_key(&it);
+ VALUE value = Map_iter_value(&it);
+
+ upb_sink entry_sink;
+ upb_sink_startsubmsg(&subsink, getsel(f, UPB_HANDLER_STARTSUBMSG), &entry_sink);
+ upb_sink_startmsg(&entry_sink);
+
+ put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink);
+ put_ruby_value(value, value_field, self->value_type_class, depth + 1,
+ &entry_sink);
+
+ upb_status status;
+ upb_sink_endmsg(&entry_sink, &status);
+ upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
+ }
+
+ upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
+}
+
static void putmsg(VALUE msg_rb, const Descriptor* desc,
upb_sink *sink, int depth) {
upb_sink_startmsg(sink);
@@ -571,33 +803,38 @@ static void putmsg(VALUE msg_rb, const Descriptor* desc,
MessageHeader* msg;
TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
- void* msg_data = Message_data(msg);
upb_msg_iter i;
for (upb_msg_begin(&i, desc->msgdef);
!upb_msg_done(&i);
upb_msg_next(&i)) {
upb_fielddef *f = upb_msg_iter_field(&i);
- uint32_t offset = desc->layout->offsets[upb_fielddef_index(f)];
+ uint32_t offset =
+ desc->layout->offsets[upb_fielddef_index(f)] + sizeof(MessageHeader);
- if (upb_fielddef_isseq(f)) {
- VALUE ary = DEREF(msg_data, offset, VALUE);
+ if (is_map_field(f)) {
+ VALUE map = DEREF(msg, offset, VALUE);
+ if (map != Qnil) {
+ putmap(map, f, sink, depth);
+ }
+ } else if (upb_fielddef_isseq(f)) {
+ VALUE ary = DEREF(msg, offset, VALUE);
if (ary != Qnil) {
putary(ary, f, sink, depth);
}
} else if (upb_fielddef_isstring(f)) {
- VALUE str = DEREF(msg_data, offset, VALUE);
+ VALUE str = DEREF(msg, offset, VALUE);
if (RSTRING_LEN(str) > 0) {
putstr(str, f, sink);
}
} else if (upb_fielddef_issubmsg(f)) {
- putsubmsg(DEREF(msg_data, offset, VALUE), f, sink, depth);
+ putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth);
} else {
upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
#define T(upbtypeconst, upbtype, ctype, default_value) \
case upbtypeconst: { \
- ctype value = DEREF(msg_data, offset, ctype); \
+ ctype value = DEREF(msg, offset, ctype); \
if (value != default_value) { \
upb_sink_put##upbtype(sink, sel, value); \
} \
diff --git a/ruby/ext/google/protobuf_c/extconf.rb b/ruby/ext/google/protobuf_c/extconf.rb
index 7cf7bf6a..8d60392c 100644
--- a/ruby/ext/google/protobuf_c/extconf.rb
+++ b/ruby/ext/google/protobuf_c/extconf.rb
@@ -5,6 +5,6 @@ require 'mkmf'
$CFLAGS += " -O3 -std=c99 -Wno-unused-function -DNDEBUG "
$objs = ["protobuf.o", "defs.o", "storage.o", "message.o",
- "repeated_field.o", "encode_decode.o", "upb.o"]
+ "repeated_field.o", "map.o", "encode_decode.o", "upb.o"]
create_makefile("google/protobuf_c")
diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c
new file mode 100644
index 00000000..4ee71d18
--- /dev/null
+++ b/ruby/ext/google/protobuf_c/map.c
@@ -0,0 +1,805 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2014 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "protobuf.h"
+
+// -----------------------------------------------------------------------------
+// Basic map operations on top of upb's strtable.
+//
+// Note that we roll our own `Map` container here because, as for
+// `RepeatedField`, we want a strongly-typed container. This is so that any user
+// errors due to incorrect map key or value types are raised as close as
+// possible to the error site, rather than at some deferred point (e.g.,
+// serialization).
+//
+// We build our `Map` on top of upb_strtable so that we're able to take
+// advantage of the native_slot storage abstraction, as RepeatedField does.
+// (This is not quite a perfect mapping -- see the key conversions below -- but
+// gives us full support and error-checking for all value types for free.)
+// -----------------------------------------------------------------------------
+
+// Map values are stored using the native_slot abstraction (as with repeated
+// field values), but keys are a bit special. Since we use a strtable, we need
+// to store keys as sequences of bytes such that equality of those bytes maps
+// one-to-one to equality of keys. We store strings directly (i.e., they map to
+// their own bytes) and integers as native integers (using the native_slot
+// abstraction).
+
+// Note that there is another tradeoff here in keeping string keys as native
+// strings rather than Ruby strings: traversing the Map requires conversion to
+// Ruby string values on every traversal, potentially creating more garbage. We
+// should consider ways to cache a Ruby version of the key if this becomes an
+// issue later.
+
+// Forms a key to use with the underlying strtable from a Ruby key value. |buf|
+// must point to TABLE_KEY_BUF_LENGTH bytes of temporary space, used to
+// construct a key byte sequence if needed. |out_key| and |out_length| provide
+// the resulting key data/length.
+#define TABLE_KEY_BUF_LENGTH 8 // sizeof(uint64_t)
+static void table_key(Map* self, VALUE key,
+ char* buf,
+ const char** out_key,
+ size_t* out_length) {
+ switch (self->key_type) {
+ case UPB_TYPE_BYTES:
+ case UPB_TYPE_STRING:
+ // Strings: use string content directly.
+ Check_Type(key, T_STRING);
+ native_slot_validate_string_encoding(self->key_type, key);
+ *out_key = RSTRING_PTR(key);
+ *out_length = RSTRING_LEN(key);
+ break;
+
+ case UPB_TYPE_BOOL:
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_UINT64:
+ native_slot_set(self->key_type, Qnil, buf, key);
+ *out_key = buf;
+ *out_length = native_slot_size(self->key_type);
+ break;
+
+ default:
+ // Map constructor should not allow a Map with another key type to be
+ // constructed.
+ assert(false);
+ break;
+ }
+}
+
+static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) {
+ switch (self->key_type) {
+ case UPB_TYPE_BYTES:
+ case UPB_TYPE_STRING: {
+ VALUE ret = rb_str_new(buf, length);
+ rb_enc_associate(ret,
+ (self->key_type == UPB_TYPE_BYTES) ?
+ kRubyString8bitEncoding : kRubyStringUtf8Encoding);
+ return ret;
+ }
+
+ case UPB_TYPE_BOOL:
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_UINT64:
+ return native_slot_get(self->key_type, Qnil, buf);
+
+ default:
+ assert(false);
+ return Qnil;
+ }
+}
+
+static void* value_memory(upb_value* v) {
+ return (void*)(&v->val.uint64);
+}
+
+// -----------------------------------------------------------------------------
+// Map container type.
+// -----------------------------------------------------------------------------
+
+const rb_data_type_t Map_type = {
+ "Google::Protobuf::Map",
+ { Map_mark, Map_free, NULL },
+};
+
+VALUE cMap;
+
+Map* ruby_to_Map(VALUE _self) {
+ Map* self;
+ TypedData_Get_Struct(_self, Map, &Map_type, self);
+ return self;
+}
+
+void Map_mark(void* _self) {
+ Map* self = _self;
+
+ rb_gc_mark(self->value_type_class);
+
+ if (self->value_type == UPB_TYPE_STRING ||
+ self->value_type == UPB_TYPE_BYTES ||
+ self->value_type == UPB_TYPE_MESSAGE) {
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ native_slot_mark(self->value_type, mem);
+ }
+ }
+}
+
+void Map_free(void* _self) {
+ Map* self = _self;
+ upb_strtable_uninit(&self->table);
+ xfree(self);
+}
+
+VALUE Map_alloc(VALUE klass) {
+ Map* self = ALLOC(Map);
+ memset(self, 0, sizeof(Map));
+ self->value_type_class = Qnil;
+ VALUE ret = TypedData_Wrap_Struct(klass, &Map_type, self);
+ return ret;
+}
+
+static bool needs_typeclass(upb_fieldtype_t type) {
+ switch (type) {
+ case UPB_TYPE_MESSAGE:
+ case UPB_TYPE_ENUM:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * call-seq:
+ * Map.new(key_type, value_type, value_typeclass = nil, init_hashmap = {})
+ * => new map
+ *
+ * Allocates a new Map container. This constructor may be called with 2, 3, or 4
+ * arguments. The first two arguments are always present and are symbols (taking
+ * on the same values as field-type symbols in message descriptors) that
+ * indicate the type of the map key and value fields.
+ *
+ * The supported key types are: :int32, :int64, :uint32, :uint64, :bool,
+ * :string, :bytes.
+ *
+ * The supported value types are: :int32, :int64, :uint32, :uint64, :bool,
+ * :string, :bytes, :enum, :message.
+ *
+ * The third argument, value_typeclass, must be present if value_type is :enum
+ * or :message. As in RepeatedField#new, this argument must be a message class
+ * (for :message) or enum module (for :enum).
+ *
+ * The last argument, if present, provides initial content for map. Note that
+ * this may be an ordinary Ruby hashmap or another Map instance with identical
+ * key and value types. Also note that this argument may be present whether or
+ * not value_typeclass is present (and it is unambiguously separate from
+ * value_typeclass because value_typeclass's presence is strictly determined by
+ * value_type). The contents of this initial hashmap or Map instance are
+ * shallow-copied into the new Map: the original map is unmodified, but
+ * references to underlying objects will be shared if the value type is a
+ * message type.
+ */
+VALUE Map_init(int argc, VALUE* argv, VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+
+ // We take either two args (:key_type, :value_type), three args (:key_type,
+ // :value_type, "ValueMessageType"), or four args (the above plus an initial
+ // hashmap).
+ if (argc < 2 || argc > 4) {
+ rb_raise(rb_eArgError, "Map constructor expects 2, 3 or 4 arguments.");
+ }
+
+ self->key_type = ruby_to_fieldtype(argv[0]);
+ self->value_type = ruby_to_fieldtype(argv[1]);
+
+ // Check that the key type is an allowed type.
+ switch (self->key_type) {
+ case UPB_TYPE_INT32:
+ case UPB_TYPE_INT64:
+ case UPB_TYPE_UINT32:
+ case UPB_TYPE_UINT64:
+ case UPB_TYPE_BOOL:
+ case UPB_TYPE_STRING:
+ case UPB_TYPE_BYTES:
+ // These are OK.
+ break;
+ default:
+ rb_raise(rb_eArgError, "Invalid key type for map.");
+ }
+
+ int init_value_arg = 2;
+ if (needs_typeclass(self->value_type) && argc > 2) {
+ self->value_type_class = argv[2];
+ validate_type_class(self->value_type, self->value_type_class);
+ init_value_arg = 3;
+ }
+
+ // Table value type is always UINT64: this ensures enough space to store the
+ // native_slot value.
+ if (!upb_strtable_init(&self->table, UPB_CTYPE_UINT64)) {
+ rb_raise(rb_eRuntimeError, "Could not allocate table.");
+ }
+
+ if (argc > init_value_arg) {
+ Map_merge_into_self(_self, argv[init_value_arg]);
+ }
+
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * Map.each(&block)
+ *
+ * Invokes &block on each |key, value| pair in the map, in unspecified order.
+ * Note that Map also includes Enumerable; map thus acts like a normal Ruby
+ * sequence.
+ */
+VALUE Map_each(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+
+ VALUE key = table_key_to_ruby(
+ self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
+
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ VALUE value = native_slot_get(self->value_type,
+ self->value_type_class,
+ mem);
+
+ rb_yield_values(2, key, value);
+ }
+
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * Map.keys => [list_of_keys]
+ *
+ * Returns the list of keys contained in the map, in unspecified order.
+ */
+VALUE Map_keys(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+
+ VALUE ret = rb_ary_new();
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+
+ VALUE key = table_key_to_ruby(
+ self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
+
+ rb_ary_push(ret, key);
+ }
+
+ return ret;
+}
+
+/*
+ * call-seq:
+ * Map.values => [list_of_values]
+ *
+ * Returns the list of values contained in the map, in unspecified order.
+ */
+VALUE Map_values(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+
+ VALUE ret = rb_ary_new();
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ VALUE value = native_slot_get(self->value_type,
+ self->value_type_class,
+ mem);
+
+ rb_ary_push(ret, value);
+ }
+
+ return ret;
+}
+
+/*
+ * call-seq:
+ * Map.[](key) => value
+ *
+ * Accesses the element at the given key. Throws an exception if the key type is
+ * incorrect. Returns nil when the key is not present in the map.
+ */
+VALUE Map_index(VALUE _self, VALUE key) {
+ Map* self = ruby_to_Map(_self);
+
+ char keybuf[TABLE_KEY_BUF_LENGTH];
+ const char* keyval = NULL;
+ size_t length = 0;
+ table_key(self, key, keybuf, &keyval, &length);
+
+ upb_value v;
+ if (upb_strtable_lookup2(&self->table, keyval, length, &v)) {
+ void* mem = value_memory(&v);
+ return native_slot_get(self->value_type, self->value_type_class, mem);
+ } else {
+ return Qnil;
+ }
+}
+
+/*
+ * call-seq:
+ * Map.[]=(key, value) => value
+ *
+ * Inserts or overwrites the value at the given key with the given new value.
+ * Throws an exception if the key type is incorrect. Returns the new value that
+ * was just inserted.
+ */
+VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) {
+ Map* self = ruby_to_Map(_self);
+
+ char keybuf[TABLE_KEY_BUF_LENGTH];
+ const char* keyval = NULL;
+ size_t length = 0;
+ table_key(self, key, keybuf, &keyval, &length);
+
+ upb_value v;
+ void* mem = value_memory(&v);
+ native_slot_set(self->value_type, self->value_type_class, mem, value);
+
+ // Replace any existing value by issuing a 'remove' operation first.
+ upb_strtable_remove2(&self->table, keyval, length, NULL);
+ if (!upb_strtable_insert2(&self->table, keyval, length, v)) {
+ rb_raise(rb_eRuntimeError, "Could not insert into table");
+ }
+
+ // Ruby hashmap's :[]= method also returns the inserted value.
+ return value;
+}
+
+/*
+ * call-seq:
+ * Map.has_key?(key) => bool
+ *
+ * Returns true if the given key is present in the map. Throws an exception if
+ * the key has the wrong type.
+ */
+VALUE Map_has_key(VALUE _self, VALUE key) {
+ Map* self = ruby_to_Map(_self);
+
+ char keybuf[TABLE_KEY_BUF_LENGTH];
+ const char* keyval = NULL;
+ size_t length = 0;
+ table_key(self, key, keybuf, &keyval, &length);
+
+ if (upb_strtable_lookup2(&self->table, keyval, length, NULL)) {
+ return Qtrue;
+ } else {
+ return Qfalse;
+ }
+}
+
+/*
+ * call-seq:
+ * Map.delete(key) => old_value
+ *
+ * Deletes the value at the given key, if any, returning either the old value or
+ * nil if none was present. Throws an exception if the key is of the wrong type.
+ */
+VALUE Map_delete(VALUE _self, VALUE key) {
+ Map* self = ruby_to_Map(_self);
+
+ char keybuf[TABLE_KEY_BUF_LENGTH];
+ const char* keyval = NULL;
+ size_t length = 0;
+ table_key(self, key, keybuf, &keyval, &length);
+
+ upb_value v;
+ if (upb_strtable_remove2(&self->table, keyval, length, &v)) {
+ void* mem = value_memory(&v);
+ return native_slot_get(self->value_type, self->value_type_class, mem);
+ } else {
+ return Qnil;
+ }
+}
+
+/*
+ * call-seq:
+ * Map.clear
+ *
+ * Removes all entries from the map.
+ */
+VALUE Map_clear(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+
+ // Uninit and reinit the table -- this is faster than iterating and doing a
+ // delete-lookup on each key.
+ upb_strtable_uninit(&self->table);
+ if (!upb_strtable_init(&self->table, UPB_CTYPE_INT64)) {
+ rb_raise(rb_eRuntimeError, "Unable to re-initialize table");
+ }
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * Map.length
+ *
+ * Returns the number of entries (key-value pairs) in the map.
+ */
+VALUE Map_length(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+ return ULL2NUM(upb_strtable_count(&self->table));
+}
+
+static VALUE Map_new_this_type(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+ VALUE new_map = Qnil;
+ VALUE key_type = fieldtype_to_ruby(self->key_type);
+ VALUE value_type = fieldtype_to_ruby(self->value_type);
+ if (self->value_type_class != Qnil) {
+ new_map = rb_funcall(CLASS_OF(_self), rb_intern("new"), 3,
+ key_type, value_type, self->value_type_class);
+ } else {
+ new_map = rb_funcall(CLASS_OF(_self), rb_intern("new"), 2,
+ key_type, value_type);
+ }
+ return new_map;
+}
+
+/*
+ * call-seq:
+ * Map.dup => new_map
+ *
+ * Duplicates this map with a shallow copy. References to all non-primitive
+ * element objects (e.g., submessages) are shared.
+ */
+VALUE Map_dup(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+ VALUE new_map = Map_new_this_type(_self);
+ Map* new_self = ruby_to_Map(new_map);
+
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ upb_value dup;
+ void* dup_mem = value_memory(&dup);
+ native_slot_dup(self->value_type, dup_mem, mem);
+
+ if (!upb_strtable_insert2(&new_self->table,
+ upb_strtable_iter_key(&it),
+ upb_strtable_iter_keylength(&it),
+ dup)) {
+ rb_raise(rb_eRuntimeError, "Error inserting value into new table");
+ }
+ }
+
+ return new_map;
+}
+
+// Used by Google::Protobuf.deep_copy but not exposed directly.
+VALUE Map_deep_copy(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+ VALUE new_map = Map_new_this_type(_self);
+ Map* new_self = ruby_to_Map(new_map);
+
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ upb_value dup;
+ void* dup_mem = value_memory(&dup);
+ native_slot_deep_copy(self->value_type, dup_mem, mem);
+
+ if (!upb_strtable_insert2(&new_self->table,
+ upb_strtable_iter_key(&it),
+ upb_strtable_iter_keylength(&it),
+ dup)) {
+ rb_raise(rb_eRuntimeError, "Error inserting value into new table");
+ }
+ }
+
+ return new_map;
+}
+
+/*
+ * call-seq:
+ * Map.==(other) => boolean
+ *
+ * Compares this map to another. Maps are equal if they have identical key sets,
+ * and for each key, the values in both maps compare equal. Elements are
+ * compared as per normal Ruby semantics, by calling their :== methods (or
+ * performing a more efficient comparison for primitive types).
+ *
+ * Maps with dissimilar key types or value types/typeclasses are never equal,
+ * even if value comparison (for example, between integers and floats) would
+ * have otherwise indicated that every element has equal value.
+ */
+VALUE Map_eq(VALUE _self, VALUE _other) {
+ Map* self = ruby_to_Map(_self);
+
+ // Allow comparisons to Ruby hashmaps by converting to a temporary Map
+ // instance. Slow, but workable.
+ if (TYPE(_other) == T_HASH) {
+ VALUE other_map = Map_new_this_type(_self);
+ Map_merge_into_self(other_map, _other);
+ _other = other_map;
+ }
+
+ Map* other = ruby_to_Map(_other);
+
+ if (self == other) {
+ return Qtrue;
+ }
+ if (self->key_type != other->key_type ||
+ self->value_type != other->value_type ||
+ self->value_type_class != other->value_type_class) {
+ return Qfalse;
+ }
+ if (upb_strtable_count(&self->table) != upb_strtable_count(&other->table)) {
+ return Qfalse;
+ }
+
+ // For each member of self, check that an equal member exists at the same key
+ // in other.
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ upb_value other_v;
+ void* other_mem = value_memory(&other_v);
+
+ if (!upb_strtable_lookup2(&other->table,
+ upb_strtable_iter_key(&it),
+ upb_strtable_iter_keylength(&it),
+ &other_v)) {
+ // Not present in other map.
+ return Qfalse;
+ }
+
+ if (!native_slot_eq(self->value_type, mem, other_mem)) {
+ // Present, but value not equal.
+ return Qfalse;
+ }
+ }
+
+ return Qtrue;
+}
+
+/*
+ * call-seq:
+ * Map.hash => hash_value
+ *
+ * Returns a hash value based on this map's contents.
+ */
+VALUE Map_hash(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+
+ st_index_t h = rb_hash_start(0);
+ VALUE hash_sym = rb_intern("hash");
+
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+ VALUE key = table_key_to_ruby(
+ self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
+
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ VALUE value = native_slot_get(self->value_type,
+ self->value_type_class,
+ mem);
+
+ h = rb_hash_uint(h, NUM2LONG(rb_funcall(key, hash_sym, 0)));
+ h = rb_hash_uint(h, NUM2LONG(rb_funcall(value, hash_sym, 0)));
+ }
+
+ return INT2FIX(h);
+}
+
+/*
+ * call-seq:
+ * Map.inspect => string
+ *
+ * Returns a string representing this map's elements. It will be formatted as
+ * "{key => value, key => value, ...}", with each key and value string
+ * representation computed by its own #inspect method.
+ */
+VALUE Map_inspect(VALUE _self) {
+ Map* self = ruby_to_Map(_self);
+
+ VALUE str = rb_str_new2("{");
+
+ bool first = true;
+ VALUE inspect_sym = rb_intern("inspect");
+
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &self->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+ VALUE key = table_key_to_ruby(
+ self, upb_strtable_iter_key(&it), upb_strtable_iter_keylength(&it));
+
+ upb_value v = upb_strtable_iter_value(&it);
+ void* mem = value_memory(&v);
+ VALUE value = native_slot_get(self->value_type,
+ self->value_type_class,
+ mem);
+
+ if (!first) {
+ str = rb_str_cat2(str, ", ");
+ } else {
+ first = false;
+ }
+ str = rb_str_append(str, rb_funcall(key, inspect_sym, 0));
+ str = rb_str_cat2(str, " => ");
+ str = rb_str_append(str, rb_funcall(value, inspect_sym, 0));
+ }
+
+ str = rb_str_cat2(str, "}");
+ return str;
+}
+
+/*
+ * call-seq:
+ * Map.merge(other_map) => map
+ *
+ * Copies key/value pairs from other_map into a copy of this map. If a key is
+ * set in other_map and this map, the value from other_map overwrites the value
+ * in the new copy of this map. Returns the new copy of this map with merged
+ * contents.
+ */
+VALUE Map_merge(VALUE _self, VALUE hashmap) {
+ VALUE dupped = Map_dup(_self);
+ return Map_merge_into_self(dupped, hashmap);
+}
+
+static int merge_into_self_callback(VALUE key, VALUE value, VALUE self) {
+ Map_index_set(self, key, value);
+ return ST_CONTINUE;
+}
+
+// Used only internally -- shared by #merge and #initialize.
+VALUE Map_merge_into_self(VALUE _self, VALUE hashmap) {
+ if (TYPE(hashmap) == T_HASH) {
+ rb_hash_foreach(hashmap, merge_into_self_callback, _self);
+ } else if (RB_TYPE_P(hashmap, T_DATA) && RTYPEDDATA_P(hashmap) &&
+ RTYPEDDATA_TYPE(hashmap) == &Map_type) {
+
+ Map* self = ruby_to_Map(_self);
+ Map* other = ruby_to_Map(hashmap);
+
+ if (self->key_type != other->key_type ||
+ self->value_type != other->value_type ||
+ self->value_type_class != other->value_type_class) {
+ rb_raise(rb_eArgError, "Attempt to merge Map with mismatching types");
+ }
+
+ upb_strtable_iter it;
+ for (upb_strtable_begin(&it, &other->table);
+ !upb_strtable_done(&it);
+ upb_strtable_next(&it)) {
+
+ // Replace any existing value by issuing a 'remove' operation first.
+ upb_value oldv;
+ upb_strtable_remove2(&self->table,
+ upb_strtable_iter_key(&it),
+ upb_strtable_iter_keylength(&it),
+ &oldv);
+
+ upb_value v = upb_strtable_iter_value(&it);
+ upb_strtable_insert2(&self->table,
+ upb_strtable_iter_key(&it),
+ upb_strtable_iter_keylength(&it),
+ v);
+ }
+ } else {
+ rb_raise(rb_eArgError, "Unknown type merging into Map");
+ }
+ return _self;
+}
+
+// Internal method: map iterator initialization (used for serialization).
+void Map_begin(VALUE _self, Map_iter* iter) {
+ Map* self = ruby_to_Map(_self);
+ iter->self = self;
+ upb_strtable_begin(&iter->it, &self->table);
+}
+
+void Map_next(Map_iter* iter) {
+ upb_strtable_next(&iter->it);
+}
+
+bool Map_done(Map_iter* iter) {
+ return upb_strtable_done(&iter->it);
+}
+
+VALUE Map_iter_key(Map_iter* iter) {
+ return table_key_to_ruby(
+ iter->self,
+ upb_strtable_iter_key(&iter->it),
+ upb_strtable_iter_keylength(&iter->it));
+}
+
+VALUE Map_iter_value(Map_iter* iter) {
+ upb_value v = upb_strtable_iter_value(&iter->it);
+ void* mem = value_memory(&v);
+ return native_slot_get(iter->self->value_type,
+ iter->self->value_type_class,
+ mem);
+}
+
+void Map_register(VALUE module) {
+ VALUE klass = rb_define_class_under(module, "Map", rb_cObject);
+ rb_define_alloc_func(klass, Map_alloc);
+ cMap = klass;
+ rb_gc_register_address(&cMap);
+
+ rb_define_method(klass, "initialize", Map_init, -1);
+ rb_define_method(klass, "each", Map_each, 0);
+ rb_define_method(klass, "keys", Map_keys, 0);
+ rb_define_method(klass, "values", Map_values, 0);
+ rb_define_method(klass, "[]", Map_index, 1);
+ rb_define_method(klass, "[]=", Map_index_set, 2);
+ rb_define_method(klass, "has_key?", Map_has_key, 1);
+ rb_define_method(klass, "delete", Map_delete, 1);
+ rb_define_method(klass, "clear", Map_clear, 0);
+ rb_define_method(klass, "length", Map_length, 0);
+ rb_define_method(klass, "dup", Map_dup, 0);
+ rb_define_method(klass, "==", Map_eq, 1);
+ rb_define_method(klass, "hash", Map_hash, 0);
+ rb_define_method(klass, "inspect", Map_inspect, 0);
+ rb_define_method(klass, "merge", Map_merge, 1);
+ rb_include_module(klass, rb_mEnumerable);
+}
diff --git a/ruby/ext/google/protobuf_c/message.c b/ruby/ext/google/protobuf_c/message.c
index 105b7807..ee8881d4 100644
--- a/ruby/ext/google/protobuf_c/message.c
+++ b/ruby/ext/google/protobuf_c/message.c
@@ -139,7 +139,14 @@ int Message_initialize_kwarg(VALUE key, VALUE val, VALUE _self) {
"Unknown field name in initialization map entry.");
}
- if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
+ if (is_map_field(f)) {
+ if (TYPE(val) != T_HASH) {
+ rb_raise(rb_eArgError,
+ "Expected Hash object as initializer value for map field.");
+ }
+ VALUE map = layout_get(self->descriptor->layout, Message_data(self), f);
+ Map_merge_into_self(map, val);
+ } else if (upb_fielddef_label(f) == UPB_LABEL_REPEATED) {
if (TYPE(val) != T_ARRAY) {
rb_raise(rb_eArgError,
"Expected array as initializer value for repeated field.");
@@ -450,13 +457,15 @@ VALUE build_module_from_enumdesc(EnumDescriptor* enumdesc) {
* call-seq:
* Google::Protobuf.deep_copy(obj) => copy_of_obj
*
- * Performs a deep copy of either a RepeatedField instance or a message object,
- * recursively copying its members.
+ * Performs a deep copy of a RepeatedField instance, a Map instance, or a
+ * message object, recursively copying its members.
*/
VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj) {
VALUE klass = CLASS_OF(obj);
if (klass == cRepeatedField) {
return RepeatedField_deep_copy(obj);
+ } else if (klass == cMap) {
+ return Map_deep_copy(obj);
} else {
return Message_deep_copy(obj);
}
diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c
index d5862284..30552705 100644
--- a/ruby/ext/google/protobuf_c/protobuf.c
+++ b/ruby/ext/google/protobuf_c/protobuf.c
@@ -82,6 +82,7 @@ void Init_protobuf_c() {
EnumBuilderContext_register(internal);
Builder_register(internal);
RepeatedField_register(protobuf);
+ Map_register(protobuf);
rb_define_singleton_method(protobuf, "encode", Google_Protobuf_encode, 1);
rb_define_singleton_method(protobuf, "decode", Google_Protobuf_decode, 2);
diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h
index c3a5d653..88ae62e4 100644
--- a/ruby/ext/google/protobuf_c/protobuf.h
+++ b/ruby/ext/google/protobuf_c/protobuf.h
@@ -110,6 +110,10 @@ struct Descriptor {
const upb_pbdecodermethod* fill_method;
const upb_handlers* pb_serialize_handlers;
const upb_handlers* json_serialize_handlers;
+ // Handlers hold type class references for sub-message fields directly in some
+ // cases. We need to keep these rooted because they might otherwise be
+ // collected.
+ VALUE typeclass_references;
};
struct FieldDescriptor {
@@ -123,6 +127,7 @@ struct EnumDescriptor {
struct MessageBuilderContext {
VALUE descriptor;
+ VALUE builder;
};
struct EnumBuilderContext {
@@ -213,10 +218,13 @@ void MessageBuilderContext_free(void* _self);
VALUE MessageBuilderContext_alloc(VALUE klass);
void MessageBuilderContext_register(VALUE module);
MessageBuilderContext* ruby_to_MessageBuilderContext(VALUE value);
-VALUE MessageBuilderContext_initialize(VALUE _self, VALUE descriptor);
+VALUE MessageBuilderContext_initialize(VALUE _self,
+ VALUE descriptor,
+ VALUE builder);
VALUE MessageBuilderContext_optional(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_required(int argc, VALUE* argv, VALUE _self);
VALUE MessageBuilderContext_repeated(int argc, VALUE* argv, VALUE _self);
+VALUE MessageBuilderContext_map(int argc, VALUE* argv, VALUE _self);
void EnumBuilderContext_mark(void* _self);
void EnumBuilderContext_free(void* _self);
@@ -239,6 +247,8 @@ VALUE Builder_finalize_to_pool(VALUE _self, VALUE pool_rb);
// Native slot storage abstraction.
// -----------------------------------------------------------------------------
+#define NATIVE_SLOT_MAX_SIZE sizeof(void*)
+
size_t native_slot_size(upb_fieldtype_t type);
void native_slot_set(upb_fieldtype_t type,
VALUE type_class,
@@ -246,7 +256,7 @@ void native_slot_set(upb_fieldtype_t type,
VALUE value);
VALUE native_slot_get(upb_fieldtype_t type,
VALUE type_class,
- void* memory);
+ const void* memory);
void native_slot_init(upb_fieldtype_t type, void* memory);
void native_slot_mark(upb_fieldtype_t type, void* memory);
void native_slot_dup(upb_fieldtype_t type, void* to, void* from);
@@ -254,11 +264,27 @@ void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from);
bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2);
void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value);
+void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE value);
extern rb_encoding* kRubyStringUtf8Encoding;
extern rb_encoding* kRubyStringASCIIEncoding;
extern rb_encoding* kRubyString8bitEncoding;
+VALUE field_type_class(const upb_fielddef* field);
+
+#define MAP_KEY_FIELD 1
+#define MAP_VALUE_FIELD 2
+
+// These operate on a map field (i.e., a repeated field of submessages whose
+// submessage type is a map-entry msgdef).
+bool is_map_field(const upb_fielddef* field);
+const upb_fielddef* map_field_key(const upb_fielddef* field);
+const upb_fielddef* map_field_value(const upb_fielddef* field);
+
+// These operate on a map-entry msgdef.
+const upb_fielddef* map_entry_key(const upb_msgdef* msgdef);
+const upb_fielddef* map_entry_value(const upb_msgdef* msgdef);
+
// -----------------------------------------------------------------------------
// Repeated field container type.
// -----------------------------------------------------------------------------
@@ -282,7 +308,6 @@ extern VALUE cRepeatedField;
RepeatedField* ruby_to_RepeatedField(VALUE value);
-void RepeatedField_register(VALUE module);
VALUE RepeatedField_each(VALUE _self);
VALUE RepeatedField_index(VALUE _self, VALUE _index);
void* RepeatedField_index_native(VALUE _self, int index);
@@ -302,6 +327,59 @@ VALUE RepeatedField_hash(VALUE _self);
VALUE RepeatedField_inspect(VALUE _self);
VALUE RepeatedField_plus(VALUE _self, VALUE list);
+// Defined in repeated_field.c; also used by Map.
+void validate_type_class(upb_fieldtype_t type, VALUE klass);
+
+// -----------------------------------------------------------------------------
+// Map container type.
+// -----------------------------------------------------------------------------
+
+typedef struct {
+ upb_fieldtype_t key_type;
+ upb_fieldtype_t value_type;
+ VALUE value_type_class;
+ upb_strtable table;
+} Map;
+
+void Map_mark(void* self);
+void Map_free(void* self);
+VALUE Map_alloc(VALUE klass);
+VALUE Map_init(int argc, VALUE* argv, VALUE self);
+void Map_register(VALUE module);
+
+extern const rb_data_type_t Map_type;
+extern VALUE cMap;
+
+Map* ruby_to_Map(VALUE value);
+
+VALUE Map_each(VALUE _self);
+VALUE Map_keys(VALUE _self);
+VALUE Map_values(VALUE _self);
+VALUE Map_index(VALUE _self, VALUE key);
+VALUE Map_index_set(VALUE _self, VALUE key, VALUE value);
+VALUE Map_has_key(VALUE _self, VALUE key);
+VALUE Map_delete(VALUE _self, VALUE key);
+VALUE Map_clear(VALUE _self);
+VALUE Map_length(VALUE _self);
+VALUE Map_dup(VALUE _self);
+VALUE Map_deep_copy(VALUE _self);
+VALUE Map_eq(VALUE _self, VALUE _other);
+VALUE Map_hash(VALUE _self);
+VALUE Map_inspect(VALUE _self);
+VALUE Map_merge(VALUE _self, VALUE hashmap);
+VALUE Map_merge_into_self(VALUE _self, VALUE hashmap);
+
+typedef struct {
+ Map* self;
+ upb_strtable_iter it;
+} Map_iter;
+
+void Map_begin(VALUE _self, Map_iter* iter);
+void Map_next(Map_iter* iter);
+bool Map_done(Map_iter* iter);
+VALUE Map_iter_key(Map_iter* iter);
+VALUE Map_iter_value(Map_iter* iter);
+
// -----------------------------------------------------------------------------
// Message layout / storage.
// -----------------------------------------------------------------------------
@@ -315,7 +393,7 @@ struct MessageLayout {
MessageLayout* create_layout(const upb_msgdef* msgdef);
void free_layout(MessageLayout* layout);
VALUE layout_get(MessageLayout* layout,
- void* storage,
+ const void* storage,
const upb_fielddef* field);
void layout_set(MessageLayout* layout,
void* storage,
diff --git a/ruby/ext/google/protobuf_c/repeated_field.c b/ruby/ext/google/protobuf_c/repeated_field.c
index 6bd13b07..6e3f0bc7 100644
--- a/ruby/ext/google/protobuf_c/repeated_field.c
+++ b/ruby/ext/google/protobuf_c/repeated_field.c
@@ -324,6 +324,10 @@ VALUE RepeatedField_deep_copy(VALUE _self) {
* element types are equal, their lengths are equal, and each element is equal.
* Elements are compared as per normal Ruby semantics, by calling their :==
* methods (or performing a more efficient comparison for primitive types).
+ *
+ * Repeated fields with dissimilar element types are never equal, even if value
+ * comparison (for example, between integers and floats) would have otherwise
+ * indicated that every element has equal value.
*/
VALUE RepeatedField_eq(VALUE _self, VALUE _other) {
if (_self == _other) {
@@ -458,7 +462,7 @@ VALUE RepeatedField_plus(VALUE _self, VALUE list) {
return dupped;
}
-static void validate_type_class(upb_fieldtype_t type, VALUE klass) {
+void validate_type_class(upb_fieldtype_t type, VALUE klass) {
if (rb_iv_get(klass, kDescriptorInstanceVar) == Qnil) {
rb_raise(rb_eArgError,
"Type class has no descriptor. Please pass a "
diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c
index c4d801af..14f49d44 100644
--- a/ruby/ext/google/protobuf_c/storage.c
+++ b/ruby/ext/google/protobuf_c/storage.c
@@ -57,7 +57,17 @@ size_t native_slot_size(upb_fieldtype_t type) {
}
}
-static void check_int_range_precision(upb_fieldtype_t type, VALUE val) {
+static bool is_ruby_num(VALUE value) {
+ return (TYPE(value) == T_FLOAT ||
+ TYPE(value) == T_FIXNUM ||
+ TYPE(value) == T_BIGNUM);
+}
+
+void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE val) {
+ if (!is_ruby_num(val)) {
+ rb_raise(rb_eTypeError, "Expected number type for integral field.");
+ }
+
// NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper
// bound; we just need to do precision checks (i.e., disallow rounding) and
// check for < 0 on unsigned types.
@@ -76,12 +86,6 @@ static void check_int_range_precision(upb_fieldtype_t type, VALUE val) {
}
}
-static bool is_ruby_num(VALUE value) {
- return (TYPE(value) == T_FLOAT ||
- TYPE(value) == T_FIXNUM ||
- TYPE(value) == T_BIGNUM);
-}
-
void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) {
bool bad_encoding = false;
rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value));
@@ -156,14 +160,14 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
int32_t int_val = 0;
if (TYPE(value) == T_SYMBOL) {
// Ensure that the given symbol exists in the enum module.
- VALUE lookup = rb_const_get(type_class, SYM2ID(value));
+ VALUE lookup = rb_funcall(type_class, rb_intern("resolve"), 1, value);
if (lookup == Qnil) {
rb_raise(rb_eRangeError, "Unknown symbol value for enum field.");
} else {
int_val = NUM2INT(lookup);
}
} else {
- check_int_range_precision(UPB_TYPE_INT32, value);
+ native_slot_check_int_range_precision(UPB_TYPE_INT32, value);
int_val = NUM2INT(value);
}
DEREF(memory, int32_t) = int_val;
@@ -173,10 +177,7 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
case UPB_TYPE_INT64:
case UPB_TYPE_UINT32:
case UPB_TYPE_UINT64:
- if (!is_ruby_num(value)) {
- rb_raise(rb_eTypeError, "Expected number type for integral field.");
- }
- check_int_range_precision(type, value);
+ native_slot_check_int_range_precision(type, value);
switch (type) {
case UPB_TYPE_INT32:
DEREF(memory, int32_t) = NUM2INT(value);
@@ -199,7 +200,9 @@ void native_slot_set(upb_fieldtype_t type, VALUE type_class,
}
}
-VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) {
+VALUE native_slot_get(upb_fieldtype_t type,
+ VALUE type_class,
+ const void* memory) {
switch (type) {
case UPB_TYPE_FLOAT:
return DBL2NUM(DEREF(memory, float));
@@ -210,7 +213,7 @@ VALUE native_slot_get(upb_fieldtype_t type, VALUE type_class, void* memory) {
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
case UPB_TYPE_MESSAGE:
- return *((VALUE *)memory);
+ return DEREF(memory, VALUE);
case UPB_TYPE_ENUM: {
int32_t val = DEREF(memory, int32_t);
VALUE symbol = enum_lookup(type_class, INT2NUM(val));
@@ -246,8 +249,9 @@ void native_slot_init(upb_fieldtype_t type, void* memory) {
break;
case UPB_TYPE_STRING:
case UPB_TYPE_BYTES:
- // TODO(cfallin): set encoding appropriately
DEREF(memory, VALUE) = rb_str_new2("");
+ rb_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) ?
+ kRubyString8bitEncoding : kRubyStringUtf8Encoding);
break;
case UPB_TYPE_MESSAGE:
DEREF(memory, VALUE) = Qnil;
@@ -322,6 +326,43 @@ bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2) {
}
// -----------------------------------------------------------------------------
+// Map field utilities.
+// -----------------------------------------------------------------------------
+
+bool is_map_field(const upb_fielddef* field) {
+ if (upb_fielddef_label(field) != UPB_LABEL_REPEATED ||
+ upb_fielddef_type(field) != UPB_TYPE_MESSAGE) {
+ return false;
+ }
+ const upb_msgdef* subdef = upb_fielddef_msgsubdef(field);
+ return upb_msgdef_mapentry(subdef);
+}
+
+const upb_fielddef* map_field_key(const upb_fielddef* field) {
+ assert(is_map_field(field));
+ const upb_msgdef* subdef = upb_fielddef_msgsubdef(field);
+ return map_entry_key(subdef);
+}
+
+const upb_fielddef* map_field_value(const upb_fielddef* field) {
+ assert(is_map_field(field));
+ const upb_msgdef* subdef = upb_fielddef_msgsubdef(field);
+ return map_entry_value(subdef);
+}
+
+const upb_fielddef* map_entry_key(const upb_msgdef* msgdef) {
+ const upb_fielddef* key_field = upb_msgdef_itof(msgdef, MAP_KEY_FIELD);
+ assert(key_field != NULL);
+ return key_field;
+}
+
+const upb_fielddef* map_entry_value(const upb_msgdef* msgdef) {
+ const upb_fielddef* value_field = upb_msgdef_itof(msgdef, MAP_VALUE_FIELD);
+ assert(value_field != NULL);
+ return value_field;
+}
+
+// -----------------------------------------------------------------------------
// Memory layout management.
// -----------------------------------------------------------------------------
@@ -334,9 +375,12 @@ MessageLayout* create_layout(const upb_msgdef* msgdef) {
size_t off = 0;
for (upb_msg_begin(&it, msgdef); !upb_msg_done(&it); upb_msg_next(&it)) {
const upb_fielddef* field = upb_msg_iter_field(&it);
- size_t field_size =
- (upb_fielddef_label(field) == UPB_LABEL_REPEATED) ?
- sizeof(VALUE) : native_slot_size(upb_fielddef_type(field));
+ size_t field_size = 0;
+ if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+ field_size = sizeof(VALUE);
+ } else {
+ field_size = native_slot_size(upb_fielddef_type(field));
+ }
// align current offset
off = (off + field_size - 1) & ~(field_size - 1);
layout->offsets[upb_fielddef_index(field)] = off;
@@ -357,7 +401,7 @@ void free_layout(MessageLayout* layout) {
xfree(layout);
}
-static VALUE get_type_class(const upb_fielddef* field) {
+VALUE field_type_class(const upb_fielddef* field) {
VALUE type_class = Qnil;
if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
VALUE submsgdesc =
@@ -372,7 +416,7 @@ static VALUE get_type_class(const upb_fielddef* field) {
}
VALUE layout_get(MessageLayout* layout,
- void* storage,
+ const void* storage,
const upb_fielddef* field) {
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
@@ -380,7 +424,7 @@ VALUE layout_get(MessageLayout* layout,
return *((VALUE *)memory);
} else {
return native_slot_get(upb_fielddef_type(field),
- get_type_class(field),
+ field_type_class(field),
memory);
}
}
@@ -398,9 +442,8 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) {
rb_raise(rb_eTypeError, "Repeated field array has wrong element type");
}
- if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE ||
- upb_fielddef_type(field) == UPB_TYPE_ENUM) {
- RepeatedField* self = ruby_to_RepeatedField(val);
+ if (self->field_type == UPB_TYPE_MESSAGE ||
+ self->field_type == UPB_TYPE_ENUM) {
if (self->field_type_class !=
get_def_obj(upb_fielddef_subdef(field))) {
rb_raise(rb_eTypeError,
@@ -409,17 +452,48 @@ static void check_repeated_field_type(VALUE val, const upb_fielddef* field) {
}
}
+static void check_map_field_type(VALUE val, const upb_fielddef* field) {
+ assert(is_map_field(field));
+ const upb_fielddef* key_field = map_field_key(field);
+ const upb_fielddef* value_field = map_field_value(field);
+
+ if (!RB_TYPE_P(val, T_DATA) || !RTYPEDDATA_P(val) ||
+ RTYPEDDATA_TYPE(val) != &Map_type) {
+ rb_raise(rb_eTypeError, "Expected Map instance");
+ }
+
+ Map* self = ruby_to_Map(val);
+ if (self->key_type != upb_fielddef_type(key_field)) {
+ rb_raise(rb_eTypeError, "Map key type does not match field's key type");
+ }
+ if (self->value_type != upb_fielddef_type(value_field)) {
+ rb_raise(rb_eTypeError, "Map value type does not match field's value type");
+ }
+ if (upb_fielddef_type(value_field) == UPB_TYPE_MESSAGE ||
+ upb_fielddef_type(value_field) == UPB_TYPE_ENUM) {
+ if (self->value_type_class !=
+ get_def_obj(upb_fielddef_subdef(value_field))) {
+ rb_raise(rb_eTypeError,
+ "Map value type has wrong message/enum class");
+ }
+ }
+}
+
+
void layout_set(MessageLayout* layout,
void* storage,
const upb_fielddef* field,
VALUE val) {
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
- if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+ if (is_map_field(field)) {
+ check_map_field_type(val, field);
+ DEREF(memory, VALUE) = val;
+ } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
check_repeated_field_type(val, field);
- *((VALUE *)memory) = val;
+ DEREF(memory, VALUE) = val;
} else {
- native_slot_set(upb_fielddef_type(field), get_type_class(field),
+ native_slot_set(upb_fielddef_type(field), field_type_class(field),
memory, val);
}
}
@@ -434,9 +508,34 @@ void layout_init(MessageLayout* layout,
void* memory = ((uint8_t *)storage) +
layout->offsets[upb_fielddef_index(field)];
- if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+ if (is_map_field(field)) {
+ VALUE map = Qnil;
+
+ const upb_fielddef* key_field = map_field_key(field);
+ const upb_fielddef* value_field = map_field_value(field);
+ VALUE type_class = field_type_class(value_field);
+
+ if (type_class != Qnil) {
+ VALUE args[3] = {
+ fieldtype_to_ruby(upb_fielddef_type(key_field)),
+ fieldtype_to_ruby(upb_fielddef_type(value_field)),
+ type_class,
+ };
+ map = rb_class_new_instance(3, args, cMap);
+ } else {
+ VALUE args[2] = {
+ fieldtype_to_ruby(upb_fielddef_type(key_field)),
+ fieldtype_to_ruby(upb_fielddef_type(value_field)),
+ };
+ map = rb_class_new_instance(2, args, cMap);
+ }
+
+ DEREF(memory, VALUE) = map;
+ } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
VALUE ary = Qnil;
- VALUE type_class = get_type_class(field);
+
+ VALUE type_class = field_type_class(field);
+
if (type_class != Qnil) {
VALUE args[2] = {
fieldtype_to_ruby(upb_fielddef_type(field)),
@@ -447,7 +546,8 @@ void layout_init(MessageLayout* layout,
VALUE args[1] = { fieldtype_to_ruby(upb_fielddef_type(field)) };
ary = rb_class_new_instance(1, args, cRepeatedField);
}
- *((VALUE *)memory) = ary;
+
+ DEREF(memory, VALUE) = ary;
} else {
native_slot_init(upb_fielddef_type(field), memory);
}
@@ -464,7 +564,7 @@ void layout_mark(MessageLayout* layout, void* storage) {
layout->offsets[upb_fielddef_index(field)];
if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
- rb_gc_mark(*((VALUE *)memory));
+ rb_gc_mark(DEREF(memory, VALUE));
} else {
native_slot_mark(upb_fielddef_type(field), memory);
}
@@ -482,8 +582,10 @@ void layout_dup(MessageLayout* layout, void* to, void* from) {
void* from_memory = ((uint8_t *)from) +
layout->offsets[upb_fielddef_index(field)];
- if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
- *((VALUE *)to_memory) = RepeatedField_dup(*((VALUE *)from_memory));
+ if (is_map_field(field)) {
+ DEREF(to_memory, VALUE) = Map_dup(DEREF(from_memory, VALUE));
+ } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+ DEREF(to_memory, VALUE) = RepeatedField_dup(DEREF(from_memory, VALUE));
} else {
native_slot_dup(upb_fielddef_type(field), to_memory, from_memory);
}
@@ -501,8 +603,12 @@ void layout_deep_copy(MessageLayout* layout, void* to, void* from) {
void* from_memory = ((uint8_t *)from) +
layout->offsets[upb_fielddef_index(field)];
- if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
- *((VALUE *)to_memory) = RepeatedField_deep_copy(*((VALUE *)from_memory));
+ if (is_map_field(field)) {
+ DEREF(to_memory, VALUE) =
+ Map_deep_copy(DEREF(from_memory, VALUE));
+ } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+ DEREF(to_memory, VALUE) =
+ RepeatedField_deep_copy(DEREF(from_memory, VALUE));
} else {
native_slot_deep_copy(upb_fielddef_type(field), to_memory, from_memory);
}
@@ -520,11 +626,12 @@ VALUE layout_eq(MessageLayout* layout, void* msg1, void* msg2) {
void* msg2_memory = ((uint8_t *)msg2) +
layout->offsets[upb_fielddef_index(field)];
- if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
- if (RepeatedField_eq(*((VALUE *)msg1_memory),
- *((VALUE *)msg2_memory)) == Qfalse) {
- return Qfalse;
- }
+ if (is_map_field(field)) {
+ return Map_eq(DEREF(msg1_memory, VALUE),
+ DEREF(msg2_memory, VALUE));
+ } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+ return RepeatedField_eq(DEREF(msg1_memory, VALUE),
+ DEREF(msg2_memory, VALUE));
} else {
if (!native_slot_eq(upb_fielddef_type(field),
msg1_memory, msg2_memory)) {
diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c
index c9f47195..571c809f 100644
--- a/ruby/ext/google/protobuf_c/upb.c
+++ b/ruby/ext/google/protobuf_c/upb.c
@@ -1269,6 +1269,7 @@ upb_msgdef *upb_msgdef_new(const void *owner) {
if (!upb_def_init(UPB_UPCAST(m), UPB_DEF_MSG, &vtbl, owner)) goto err2;
if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
+ m->map_entry = false;
return m;
err1:
@@ -1283,6 +1284,7 @@ upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
if (!newm) return NULL;
bool ok = upb_def_setfullname(UPB_UPCAST(newm),
upb_def_fullname(UPB_UPCAST(m)), NULL);
+ newm->map_entry = m->map_entry;
UPB_ASSERT_VAR(ok, ok);
upb_msg_iter i;
for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
@@ -1386,6 +1388,15 @@ int upb_msgdef_numfields(const upb_msgdef *m) {
return upb_strtable_count(&m->ntof);
}
+void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
+ assert(!upb_msgdef_isfrozen(m));
+ m->map_entry = map_entry;
+}
+
+bool upb_msgdef_mapentry(const upb_msgdef *m) {
+ return m->map_entry;
+}
+
void upb_msg_begin(upb_msg_iter *iter, const upb_msgdef *m) {
upb_inttable_begin(iter, &m->itof);
}
@@ -3401,31 +3412,32 @@ int log2ceil(uint64_t v) {
}
char *upb_strdup(const char *s) {
- size_t n = strlen(s) + 1;
+ return upb_strdup2(s, strlen(s));
+}
+
+char *upb_strdup2(const char *s, size_t len) {
+ // Prevent overflow errors.
+ if (len == SIZE_MAX) return NULL;
+ // Always null-terminate, even if binary data; but don't rely on the input to
+ // have a null-terminating byte since it may be a raw binary buffer.
+ size_t n = len + 1;
char *p = malloc(n);
- if (p) memcpy(p, s, n);
+ if (p) {
+ memcpy(p, s, len);
+ p[len] = 0;
+ }
return p;
}
// A type to represent the lookup key of either a strtable or an inttable.
-// This is like upb_tabkey, but can carry a size also to allow lookups of
-// non-NULL-terminated strings (we don't store string lengths in the table).
typedef struct {
upb_tabkey key;
- uint32_t len; // For string keys only.
} lookupkey_t;
-static lookupkey_t strkey(const char *str) {
- lookupkey_t k;
- k.key.str = (char*)str;
- k.len = strlen(str);
- return k;
-}
-
static lookupkey_t strkey2(const char *str, size_t len) {
lookupkey_t k;
- k.key.str = (char*)str;
- k.len = len;
+ k.key.s.str = (char*)str;
+ k.key.s.length = len;
return k;
}
@@ -3607,11 +3619,12 @@ static size_t begin(const upb_table *t) {
// A simple "subclass" of upb_table that only adds a hash function for strings.
static uint32_t strhash(upb_tabkey key) {
- return MurmurHash2(key.str, strlen(key.str), 0);
+ return MurmurHash2(key.s.str, key.s.length, 0);
}
static bool streql(upb_tabkey k1, lookupkey_t k2) {
- return strncmp(k1.str, k2.key.str, k2.len) == 0 && k1.str[k2.len] == '\0';
+ return k1.s.length == k2.key.s.length &&
+ memcmp(k1.s.str, k2.key.s.str, k1.s.length) == 0;
}
bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
@@ -3620,7 +3633,7 @@ bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
void upb_strtable_uninit(upb_strtable *t) {
for (size_t i = 0; i < upb_table_size(&t->t); i++)
- free((void*)t->t.entries[i].key.str);
+ free((void*)t->t.entries[i].key.s.str);
uninit(&t->t);
}
@@ -3631,26 +3644,30 @@ bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
upb_strtable_iter i;
upb_strtable_begin(&i, t);
for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
- upb_strtable_insert(
- &new_table, upb_strtable_iter_key(&i), upb_strtable_iter_value(&i));
+ upb_strtable_insert2(
+ &new_table,
+ upb_strtable_iter_key(&i),
+ upb_strtable_iter_keylength(&i),
+ upb_strtable_iter_value(&i));
}
upb_strtable_uninit(t);
*t = new_table;
return true;
}
-bool upb_strtable_insert(upb_strtable *t, const char *k, upb_value v) {
+bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
+ upb_value v) {
if (isfull(&t->t)) {
// Need to resize. New table of double the size, add old elements to it.
if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
return false;
}
}
- if ((k = upb_strdup(k)) == NULL) return false;
+ if ((k = upb_strdup2(k, len)) == NULL) return false;
- lookupkey_t key = strkey(k);
- uint32_t hash = MurmurHash2(key.key.str, key.len, 0);
- insert(&t->t, strkey(k), v, hash, &strhash, &streql);
+ lookupkey_t key = strkey2(k, len);
+ uint32_t hash = MurmurHash2(key.key.s.str, key.key.s.length, 0);
+ insert(&t->t, key, v, hash, &strhash, &streql);
return true;
}
@@ -3660,11 +3677,12 @@ bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
return lookup(&t->t, strkey2(key, len), v, hash, &streql);
}
-bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val) {
+bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
+ upb_value *val) {
uint32_t hash = MurmurHash2(key, strlen(key), 0);
upb_tabkey tabkey;
- if (rm(&t->t, strkey(key), val, &tabkey, hash, &streql)) {
- free((void*)tabkey.str);
+ if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
+ free((void*)tabkey.s.str);
return true;
} else {
return false;
@@ -3693,7 +3711,12 @@ bool upb_strtable_done(const upb_strtable_iter *i) {
const char *upb_strtable_iter_key(upb_strtable_iter *i) {
assert(!upb_strtable_done(i));
- return str_tabent(i)->key.str;
+ return str_tabent(i)->key.s.str;
+}
+
+size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
+ assert(!upb_strtable_done(i));
+ return str_tabent(i)->key.s.length;
}
upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
@@ -4209,8 +4232,10 @@ static void nullz(upb_status *status) {
}
void upb_status_clear(upb_status *status) {
- upb_status blank = UPB_STATUS_INIT;
- upb_status_copy(status, &blank);
+ if (!status) return;
+ status->ok_ = true;
+ status->code_ = 0;
+ status->msg[0] = '\0';
}
bool upb_ok(const upb_status *status) { return status->ok_; }
@@ -5977,6 +6002,7 @@ static void putop(compiler *c, opcode op, ...) {
case OP_SETDELIM:
case OP_HALT:
case OP_RET:
+ case OP_DISPATCH:
put32(c, op);
break;
case OP_PARSE_DOUBLE:
@@ -6057,7 +6083,7 @@ const char *upb_pbdecoder_getopname(unsigned int op) {
OP(ENDSUBMSG), OP(STARTSTR), OP(STRING), OP(ENDSTR), OP(CALL), OP(RET),
OP(PUSHLENDELIM), OP(PUSHTAGDELIM), OP(SETDELIM), OP(CHECKDELIM),
OP(BRANCH), OP(TAG1), OP(TAG2), OP(TAGN), OP(SETDISPATCH), OP(POP),
- OP(SETBIGGROUPNUM), OP(HALT),
+ OP(SETBIGGROUPNUM), OP(DISPATCH), OP(HALT),
};
return op > OP_HALT ? names[0] : names[op];
#undef OP
@@ -6089,6 +6115,7 @@ static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
upb_handlers_msgdef(method->dest_handlers_)));
break;
}
+ case OP_DISPATCH:
case OP_STARTMSG:
case OP_ENDMSG:
case OP_PUSHLENDELIM:
@@ -6434,6 +6461,7 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
putop(c, OP_SETDISPATCH, &method->dispatch);
putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
label(c, LABEL_FIELD);
+ uint32_t* start_pc = c->pc;
upb_msg_iter i;
for(upb_msg_begin(&i, md); !upb_msg_done(&i); upb_msg_next(&i)) {
const upb_fielddef *f = upb_msg_iter_field(&i);
@@ -6449,8 +6477,18 @@ static void compile_method(compiler *c, upb_pbdecodermethod *method) {
}
}
+ // If there were no fields, or if no handlers were defined, we need to
+ // generate a non-empty loop body so that we can at least dispatch for unknown
+ // fields and check for the end of the message.
+ if (c->pc == start_pc) {
+ // Check for end-of-message.
+ putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+ // Unconditionally dispatch.
+ putop(c, OP_DISPATCH, 0);
+ }
+
// For now we just loop back to the last field of the message (or if none,
- // the DISPATCH opcode for the message.
+ // the DISPATCH opcode for the message).
putop(c, OP_BRANCH, -LABEL_FIELD);
// Insert both a label and a dispatch table entry for this end-of-msg.
@@ -7434,6 +7472,9 @@ size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
if (result == DECODE_MISMATCH) goto badtag;
if (result >= 0) return result;
})
+ VMCASE(OP_DISPATCH, {
+ CHECK_RETURN(dispatch(d));
+ })
VMCASE(OP_HALT, {
return size;
})
@@ -7492,7 +7533,8 @@ bool upb_pbdecoder_end(void *closure, const void *handler_data) {
// Rewind from OP_TAG* to OP_CHECKDELIM.
assert(getop(*d->pc) == OP_TAG1 ||
getop(*d->pc) == OP_TAG2 ||
- getop(*d->pc) == OP_TAGN);
+ getop(*d->pc) == OP_TAGN ||
+ getop(*d->pc == OP_DISPATCH));
d->pc = p;
}
upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
@@ -8627,6 +8669,9 @@ upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
+// Used to signal that a capture has been suspended.
+static char suspend_capture;
+
static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
upb_handlertype_t type) {
upb_selector_t sel;
@@ -8640,41 +8685,6 @@ static upb_selector_t parser_getsel(upb_json_parser *p) {
p, upb_handlers_getprimitivehandlertype(p->top->f));
}
-static void start_member(upb_json_parser *p) {
- assert(!p->top->f);
- assert(!p->accumulated);
- p->accumulated_len = 0;
-}
-
-static bool end_member(upb_json_parser *p) {
- // TODO(haberman): support keys that span buffers or have escape sequences.
- assert(!p->top->f);
- assert(p->accumulated);
- const upb_fielddef *f =
- upb_msgdef_ntof(p->top->m, p->accumulated, p->accumulated_len);
-
- if (!f) {
- // TODO(haberman): Ignore unknown fields if requested/configured to do so.
- upb_status_seterrf(p->status, "No such field: %.*s\n",
- (int)p->accumulated_len, p->accumulated);
- return false;
- }
-
- p->top->f = f;
- p->accumulated = NULL;
-
- return true;
-}
-
-static void start_object(upb_json_parser *p) {
- upb_sink_startmsg(&p->top->sink);
-}
-
-static void end_object(upb_json_parser *p) {
- upb_status status;
- upb_sink_endmsg(&p->top->sink, &status);
-}
-
static bool check_stack(upb_json_parser *p) {
if ((p->top + 1) == p->limit) {
upb_status_seterrmsg(p->status, "Nesting too deep");
@@ -8684,83 +8694,28 @@ static bool check_stack(upb_json_parser *p) {
return true;
}
-static bool start_subobject(upb_json_parser *p) {
- assert(p->top->f);
-
- if (!upb_fielddef_issubmsg(p->top->f)) {
- upb_status_seterrf(p->status,
- "Object specified for non-message/group field: %s",
- upb_fielddef_name(p->top->f));
- return false;
- }
-
- if (!check_stack(p)) return false;
-
- upb_jsonparser_frame *inner = p->top + 1;
-
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
- upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
- inner->m = upb_fielddef_msgsubdef(p->top->f);
- inner->f = NULL;
- p->top = inner;
+// There are GCC/Clang built-ins for overflow checking which we could start
+// using if there was any performance benefit to it.
+static bool checked_add(size_t a, size_t b, size_t *c) {
+ if (SIZE_MAX - a < b) return false;
+ *c = a + b;
return true;
}
-static void end_subobject(upb_json_parser *p) {
- p->top--;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
- upb_sink_endsubmsg(&p->top->sink, sel);
-}
-
-static bool start_array(upb_json_parser *p) {
- assert(p->top->f);
-
- if (!upb_fielddef_isseq(p->top->f)) {
- upb_status_seterrf(p->status,
- "Array specified for non-repeated field: %s",
- upb_fielddef_name(p->top->f));
- return false;
+static size_t saturating_multiply(size_t a, size_t b) {
+ // size_t is unsigned, so this is defined behavior even on overflow.
+ size_t ret = a * b;
+ if (b != 0 && ret / b != a) {
+ ret = SIZE_MAX;
}
-
- if (!check_stack(p)) return false;
-
- upb_jsonparser_frame *inner = p->top + 1;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
- upb_sink_startseq(&p->top->sink, sel, &inner->sink);
- inner->m = p->top->m;
- inner->f = p->top->f;
- p->top = inner;
-
- return true;
-}
-
-static void end_array(upb_json_parser *p) {
- assert(p->top > p->stack);
-
- p->top--;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
- upb_sink_endseq(&p->top->sink, sel);
+ return ret;
}
-static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
-
-static bool parser_putbool(upb_json_parser *p, bool val) {
- if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
- upb_status_seterrf(p->status,
- "Boolean value specified for non-bool field: %s",
- upb_fielddef_name(p->top->f));
- return false;
- }
- bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
- UPB_ASSERT_VAR(ok, ok);
- return true;
-}
+/* Base64 decoding ************************************************************/
-static void start_text(upb_json_parser *p, const char *ptr) {
- p->text_begin = ptr;
-}
+// TODO(haberman): make this streaming.
static const signed char b64table[] = {
-1, -1, -1, -1, -1, -1, -1, -1,
@@ -8880,89 +8835,326 @@ badpadding:
return false;
}
-static bool end_text(upb_json_parser *p, const char *ptr, bool is_num) {
- assert(!p->accumulated); // TODO: handle this case.
- p->accumulated = p->text_begin;
- p->accumulated_len = ptr - p->text_begin;
- if (p->top->f && upb_fielddef_isstring(p->top->f)) {
- // This is a string field (as opposed to a member name).
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
- if (upb_fielddef_type(p->top->f) == UPB_TYPE_BYTES) {
- PARSER_CHECK_RETURN(base64_push(p, sel, p->accumulated,
- p->accumulated_len));
- } else {
- upb_sink_putstring(&p->top->sink, sel, p->accumulated, p->accumulated_len, NULL);
- }
- p->accumulated = NULL;
- } else if (p->top->f &&
- upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM &&
- !is_num) {
-
- // Enum case: resolve enum symbolic name to integer value.
- const upb_enumdef *enumdef =
- (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
-
- int32_t int_val = 0;
- if (upb_enumdef_ntoi(enumdef, p->accumulated, p->accumulated_len,
- &int_val)) {
- upb_selector_t sel = parser_getsel(p);
- upb_sink_putint32(&p->top->sink, sel, int_val);
- } else {
- upb_status_seterrmsg(p->status, "Enum value name unknown");
+/* Accumulate buffer **********************************************************/
+
+// Functionality for accumulating a buffer.
+//
+// Some parts of the parser need an entire value as a contiguous string. For
+// example, to look up a member name in a hash table, or to turn a string into
+// a number, the relevant library routines need the input string to be in
+// contiguous memory, even if the value spanned two or more buffers in the
+// input. These routines handle that.
+//
+// In the common case we can just point to the input buffer to get this
+// contiguous string and avoid any actual copy. So we optimistically begin
+// this way. But there are a few cases where we must instead copy into a
+// separate buffer:
+//
+// 1. The string was not contiguous in the input (it spanned buffers).
+//
+// 2. The string included escape sequences that need to be interpreted to get
+// the true value in a contiguous buffer.
+
+static void assert_accumulate_empty(upb_json_parser *p) {
+ assert(p->accumulated == NULL);
+ assert(p->accumulated_len == 0);
+}
+
+static void accumulate_clear(upb_json_parser *p) {
+ p->accumulated = NULL;
+ p->accumulated_len = 0;
+}
+
+// Used internally by accumulate_append().
+static bool accumulate_realloc(upb_json_parser *p, size_t need) {
+ size_t new_size = UPB_MAX(p->accumulate_buf_size, 128);
+ while (new_size < need) {
+ new_size = saturating_multiply(new_size, 2);
+ }
+
+ void *mem = realloc(p->accumulate_buf, new_size);
+ if (!mem) {
+ upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
+ return false;
+ }
+
+ p->accumulate_buf = mem;
+ p->accumulate_buf_size = new_size;
+ return true;
+}
+
+// Logically appends the given data to the append buffer.
+// If "can_alias" is true, we will try to avoid actually copying, but the buffer
+// must be valid until the next accumulate_append() call (if any).
+static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
+ bool can_alias) {
+ if (!p->accumulated && can_alias) {
+ p->accumulated = buf;
+ p->accumulated_len = len;
+ return true;
+ }
+
+ size_t need;
+ if (!checked_add(p->accumulated_len, len, &need)) {
+ upb_status_seterrmsg(p->status, "Integer overflow.");
+ return false;
+ }
+
+ if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
+ return false;
+ }
+
+ if (p->accumulated != p->accumulate_buf) {
+ memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
+ p->accumulated = p->accumulate_buf;
+ }
+
+ memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
+ p->accumulated_len += len;
+ return true;
+}
+
+// Returns a pointer to the data accumulated since the last accumulate_clear()
+// call, and writes the length to *len. This with point either to the input
+// buffer or a temporary accumulate buffer.
+static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
+ assert(p->accumulated);
+ *len = p->accumulated_len;
+ return p->accumulated;
+}
+
+
+/* Mult-part text data ********************************************************/
+
+// When we have text data in the input, it can often come in multiple segments.
+// For example, there may be some raw string data followed by an escape
+// sequence. The two segments are processed with different logic. Also buffer
+// seams in the input can cause multiple segments.
+//
+// As we see segments, there are two main cases for how we want to process them:
+//
+// 1. we want to push the captured input directly to string handlers.
+//
+// 2. we need to accumulate all the parts into a contiguous buffer for further
+// processing (field name lookup, string->number conversion, etc).
+
+// This is the set of states for p->multipart_state.
+enum {
+ // We are not currently processing multipart data.
+ MULTIPART_INACTIVE = 0,
+
+ // We are processing multipart data by accumulating it into a contiguous
+ // buffer.
+ MULTIPART_ACCUMULATE = 1,
+
+ // We are processing multipart data by pushing each part directly to the
+ // current string handlers.
+ MULTIPART_PUSHEAGERLY = 2
+};
+
+// Start a multi-part text value where we accumulate the data for processing at
+// the end.
+static void multipart_startaccum(upb_json_parser *p) {
+ assert_accumulate_empty(p);
+ assert(p->multipart_state == MULTIPART_INACTIVE);
+ p->multipart_state = MULTIPART_ACCUMULATE;
+}
+
+// Start a multi-part text value where we immediately push text data to a string
+// value with the given selector.
+static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
+ assert_accumulate_empty(p);
+ assert(p->multipart_state == MULTIPART_INACTIVE);
+ p->multipart_state = MULTIPART_PUSHEAGERLY;
+ p->string_selector = sel;
+}
+
+static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
+ bool can_alias) {
+ switch (p->multipart_state) {
+ case MULTIPART_INACTIVE:
+ upb_status_seterrmsg(
+ p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
return false;
+
+ case MULTIPART_ACCUMULATE:
+ if (!accumulate_append(p, buf, len, can_alias)) {
+ return false;
+ }
+ break;
+
+ case MULTIPART_PUSHEAGERLY: {
+ const upb_bufhandle *handle = can_alias ? p->handle : NULL;
+ upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
+ break;
}
- p->accumulated = NULL;
}
return true;
}
-static bool start_stringval(upb_json_parser *p) {
- assert(p->top->f);
+// Note: this invalidates the accumulate buffer! Call only after reading its
+// contents.
+static void multipart_end(upb_json_parser *p) {
+ assert(p->multipart_state != MULTIPART_INACTIVE);
+ p->multipart_state = MULTIPART_INACTIVE;
+ accumulate_clear(p);
+}
- if (upb_fielddef_isstring(p->top->f)) {
- if (!check_stack(p)) return false;
- // Start a new parser frame: parser frames correspond one-to-one with
- // handler frames, and string events occur in a sub-frame.
- upb_jsonparser_frame *inner = p->top + 1;
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
- upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
- inner->m = p->top->m;
- inner->f = p->top->f;
- p->top = inner;
+/* Input capture **************************************************************/
- return true;
- } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
- // Do nothing -- symbolic enum names in quotes remain in the
- // current parser frame.
+// Functionality for capturing a region of the input as text. Gracefully
+// handles the case where a buffer seam occurs in the middle of the captured
+// region.
+
+static void capture_begin(upb_json_parser *p, const char *ptr) {
+ assert(p->multipart_state != MULTIPART_INACTIVE);
+ assert(p->capture == NULL);
+ p->capture = ptr;
+}
+
+static bool capture_end(upb_json_parser *p, const char *ptr) {
+ assert(p->capture);
+ if (multipart_text(p, p->capture, ptr - p->capture, true)) {
+ p->capture = NULL;
return true;
} else {
- upb_status_seterrf(p->status,
- "String specified for non-string/non-enum field: %s",
- upb_fielddef_name(p->top->f));
return false;
}
+}
+// This is called at the end of each input buffer (ie. when we have hit a
+// buffer seam). If we are in the middle of capturing the input, this
+// processes the unprocessed capture region.
+static void capture_suspend(upb_json_parser *p, const char **ptr) {
+ if (!p->capture) return;
+
+ if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
+ // We use this as a signal that we were in the middle of capturing, and
+ // that capturing should resume at the beginning of the next buffer.
+ //
+ // We can't use *ptr here, because we have no guarantee that this pointer
+ // will be valid when we resume (if the underlying memory is freed, then
+ // using the pointer at all, even to compare to NULL, is likely undefined
+ // behavior).
+ p->capture = &suspend_capture;
+ } else {
+ // Need to back up the pointer to the beginning of the capture, since
+ // we were not able to actually preserve it.
+ *ptr = p->capture;
+ }
}
-static void end_stringval(upb_json_parser *p) {
- if (upb_fielddef_isstring(p->top->f)) {
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
- upb_sink_endstr(&p->top->sink, sel);
- p->top--;
+static void capture_resume(upb_json_parser *p, const char *ptr) {
+ if (p->capture) {
+ assert(p->capture == &suspend_capture);
+ p->capture = ptr;
}
}
+
+/* Callbacks from the parser **************************************************/
+
+// These are the functions called directly from the parser itself.
+// We define these in the same order as their declarations in the parser.
+
+static char escape_char(char in) {
+ switch (in) {
+ case 'r': return '\r';
+ case 't': return '\t';
+ case 'n': return '\n';
+ case 'f': return '\f';
+ case 'b': return '\b';
+ case '/': return '/';
+ case '"': return '"';
+ case '\\': return '\\';
+ default:
+ assert(0);
+ return 'x';
+ }
+}
+
+static bool escape(upb_json_parser *p, const char *ptr) {
+ char ch = escape_char(*ptr);
+ return multipart_text(p, &ch, 1, false);
+}
+
+static void start_hex(upb_json_parser *p) {
+ p->digit = 0;
+}
+
+static void hexdigit(upb_json_parser *p, const char *ptr) {
+ char ch = *ptr;
+
+ p->digit <<= 4;
+
+ if (ch >= '0' && ch <= '9') {
+ p->digit += (ch - '0');
+ } else if (ch >= 'a' && ch <= 'f') {
+ p->digit += ((ch - 'a') + 10);
+ } else {
+ assert(ch >= 'A' && ch <= 'F');
+ p->digit += ((ch - 'A') + 10);
+ }
+}
+
+static bool end_hex(upb_json_parser *p) {
+ uint32_t codepoint = p->digit;
+
+ // emit the codepoint as UTF-8.
+ char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
+ int length = 0;
+ if (codepoint <= 0x7F) {
+ utf8[0] = codepoint;
+ length = 1;
+ } else if (codepoint <= 0x07FF) {
+ utf8[1] = (codepoint & 0x3F) | 0x80;
+ codepoint >>= 6;
+ utf8[0] = (codepoint & 0x1F) | 0xC0;
+ length = 2;
+ } else /* codepoint <= 0xFFFF */ {
+ utf8[2] = (codepoint & 0x3F) | 0x80;
+ codepoint >>= 6;
+ utf8[1] = (codepoint & 0x3F) | 0x80;
+ codepoint >>= 6;
+ utf8[0] = (codepoint & 0x0F) | 0xE0;
+ length = 3;
+ }
+ // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
+ // we have to wait for the next escape to get the full code point).
+
+ return multipart_text(p, utf8, length, false);
+}
+
+static void start_text(upb_json_parser *p, const char *ptr) {
+ capture_begin(p, ptr);
+}
+
+static bool end_text(upb_json_parser *p, const char *ptr) {
+ return capture_end(p, ptr);
+}
+
static void start_number(upb_json_parser *p, const char *ptr) {
- start_text(p, ptr);
- assert(p->accumulated == NULL);
+ multipart_startaccum(p);
+ capture_begin(p, ptr);
}
-static void end_number(upb_json_parser *p, const char *ptr) {
- end_text(p, ptr, true);
- const char *myend = p->accumulated + p->accumulated_len;
+static bool end_number(upb_json_parser *p, const char *ptr) {
+ if (!capture_end(p, ptr)) {
+ return false;
+ }
+
+ // strtol() and friends unfortunately do not support specifying the length of
+ // the input string, so we need to force a copy into a NULL-terminated buffer.
+ if (!multipart_text(p, "\0", 1, false)) {
+ return false;
+ }
+
+ size_t len;
+ const char *buf = accumulate_getptr(p, &len);
+ const char *myend = buf + len - 1; // One for NULL.
char *end;
switch (upb_fielddef_type(p->top->f)) {
@@ -8970,7 +9162,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_INT32: {
long val = strtol(p->accumulated, &end, 0);
if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
- assert(false);
+ goto err;
else
upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
break;
@@ -8978,7 +9170,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_INT64: {
long long val = strtoll(p->accumulated, &end, 0);
if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
- assert(false);
+ goto err;
else
upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
break;
@@ -8986,7 +9178,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_UINT32: {
unsigned long val = strtoul(p->accumulated, &end, 0);
if (val > UINT32_MAX || errno == ERANGE || end != myend)
- assert(false);
+ goto err;
else
upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
break;
@@ -8994,7 +9186,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_UINT64: {
unsigned long long val = strtoull(p->accumulated, &end, 0);
if (val > UINT64_MAX || errno == ERANGE || end != myend)
- assert(false);
+ goto err;
else
upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
break;
@@ -9002,7 +9194,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_DOUBLE: {
double val = strtod(p->accumulated, &end);
if (errno == ERANGE || end != myend)
- assert(false);
+ goto err;
else
upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
break;
@@ -9010,7 +9202,7 @@ static void end_number(upb_json_parser *p, const char *ptr) {
case UPB_TYPE_FLOAT: {
float val = strtof(p->accumulated, &end);
if (errno == ERANGE || end != myend)
- assert(false);
+ goto err;
else
upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
break;
@@ -9019,230 +9211,380 @@ static void end_number(upb_json_parser *p, const char *ptr) {
assert(false);
}
- p->accumulated = NULL;
+ multipart_end(p);
+ return true;
+
+err:
+ upb_status_seterrf(p->status, "error parsing number: %s", buf);
+ multipart_end(p);
+ return false;
}
-static char escape_char(char in) {
- switch (in) {
- case 'r': return '\r';
- case 't': return '\t';
- case 'n': return '\n';
- case 'f': return '\f';
- case 'b': return '\b';
- case '/': return '/';
- case '"': return '"';
- case '\\': return '\\';
+static bool parser_putbool(upb_json_parser *p, bool val) {
+ if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
+ upb_status_seterrf(p->status,
+ "Boolean value specified for non-bool field: %s",
+ upb_fielddef_name(p->top->f));
+ return false;
+ }
+
+ bool ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
+ UPB_ASSERT_VAR(ok, ok);
+ return true;
+}
+
+static bool start_stringval(upb_json_parser *p) {
+ assert(p->top->f);
+
+ if (upb_fielddef_isstring(p->top->f)) {
+ if (!check_stack(p)) return false;
+
+ // Start a new parser frame: parser frames correspond one-to-one with
+ // handler frames, and string events occur in a sub-frame.
+ upb_jsonparser_frame *inner = p->top + 1;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+ upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
+ inner->m = p->top->m;
+ inner->f = p->top->f;
+ p->top = inner;
+
+ if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
+ // For STRING fields we push data directly to the handlers as it is
+ // parsed. We don't do this yet for BYTES fields, because our base64
+ // decoder is not streaming.
+ //
+ // TODO(haberman): make base64 decoding streaming also.
+ multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
+ return true;
+ } else {
+ multipart_startaccum(p);
+ return true;
+ }
+ } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
+ // No need to push a frame -- symbolic enum names in quotes remain in the
+ // current parser frame.
+ //
+ // Enum string values must accumulate so we can look up the value in a table
+ // once it is complete.
+ multipart_startaccum(p);
+ return true;
+ } else {
+ upb_status_seterrf(p->status,
+ "String specified for non-string/non-enum field: %s",
+ upb_fielddef_name(p->top->f));
+ return false;
+ }
+}
+
+static bool end_stringval(upb_json_parser *p) {
+ bool ok = true;
+
+ switch (upb_fielddef_type(p->top->f)) {
+ case UPB_TYPE_BYTES:
+ if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
+ p->accumulated, p->accumulated_len)) {
+ return false;
+ }
+ // Fall through.
+
+ case UPB_TYPE_STRING: {
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+ upb_sink_endstr(&p->top->sink, sel);
+ p->top--;
+ break;
+ }
+
+ case UPB_TYPE_ENUM: {
+ // Resolve enum symbolic name to integer value.
+ const upb_enumdef *enumdef =
+ (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
+
+ size_t len;
+ const char *buf = accumulate_getptr(p, &len);
+
+ int32_t int_val = 0;
+ ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
+
+ if (ok) {
+ upb_selector_t sel = parser_getsel(p);
+ upb_sink_putint32(&p->top->sink, sel, int_val);
+ } else {
+ upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf);
+ }
+
+ break;
+ }
+
default:
- assert(0);
- return 'x';
+ assert(false);
+ upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
+ ok = false;
+ break;
}
+
+ multipart_end(p);
+ return ok;
}
-static void escape(upb_json_parser *p, const char *ptr) {
- char ch = escape_char(*ptr);
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
- upb_sink_putstring(&p->top->sink, sel, &ch, 1, NULL);
+static void start_member(upb_json_parser *p) {
+ assert(!p->top->f);
+ multipart_startaccum(p);
}
-static uint8_t hexdigit(char ch) {
- if (ch >= '0' && ch <= '9') {
- return ch - '0';
- } else if (ch >= 'a' && ch <= 'f') {
- return ch - 'a' + 10;
- } else {
- assert(ch >= 'A' && ch <= 'F');
- return ch - 'A' + 10;
+static bool end_member(upb_json_parser *p) {
+ assert(!p->top->f);
+ size_t len;
+ const char *buf = accumulate_getptr(p, &len);
+
+ const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
+
+ if (!f) {
+ // TODO(haberman): Ignore unknown fields if requested/configured to do so.
+ upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
+ return false;
}
+
+ p->top->f = f;
+ multipart_end(p);
+
+ return true;
}
-static void start_hex(upb_json_parser *p, const char *ptr) {
- start_text(p, ptr);
+static void clear_member(upb_json_parser *p) { p->top->f = NULL; }
+
+static bool start_subobject(upb_json_parser *p) {
+ assert(p->top->f);
+
+ if (!upb_fielddef_issubmsg(p->top->f)) {
+ upb_status_seterrf(p->status,
+ "Object specified for non-message/group field: %s",
+ upb_fielddef_name(p->top->f));
+ return false;
+ }
+
+ if (!check_stack(p)) return false;
+
+ upb_jsonparser_frame *inner = p->top + 1;
+
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+ upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+ inner->m = upb_fielddef_msgsubdef(p->top->f);
+ inner->f = NULL;
+ p->top = inner;
+
+ return true;
}
-static void hex(upb_json_parser *p, const char *end) {
- const char *start = p->text_begin;
- UPB_ASSERT_VAR(end, end - start == 4);
- uint16_t codepoint =
- (hexdigit(start[0]) << 12) |
- (hexdigit(start[1]) << 8) |
- (hexdigit(start[2]) << 4) |
- hexdigit(start[3]);
- // emit the codepoint as UTF-8.
- char utf8[3]; // support \u0000 -- \uFFFF -- need only three bytes.
- int length = 0;
- if (codepoint <= 0x7F) {
- utf8[0] = codepoint;
- length = 1;
- } else if (codepoint <= 0x07FF) {
- utf8[1] = (codepoint & 0x3F) | 0x80;
- codepoint >>= 6;
- utf8[0] = (codepoint & 0x1F) | 0xC0;
- length = 2;
- } else /* codepoint <= 0xFFFF */ {
- utf8[2] = (codepoint & 0x3F) | 0x80;
- codepoint >>= 6;
- utf8[1] = (codepoint & 0x3F) | 0x80;
- codepoint >>= 6;
- utf8[0] = (codepoint & 0x0F) | 0xE0;
- length = 3;
+static void end_subobject(upb_json_parser *p) {
+ p->top--;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
+ upb_sink_endsubmsg(&p->top->sink, sel);
+}
+
+static bool start_array(upb_json_parser *p) {
+ assert(p->top->f);
+
+ if (!upb_fielddef_isseq(p->top->f)) {
+ upb_status_seterrf(p->status,
+ "Array specified for non-repeated field: %s",
+ upb_fielddef_name(p->top->f));
+ return false;
}
- // TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
- // we have to wait for the next escape to get the full code point).
- upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
- upb_sink_putstring(&p->top->sink, sel, utf8, length, NULL);
+ if (!check_stack(p)) return false;
+
+ upb_jsonparser_frame *inner = p->top + 1;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
+ upb_sink_startseq(&p->top->sink, sel, &inner->sink);
+ inner->m = p->top->m;
+ inner->f = p->top->f;
+ p->top = inner;
+
+ return true;
}
+static void end_array(upb_json_parser *p) {
+ assert(p->top > p->stack);
+
+ p->top--;
+ upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
+ upb_sink_endseq(&p->top->sink, sel);
+}
+
+static void start_object(upb_json_parser *p) {
+ upb_sink_startmsg(&p->top->sink);
+}
+
+static void end_object(upb_json_parser *p) {
+ upb_status status;
+ upb_sink_endmsg(&p->top->sink, &status);
+}
+
+
#define CHECK_RETURN_TOP(x) if (!(x)) goto error
+
+/* The actual parser **********************************************************/
+
// What follows is the Ragel parser itself. The language is specified in Ragel
// and the actions call our C functions above.
+//
+// Ragel has an extensive set of functionality, and we use only a small part of
+// it. There are many action types but we only use a few:
+//
+// ">" -- transition into a machine
+// "%" -- transition out of a machine
+// "@" -- transition into a final state of a machine.
+//
+// "@" transitions are tricky because a machine can transition into a final
+// state repeatedly. But in some cases we know this can't happen, for example
+// a string which is delimited by a final '"' can only transition into its
+// final state once, when the closing '"' is seen.
-#line 596 "upb/json/parser.rl"
+#line 904 "upb/json/parser.rl"
-#line 514 "upb/json/parser.c"
+
+#line 816 "upb/json/parser.c"
static const char _json_actions[] = {
0, 1, 0, 1, 2, 1, 3, 1,
- 4, 1, 5, 1, 6, 1, 7, 1,
- 9, 1, 11, 1, 12, 1, 13, 1,
- 14, 1, 15, 1, 16, 1, 24, 1,
- 26, 2, 3, 7, 2, 5, 2, 2,
- 5, 7, 2, 10, 8, 2, 12, 14,
- 2, 13, 14, 2, 17, 1, 2, 18,
- 26, 2, 19, 8, 2, 20, 26, 2,
- 21, 26, 2, 22, 26, 2, 23, 26,
- 2, 25, 26, 3, 13, 10, 8
+ 5, 1, 6, 1, 7, 1, 8, 1,
+ 10, 1, 12, 1, 13, 1, 14, 1,
+ 15, 1, 16, 1, 17, 1, 21, 1,
+ 25, 1, 27, 2, 3, 8, 2, 4,
+ 5, 2, 6, 2, 2, 6, 8, 2,
+ 11, 9, 2, 13, 15, 2, 14, 15,
+ 2, 18, 1, 2, 19, 27, 2, 20,
+ 9, 2, 22, 27, 2, 23, 27, 2,
+ 24, 27, 2, 26, 27, 3, 14, 11,
+ 9
};
static const unsigned char _json_key_offsets[] = {
- 0, 0, 4, 9, 14, 18, 22, 27,
- 32, 37, 41, 45, 48, 51, 53, 57,
- 61, 63, 65, 70, 72, 74, 83, 89,
- 95, 101, 107, 109, 118, 118, 118, 123,
- 128, 133, 133, 134, 135, 136, 137, 137,
- 138, 139, 140, 140, 141, 142, 143, 143,
- 148, 153, 157, 161, 166, 171, 176, 180,
- 180, 183, 183, 183
+ 0, 0, 4, 9, 14, 15, 19, 24,
+ 29, 34, 38, 42, 45, 48, 50, 54,
+ 58, 60, 62, 67, 69, 71, 80, 86,
+ 92, 98, 104, 106, 115, 116, 116, 116,
+ 121, 126, 131, 132, 133, 134, 135, 135,
+ 136, 137, 138, 138, 139, 140, 141, 141,
+ 146, 151, 152, 156, 161, 166, 171, 175,
+ 175, 178, 178, 178
};
static const char _json_trans_keys[] = {
32, 123, 9, 13, 32, 34, 125, 9,
- 13, 32, 34, 125, 9, 13, 32, 58,
- 9, 13, 32, 58, 9, 13, 32, 93,
- 125, 9, 13, 32, 44, 125, 9, 13,
- 32, 44, 125, 9, 13, 32, 34, 9,
- 13, 45, 48, 49, 57, 48, 49, 57,
- 46, 69, 101, 48, 57, 69, 101, 48,
- 57, 43, 45, 48, 57, 48, 57, 48,
- 57, 46, 69, 101, 48, 57, 34, 92,
- 34, 92, 34, 47, 92, 98, 102, 110,
- 114, 116, 117, 48, 57, 65, 70, 97,
- 102, 48, 57, 65, 70, 97, 102, 48,
- 57, 65, 70, 97, 102, 48, 57, 65,
- 70, 97, 102, 34, 92, 34, 45, 91,
- 102, 110, 116, 123, 48, 57, 32, 93,
- 125, 9, 13, 32, 44, 93, 9, 13,
- 32, 93, 125, 9, 13, 97, 108, 115,
- 101, 117, 108, 108, 114, 117, 101, 32,
- 34, 125, 9, 13, 32, 34, 125, 9,
- 13, 32, 58, 9, 13, 32, 58, 9,
- 13, 32, 93, 125, 9, 13, 32, 44,
- 125, 9, 13, 32, 44, 125, 9, 13,
- 32, 34, 9, 13, 32, 9, 13, 0
+ 13, 32, 34, 125, 9, 13, 34, 32,
+ 58, 9, 13, 32, 93, 125, 9, 13,
+ 32, 44, 125, 9, 13, 32, 44, 125,
+ 9, 13, 32, 34, 9, 13, 45, 48,
+ 49, 57, 48, 49, 57, 46, 69, 101,
+ 48, 57, 69, 101, 48, 57, 43, 45,
+ 48, 57, 48, 57, 48, 57, 46, 69,
+ 101, 48, 57, 34, 92, 34, 92, 34,
+ 47, 92, 98, 102, 110, 114, 116, 117,
+ 48, 57, 65, 70, 97, 102, 48, 57,
+ 65, 70, 97, 102, 48, 57, 65, 70,
+ 97, 102, 48, 57, 65, 70, 97, 102,
+ 34, 92, 34, 45, 91, 102, 110, 116,
+ 123, 48, 57, 34, 32, 93, 125, 9,
+ 13, 32, 44, 93, 9, 13, 32, 93,
+ 125, 9, 13, 97, 108, 115, 101, 117,
+ 108, 108, 114, 117, 101, 32, 34, 125,
+ 9, 13, 32, 34, 125, 9, 13, 34,
+ 32, 58, 9, 13, 32, 93, 125, 9,
+ 13, 32, 44, 125, 9, 13, 32, 44,
+ 125, 9, 13, 32, 34, 9, 13, 32,
+ 9, 13, 0
};
static const char _json_single_lengths[] = {
- 0, 2, 3, 3, 2, 2, 3, 3,
+ 0, 2, 3, 3, 1, 2, 3, 3,
3, 2, 2, 1, 3, 0, 2, 2,
0, 0, 3, 2, 2, 9, 0, 0,
- 0, 0, 2, 7, 0, 0, 3, 3,
- 3, 0, 1, 1, 1, 1, 0, 1,
+ 0, 0, 2, 7, 1, 0, 0, 3,
+ 3, 3, 1, 1, 1, 1, 0, 1,
1, 1, 0, 1, 1, 1, 0, 3,
- 3, 2, 2, 3, 3, 3, 2, 0,
+ 3, 1, 2, 3, 3, 3, 2, 0,
1, 0, 0, 0
};
static const char _json_range_lengths[] = {
- 0, 1, 1, 1, 1, 1, 1, 1,
+ 0, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 0, 0, 0, 3, 3,
- 3, 3, 0, 1, 0, 0, 1, 1,
- 1, 0, 0, 0, 0, 0, 0, 0,
+ 3, 3, 0, 1, 0, 0, 0, 1,
+ 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1,
- 1, 1, 1, 1, 1, 1, 1, 0,
+ 1, 0, 1, 1, 1, 1, 1, 0,
1, 0, 0, 0
};
static const short _json_index_offsets[] = {
- 0, 0, 4, 9, 14, 18, 22, 27,
- 32, 37, 41, 45, 48, 52, 54, 58,
- 62, 64, 66, 71, 74, 77, 87, 91,
- 95, 99, 103, 106, 115, 116, 117, 122,
- 127, 132, 133, 135, 137, 139, 141, 142,
- 144, 146, 148, 149, 151, 153, 155, 156,
- 161, 166, 170, 174, 179, 184, 189, 193,
- 194, 197, 198, 199
+ 0, 0, 4, 9, 14, 16, 20, 25,
+ 30, 35, 39, 43, 46, 50, 52, 56,
+ 60, 62, 64, 69, 72, 75, 85, 89,
+ 93, 97, 101, 104, 113, 115, 116, 117,
+ 122, 127, 132, 134, 136, 138, 140, 141,
+ 143, 145, 147, 148, 150, 152, 154, 155,
+ 160, 165, 167, 171, 176, 181, 186, 190,
+ 191, 194, 195, 196
};
static const char _json_indicies[] = {
0, 2, 0, 1, 3, 4, 5, 3,
- 1, 6, 7, 8, 6, 1, 9, 10,
- 9, 1, 11, 12, 11, 1, 12, 1,
- 1, 12, 13, 14, 15, 16, 14, 1,
- 17, 18, 8, 17, 1, 18, 7, 18,
- 1, 19, 20, 21, 1, 20, 21, 1,
- 23, 24, 24, 22, 25, 1, 24, 24,
- 25, 22, 26, 26, 27, 1, 27, 1,
- 27, 22, 23, 24, 24, 21, 22, 29,
- 30, 28, 32, 33, 31, 34, 34, 34,
- 34, 34, 34, 34, 34, 35, 1, 36,
- 36, 36, 1, 37, 37, 37, 1, 38,
- 38, 38, 1, 39, 39, 39, 1, 41,
- 42, 40, 43, 44, 45, 46, 47, 48,
- 49, 44, 1, 50, 51, 53, 54, 1,
+ 1, 6, 7, 8, 6, 1, 9, 1,
+ 10, 11, 10, 1, 11, 1, 1, 11,
+ 12, 13, 14, 15, 13, 1, 16, 17,
+ 8, 16, 1, 17, 7, 17, 1, 18,
+ 19, 20, 1, 19, 20, 1, 22, 23,
+ 23, 21, 24, 1, 23, 23, 24, 21,
+ 25, 25, 26, 1, 26, 1, 26, 21,
+ 22, 23, 23, 20, 21, 28, 29, 27,
+ 31, 32, 30, 33, 33, 33, 33, 33,
+ 33, 33, 33, 34, 1, 35, 35, 35,
+ 1, 36, 36, 36, 1, 37, 37, 37,
+ 1, 38, 38, 38, 1, 40, 41, 39,
+ 42, 43, 44, 45, 46, 47, 48, 43,
+ 1, 49, 1, 50, 51, 53, 54, 1,
53, 52, 55, 56, 54, 55, 1, 56,
- 1, 1, 56, 52, 57, 58, 1, 59,
- 1, 60, 1, 61, 1, 62, 63, 1,
- 64, 1, 65, 1, 66, 67, 1, 68,
- 1, 69, 1, 70, 71, 72, 73, 71,
- 1, 74, 75, 76, 74, 1, 77, 78,
- 77, 1, 79, 80, 79, 1, 80, 1,
- 1, 80, 81, 82, 83, 84, 82, 1,
- 85, 86, 76, 85, 1, 86, 75, 86,
- 1, 87, 88, 88, 1, 1, 1, 1,
- 0
+ 1, 1, 56, 52, 57, 1, 58, 1,
+ 59, 1, 60, 1, 61, 62, 1, 63,
+ 1, 64, 1, 65, 66, 1, 67, 1,
+ 68, 1, 69, 70, 71, 72, 70, 1,
+ 73, 74, 75, 73, 1, 76, 1, 77,
+ 78, 77, 1, 78, 1, 1, 78, 79,
+ 80, 81, 82, 80, 1, 83, 84, 75,
+ 83, 1, 84, 74, 84, 1, 85, 86,
+ 86, 1, 1, 1, 1, 0
};
static const char _json_trans_targs[] = {
1, 0, 2, 3, 4, 56, 3, 4,
- 56, 5, 6, 5, 6, 7, 8, 9,
- 56, 8, 9, 11, 12, 18, 57, 13,
- 15, 14, 16, 17, 20, 58, 21, 20,
- 58, 21, 19, 22, 23, 24, 25, 26,
- 20, 58, 21, 28, 29, 30, 34, 39,
- 43, 47, 59, 59, 31, 30, 33, 31,
- 32, 59, 35, 36, 37, 38, 59, 40,
- 41, 42, 59, 44, 45, 46, 59, 48,
- 49, 55, 48, 49, 55, 50, 51, 50,
- 51, 52, 53, 54, 55, 53, 54, 59,
- 56
+ 56, 5, 5, 6, 7, 8, 9, 56,
+ 8, 9, 11, 12, 18, 57, 13, 15,
+ 14, 16, 17, 20, 58, 21, 20, 58,
+ 21, 19, 22, 23, 24, 25, 26, 20,
+ 58, 21, 28, 30, 31, 34, 39, 43,
+ 47, 29, 59, 59, 32, 31, 29, 32,
+ 33, 35, 36, 37, 38, 59, 40, 41,
+ 42, 59, 44, 45, 46, 59, 48, 49,
+ 55, 48, 49, 55, 50, 50, 51, 52,
+ 53, 54, 55, 53, 54, 59, 56
};
static const char _json_trans_actions[] = {
- 0, 0, 0, 21, 75, 48, 0, 42,
- 23, 17, 17, 0, 0, 15, 19, 19,
- 45, 0, 0, 0, 0, 0, 1, 0,
- 0, 0, 0, 0, 3, 13, 0, 0,
- 33, 5, 11, 0, 7, 0, 0, 0,
- 36, 39, 9, 57, 51, 25, 0, 0,
- 0, 29, 60, 54, 15, 0, 27, 0,
- 0, 31, 0, 0, 0, 0, 66, 0,
- 0, 0, 69, 0, 0, 0, 63, 21,
- 75, 48, 0, 42, 23, 17, 17, 0,
- 0, 15, 19, 19, 45, 0, 0, 72,
- 0
+ 0, 0, 0, 21, 77, 53, 0, 47,
+ 23, 17, 0, 0, 15, 19, 19, 50,
+ 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 3, 13, 0, 0, 35,
+ 5, 11, 0, 38, 7, 7, 7, 41,
+ 44, 9, 62, 56, 25, 0, 0, 0,
+ 31, 29, 33, 59, 15, 0, 27, 0,
+ 0, 0, 0, 0, 0, 68, 0, 0,
+ 0, 71, 0, 0, 0, 65, 21, 77,
+ 53, 0, 47, 23, 17, 0, 0, 15,
+ 19, 19, 50, 0, 0, 74, 0
};
static const int json_start = 1;
@@ -9255,13 +9597,14 @@ static const int json_en_value_machine = 27;
static const int json_en_main = 1;
-#line 599 "upb/json/parser.rl"
+#line 907 "upb/json/parser.rl"
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const upb_bufhandle *handle) {
UPB_UNUSED(hd);
UPB_UNUSED(handle);
upb_json_parser *parser = closure;
+ parser->handle = handle;
// Variables used by Ragel's generated code.
int cs = parser->current_state;
@@ -9271,8 +9614,10 @@ size_t parse(void *closure, const void *hd, const char *buf, size_t size,
const char *p = buf;
const char *pe = buf + size;
+ capture_resume(parser, buf);
+
-#line 684 "upb/json/parser.c"
+#line 987 "upb/json/parser.c"
{
int _klen;
unsigned int _trans;
@@ -9347,114 +9692,118 @@ _match:
switch ( *_acts++ )
{
case 0:
-#line 517 "upb/json/parser.rl"
+#line 819 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
case 1:
-#line 518 "upb/json/parser.rl"
+#line 820 "upb/json/parser.rl"
{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
break;
case 2:
-#line 522 "upb/json/parser.rl"
+#line 824 "upb/json/parser.rl"
{ start_text(parser, p); }
break;
case 3:
-#line 523 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(end_text(parser, p, false)); }
+#line 825 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(end_text(parser, p)); }
break;
case 4:
-#line 529 "upb/json/parser.rl"
- { start_hex(parser, p); }
+#line 831 "upb/json/parser.rl"
+ { start_hex(parser); }
break;
case 5:
-#line 530 "upb/json/parser.rl"
- { hex(parser, p); }
+#line 832 "upb/json/parser.rl"
+ { hexdigit(parser, p); }
break;
case 6:
-#line 536 "upb/json/parser.rl"
- { escape(parser, p); }
+#line 833 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(end_hex(parser)); }
break;
case 7:
-#line 539 "upb/json/parser.rl"
- { {cs = stack[--top]; goto _again;} }
+#line 839 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(escape(parser, p)); }
break;
case 8:
-#line 540 "upb/json/parser.rl"
- { {stack[top++] = cs; cs = 19; goto _again;} }
+#line 845 "upb/json/parser.rl"
+ { p--; {cs = stack[--top]; goto _again;} }
break;
case 9:
-#line 542 "upb/json/parser.rl"
- { p--; {stack[top++] = cs; cs = 27; goto _again;} }
+#line 848 "upb/json/parser.rl"
+ { {stack[top++] = cs; cs = 19; goto _again;} }
break;
case 10:
-#line 547 "upb/json/parser.rl"
- { start_member(parser); }
+#line 850 "upb/json/parser.rl"
+ { p--; {stack[top++] = cs; cs = 27; goto _again;} }
break;
case 11:
-#line 548 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(end_member(parser)); }
+#line 855 "upb/json/parser.rl"
+ { start_member(parser); }
break;
case 12:
-#line 551 "upb/json/parser.rl"
- { clear_member(parser); }
+#line 856 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(end_member(parser)); }
break;
case 13:
-#line 557 "upb/json/parser.rl"
- { start_object(parser); }
+#line 859 "upb/json/parser.rl"
+ { clear_member(parser); }
break;
case 14:
-#line 560 "upb/json/parser.rl"
- { end_object(parser); }
+#line 865 "upb/json/parser.rl"
+ { start_object(parser); }
break;
case 15:
-#line 566 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(start_array(parser)); }
+#line 868 "upb/json/parser.rl"
+ { end_object(parser); }
break;
case 16:
-#line 570 "upb/json/parser.rl"
- { end_array(parser); }
+#line 874 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(start_array(parser)); }
break;
case 17:
-#line 575 "upb/json/parser.rl"
- { start_number(parser, p); }
+#line 878 "upb/json/parser.rl"
+ { end_array(parser); }
break;
case 18:
-#line 576 "upb/json/parser.rl"
- { end_number(parser, p); }
+#line 883 "upb/json/parser.rl"
+ { start_number(parser, p); }
break;
case 19:
-#line 578 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(start_stringval(parser)); }
+#line 884 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(end_number(parser, p)); }
break;
case 20:
-#line 579 "upb/json/parser.rl"
- { end_stringval(parser); }
+#line 886 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(start_stringval(parser)); }
break;
case 21:
-#line 581 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
+#line 887 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(end_stringval(parser)); }
break;
case 22:
-#line 583 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
+#line 889 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(parser_putbool(parser, true)); }
break;
case 23:
-#line 585 "upb/json/parser.rl"
- { /* null value */ }
+#line 891 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(parser_putbool(parser, false)); }
break;
case 24:
-#line 587 "upb/json/parser.rl"
- { CHECK_RETURN_TOP(start_subobject(parser)); }
+#line 893 "upb/json/parser.rl"
+ { /* null value */ }
break;
case 25:
-#line 588 "upb/json/parser.rl"
- { end_subobject(parser); }
+#line 895 "upb/json/parser.rl"
+ { CHECK_RETURN_TOP(start_subobject(parser)); }
break;
case 26:
-#line 593 "upb/json/parser.rl"
+#line 896 "upb/json/parser.rl"
+ { end_subobject(parser); }
+ break;
+ case 27:
+#line 901 "upb/json/parser.rl"
{ p--; {cs = stack[--top]; goto _again;} }
break;
-#line 866 "upb/json/parser.c"
+#line 1173 "upb/json/parser.c"
}
}
@@ -9467,10 +9816,12 @@ _again:
_out: {}
}
-#line 615 "upb/json/parser.rl"
+#line 926 "upb/json/parser.rl"
if (p != pe) {
upb_status_seterrf(parser->status, "Parse error at %s\n", p);
+ } else {
+ capture_suspend(parser, &p);
}
error:
@@ -9487,8 +9838,13 @@ bool end(void *closure, const void *hd) {
return true;
}
+
+/* Public API *****************************************************************/
+
void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+ p->accumulate_buf = NULL;
+ p->accumulate_buf_size = 0;
upb_byteshandler_init(&p->input_handler_);
upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
@@ -9498,6 +9854,7 @@ void upb_json_parser_init(upb_json_parser *p, upb_status *status) {
void upb_json_parser_uninit(upb_json_parser *p) {
upb_byteshandler_uninit(&p->input_handler_);
+ free(p->accumulate_buf);
}
void upb_json_parser_reset(upb_json_parser *p) {
@@ -9508,18 +9865,18 @@ void upb_json_parser_reset(upb_json_parser *p) {
int top;
// Emit Ragel initialization of the parser.
-#line 920 "upb/json/parser.c"
+#line 1235 "upb/json/parser.c"
{
cs = json_start;
top = 0;
}
-#line 655 "upb/json/parser.rl"
+#line 974 "upb/json/parser.rl"
p->current_state = cs;
p->parser_top = top;
- p->text_begin = NULL;
- p->accumulated = NULL;
- p->accumulated_len = 0;
+ accumulate_clear(p);
+ p->multipart_state = MULTIPART_INACTIVE;
+ p->capture = NULL;
}
void upb_json_parser_resetoutput(upb_json_parser *p, upb_sink *sink) {
diff --git a/ruby/ext/google/protobuf_c/upb.h b/ruby/ext/google/protobuf_c/upb.h
index 150aef10..fbcb8e99 100644
--- a/ruby/ext/google/protobuf_c/upb.h
+++ b/ruby/ext/google/protobuf_c/upb.h
@@ -600,6 +600,9 @@ typedef struct {
// Like strdup(), which isn't always available since it's not ANSI C.
char *upb_strdup(const char *s);
+// Variant that works with a length-delimited rather than NULL-delimited string,
+// as supported by strtable.
+char *upb_strdup2(const char *s, size_t len);
UPB_INLINE void _upb_value_setval(upb_value *v, _upb_value val,
upb_ctype_t ctype) {
@@ -654,12 +657,24 @@ FUNCS(fptr, fptr, upb_func*, UPB_CTYPE_FPTR);
typedef union {
uintptr_t num;
- const char *str; // We own, nullz.
+ struct {
+ // We own this. NULL-terminated but may also contain binary data; see
+ // explicit length below.
+ // TODO: move the length to the start of the string in order to reduce
+ // tabkey's size (to one machine word) in a way that supports static
+ // initialization.
+ const char *str;
+ size_t length;
+ } s;
} upb_tabkey;
#define UPB_TABKEY_NUM(n) {n}
#ifdef UPB_C99
-#define UPB_TABKEY_STR(s) {.str = s}
+// Given that |s| is a string literal, sizeof(s) gives us a
+// compile-time-constant strlen(). We must ensure that this works for static
+// data initializers.
+#define UPB_TABKEY_STR(strval) { .s = { .str = strval, \
+ .length = sizeof(strval) - 1 } }
#endif
// TODO(haberman): C++
#define UPB_TABKEY_NONE {0}
@@ -765,7 +780,14 @@ UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
// If a table resize was required but memory allocation failed, false is
// returned and the table is unchanged.
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
-bool upb_strtable_insert(upb_strtable *t, const char *key, upb_value val);
+bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len,
+ upb_value val);
+
+// For NULL-terminated strings.
+UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
+ upb_value val) {
+ return upb_strtable_insert2(t, key, strlen(key), val);
+}
// Looks up key in this table, returning "true" if the key was found.
// If v is non-NULL, copies the value for this key into *v.
@@ -782,7 +804,14 @@ UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
// Removes an item from the table. Returns true if the remove was successful,
// and stores the removed item in *val if non-NULL.
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
-bool upb_strtable_remove(upb_strtable *t, const char *key, upb_value *val);
+bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
+ upb_value *val);
+
+// For NULL-terminated strings.
+UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
+ upb_value *v) {
+ return upb_strtable_remove2(t, key, strlen(key), v);
+}
// Updates an existing entry in an inttable. If the entry does not exist,
// returns false and does nothing. Unlike insert/remove, this does not
@@ -876,6 +905,7 @@ void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
void upb_strtable_next(upb_strtable_iter *i);
bool upb_strtable_done(const upb_strtable_iter *i);
const char *upb_strtable_iter_key(upb_strtable_iter *i);
+size_t upb_strtable_iter_keylength(upb_strtable_iter *i);
upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
void upb_strtable_iter_setdone(upb_strtable_iter *i);
bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
@@ -1777,6 +1807,10 @@ UPB_DEFINE_DEF(upb::MessageDef, msgdef, MSG, UPB_QUOTE(
// just be moved into symtab.c?
MessageDef* Dup(const void* owner) const;
+ // Is this message a map entry?
+ void setmapentry(bool map_entry);
+ bool mapentry() const;
+
// Iteration over fields. The order is undefined.
class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
public:
@@ -1823,6 +1857,11 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
upb_inttable itof; // int to field
upb_strtable ntof; // name to field
+ // Is this a map-entry message?
+ // TODO: set this flag properly for static descriptors; regenerate
+ // descriptor.upb.c.
+ bool map_entry;
+
// TODO(haberman): proper extension ranges (there can be multiple).
));
@@ -1830,7 +1869,7 @@ UPB_DEFINE_STRUCT(upb_msgdef, upb_def,
refs, ref2s) \
{ \
UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count, \
- submsg_field_count, itof, ntof \
+ submsg_field_count, itof, ntof, false \
}
UPB_BEGIN_EXTERN_C // {
@@ -1878,6 +1917,9 @@ UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m,
return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
}
+void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
+bool upb_msgdef_mapentry(const upb_msgdef *m);
+
// upb_msg_iter i;
// for(upb_msg_begin(&i, m); !upb_msg_done(&i); upb_msg_next(&i)) {
// upb_fielddef *f = upb_msg_iter_field(&i);
@@ -2331,6 +2373,12 @@ inline const FieldDef *MessageDef::FindFieldByName(const char *name,
inline MessageDef* MessageDef::Dup(const void *owner) const {
return upb_msgdef_dup(this, owner);
}
+inline void MessageDef::setmapentry(bool map_entry) {
+ upb_msgdef_setmapentry(this, map_entry);
+}
+inline bool MessageDef::mapentry() const {
+ return upb_msgdef_mapentry(this);
+}
inline MessageDef::iterator MessageDef::begin() { return iterator(this); }
inline MessageDef::iterator MessageDef::end() { return iterator::end(this); }
inline MessageDef::const_iterator MessageDef::begin() const {
@@ -6614,7 +6662,9 @@ typedef enum {
// | unused (24) | opc |
// | upb_inttable* (32 or 64) |
- OP_HALT = 36, // No arg.
+ OP_DISPATCH = 36, // No arg.
+
+ OP_HALT = 37, // No arg.
} opcode;
#define OP_MAX OP_HALT
@@ -7291,15 +7341,24 @@ UPB_DEFINE_STRUCT0(upb_json_parser,
int parser_stack[UPB_JSON_MAX_DEPTH];
int parser_top;
- // A pointer to the beginning of whatever text we are currently parsing.
- const char *text_begin;
+ // The handle for the current buffer.
+ const upb_bufhandle *handle;
- // We have to accumulate text for member names, integers, unicode escapes, and
- // base64 partial results.
+ // Accumulate buffer. See details in parser.rl.
const char *accumulated;
size_t accumulated_len;
- // TODO: add members and code for allocating a buffer when necessary (when the
- // member spans input buffers or contains escapes).
+ char *accumulate_buf;
+ size_t accumulate_buf_size;
+
+ // Multi-part text data. See details in parser.rl.
+ int multipart_state;
+ upb_selector_t string_selector;
+
+ // Input capture. See details in parser.rl.
+ const char *capture;
+
+ // Intermediate result of parsing a unicode escape sequence.
+ uint32_t digit;
));
UPB_BEGIN_EXTERN_C