diff options
author | Josh Haberman <jhaberman@gmail.com> | 2016-07-20 22:07:36 -0700 |
---|---|---|
committer | Josh Haberman <jhaberman@gmail.com> | 2016-07-21 11:37:54 -0700 |
commit | ff7f68ae9fcf4f4e3bdef3c8c372c7ae91b7720b (patch) | |
tree | e63582ff6cb9457f0b3156b8c03f226a42eab45d | |
parent | 30d8416c1cf69c7e349811b38a91dbc810fc66bd (diff) |
Ruby: encode and freeze strings when the are assigned or decoded.
-rw-r--r-- | ruby/ext/google/protobuf_c/encode_decode.c | 53 | ||||
-rw-r--r-- | ruby/ext/google/protobuf_c/map.c | 20 | ||||
-rw-r--r-- | ruby/ext/google/protobuf_c/protobuf.h | 3 | ||||
-rw-r--r-- | ruby/ext/google/protobuf_c/repeated_field.c | 5 | ||||
-rw-r--r-- | ruby/ext/google/protobuf_c/storage.c | 39 | ||||
-rw-r--r-- | ruby/ext/google/protobuf_c/upb.c | 2 | ||||
-rw-r--r-- | ruby/tests/basic.rb | 42 |
7 files changed, 104 insertions, 60 deletions
diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index f6bea50f..dd21a046 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -54,7 +54,7 @@ VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) { static const void* newhandlerdata(upb_handlers* h, uint32_t ofs) { size_t* hd_ofs = ALLOC(size_t); *hd_ofs = ofs; - upb_handlers_addcleanup(h, hd_ofs, free); + upb_handlers_addcleanup(h, hd_ofs, xfree); return hd_ofs; } @@ -69,7 +69,7 @@ static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs, submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t); hd->ofs = ofs; hd->md = upb_fielddef_msgsubdef(f); - upb_handlers_addcleanup(h, hd, free); + upb_handlers_addcleanup(h, hd, xfree); return hd; } @@ -99,7 +99,7 @@ static const void *newoneofhandlerdata(upb_handlers *h, } else { hd->md = NULL; } - upb_handlers_addcleanup(h, hd, free); + upb_handlers_addcleanup(h, hd, xfree); return hd; } @@ -135,7 +135,7 @@ static void* appendstr_handler(void *closure, VALUE ary = (VALUE)closure; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyStringUtf8Encoding); - RepeatedField_push(ary, str); + RepeatedField_push_native(ary, &str); return (void*)str; } @@ -146,7 +146,7 @@ static void* appendbytes_handler(void *closure, VALUE ary = (VALUE)closure; VALUE str = rb_str_new2(""); rb_enc_associate(str, kRubyString8bitEncoding); - RepeatedField_push(ary, str); + RepeatedField_push_native(ary, &str); return (void*)str; } @@ -182,6 +182,23 @@ static size_t stringdata_handler(void* closure, const void* hd, return len; } +static bool stringdata_end_handler(void* closure, const void* hd) { + MessageHeader* msg = closure; + const size_t *ofs = hd; + VALUE rb_str = DEREF(msg, *ofs, VALUE); + rb_obj_freeze(rb_str); + return true; +} + +static bool appendstring_end_handler(void* closure, const void* hd) { + VALUE ary = (VALUE)closure; + int size = RepeatedField_size(ary); + VALUE* last = RepeatedField_index_native(ary, size - 1); + VALUE rb_str = *last; + rb_obj_freeze(rb_str); + return true; +} + // Appends a submessage to a repeated field (a regular Ruby array for now). static void *appendsubmsg_handler(void *closure, const void *hd) { VALUE ary = (VALUE)closure; @@ -281,7 +298,7 @@ static bool endmap_handler(void *closure, const void *hd, upb_status* s) { &frame->value_storage); Map_index_set(frame->map, key, value); - free(frame); + xfree(frame); return true; } @@ -360,6 +377,13 @@ static void *oneofbytes_handler(void *closure, return (void*)str; } +static bool oneofstring_end_handler(void* closure, const void* hd) { + MessageHeader* msg = closure; + const oneof_handlerdata_t *oneofdata = hd; + rb_obj_freeze(DEREF(msg, oneofdata->ofs, VALUE)); + return true; +} + // Handler for a submessage field in a oneof. static void *oneofsubmsg_handler(void *closure, const void *hd) { @@ -426,6 +450,7 @@ static void add_handlers_for_repeated_field(upb_handlers *h, appendbytes_handler : appendstr_handler, NULL); upb_handlers_setstring(h, f, stringdata_handler, NULL); + upb_handlers_setendstr(h, f, appendstring_end_handler, NULL); break; } case UPB_TYPE_MESSAGE: { @@ -462,6 +487,7 @@ static void add_handlers_for_singular_field(upb_handlers *h, is_bytes ? bytes_handler : str_handler, &attr); upb_handlers_setstring(h, f, stringdata_handler, &attr); + upb_handlers_setendstr(h, f, stringdata_end_handler, &attr); upb_handlerattr_uninit(&attr); break; } @@ -484,7 +510,7 @@ static void add_handlers_for_mapfield(upb_handlers* h, map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlers_addcleanup(h, hd, free); + upb_handlers_addcleanup(h, hd, xfree); upb_handlerattr_sethandlerdata(&attr, hd); upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr); upb_handlerattr_uninit(&attr); @@ -499,7 +525,7 @@ static void add_handlers_for_mapentry(const upb_msgdef* msgdef, map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc); upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; - upb_handlers_addcleanup(h, hd, free); + upb_handlers_addcleanup(h, hd, xfree); upb_handlerattr_sethandlerdata(&attr, hd); upb_handlers_setendmsg(h, endmap_handler, &attr); @@ -546,6 +572,7 @@ static void add_handlers_for_oneof_field(upb_handlers *h, oneofbytes_handler : oneofstr_handler, &attr); upb_handlers_setstring(h, f, stringdata_handler, NULL); + upb_handlers_setendstr(h, f, oneofstring_end_handler, &attr); break; } case UPB_TYPE_MESSAGE: { @@ -863,9 +890,13 @@ static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) { assert(BUILTIN_TYPE(str) == RUBY_T_STRING); - // Ensure that the string has the correct encoding. We also check at field-set - // time, but the user may have mutated the string object since then. - native_slot_validate_string_encoding(upb_fielddef_type(f), str); + // We should be guaranteed that the string has the correct encoding because + // we ensured this at assignment time and then froze the string. + if (upb_fielddef_type(f) == UPB_TYPE_STRING) { + assert(rb_enc_from_index(ENCODING_GET(value)) == kRubyStringUtf8Encoding); + } else { + assert(rb_enc_from_index(ENCODING_GET(value)) == kRubyString8bitEncoding); + } upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str), &subsink); diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 92fc7286..12f1f9dd 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -63,16 +63,16 @@ // construct a key byte sequence if needed. |out_key| and |out_length| provide // the resulting key data/length. #define TABLE_KEY_BUF_LENGTH 8 // sizeof(uint64_t) -static void table_key(Map* self, VALUE key, - char* buf, - const char** out_key, - size_t* out_length) { +static VALUE table_key(Map* self, VALUE key, + char* buf, + const char** out_key, + size_t* out_length) { switch (self->key_type) { case UPB_TYPE_BYTES: case UPB_TYPE_STRING: // Strings: use string content directly. Check_Type(key, T_STRING); - native_slot_validate_string_encoding(self->key_type, key); + key = native_slot_encode_and_freeze_string(self->key_type, key); *out_key = RSTRING_PTR(key); *out_length = RSTRING_LEN(key); break; @@ -93,6 +93,8 @@ static void table_key(Map* self, VALUE key, assert(false); break; } + + return key; } static VALUE table_key_to_ruby(Map* self, const char* buf, size_t length) { @@ -357,7 +359,7 @@ VALUE Map_index(VALUE _self, VALUE key) { const char* keyval = NULL; size_t length = 0; upb_value v; - table_key(self, key, keybuf, &keyval, &length); + key = table_key(self, key, keybuf, &keyval, &length); if (upb_strtable_lookup2(&self->table, keyval, length, &v)) { void* mem = value_memory(&v); @@ -383,7 +385,7 @@ VALUE Map_index_set(VALUE _self, VALUE key, VALUE value) { size_t length = 0; upb_value v; void* mem; - table_key(self, key, keybuf, &keyval, &length); + key = table_key(self, key, keybuf, &keyval, &length); mem = value_memory(&v); native_slot_set(self->value_type, self->value_type_class, mem, value); @@ -411,7 +413,7 @@ VALUE Map_has_key(VALUE _self, VALUE key) { char keybuf[TABLE_KEY_BUF_LENGTH]; const char* keyval = NULL; size_t length = 0; - table_key(self, key, keybuf, &keyval, &length); + key = table_key(self, key, keybuf, &keyval, &length); if (upb_strtable_lookup2(&self->table, keyval, length, NULL)) { return Qtrue; @@ -434,7 +436,7 @@ VALUE Map_delete(VALUE _self, VALUE key) { const char* keyval = NULL; size_t length = 0; upb_value v; - table_key(self, key, keybuf, &keyval, &length); + key = table_key(self, key, keybuf, &keyval, &length); if (upb_strtable_remove2(&self->table, keyval, length, &v)) { void* mem = value_memory(&v); diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 2834c894..08b8d516 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -313,7 +313,7 @@ void native_slot_dup(upb_fieldtype_t type, void* to, void* from); void native_slot_deep_copy(upb_fieldtype_t type, void* to, void* from); bool native_slot_eq(upb_fieldtype_t type, void* mem1, void* mem2); -void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value); +VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value); void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE value); extern rb_encoding* kRubyStringUtf8Encoding; @@ -366,6 +366,7 @@ RepeatedField* ruby_to_RepeatedField(VALUE value); VALUE RepeatedField_each(VALUE _self); VALUE RepeatedField_index(int argc, VALUE* argv, VALUE _self); void* RepeatedField_index_native(VALUE _self, int index); +int RepeatedField_size(VALUE _self); VALUE RepeatedField_index_set(VALUE _self, VALUE _index, VALUE val); void RepeatedField_reserve(RepeatedField* self, int new_size); VALUE RepeatedField_push(VALUE _self, VALUE val); diff --git a/ruby/ext/google/protobuf_c/repeated_field.c b/ruby/ext/google/protobuf_c/repeated_field.c index 83afbc91..47c207a5 100644 --- a/ruby/ext/google/protobuf_c/repeated_field.c +++ b/ruby/ext/google/protobuf_c/repeated_field.c @@ -244,6 +244,11 @@ void* RepeatedField_index_native(VALUE _self, int index) { return RepeatedField_memoryat(self, index, element_size); } +int RepeatedField_size(VALUE _self) { + RepeatedField* self = ruby_to_RepeatedField(_self); + return self->size; +} + /* * Private ruby method, used by RepeatedField.pop */ diff --git a/ruby/ext/google/protobuf_c/storage.c b/ruby/ext/google/protobuf_c/storage.c index 1c839781..3ff2bda6 100644 --- a/ruby/ext/google/protobuf_c/storage.c +++ b/ruby/ext/google/protobuf_c/storage.c @@ -117,25 +117,24 @@ void native_slot_check_int_range_precision(upb_fieldtype_t type, VALUE val) { } } -void native_slot_validate_string_encoding(upb_fieldtype_t type, VALUE value) { - bool bad_encoding = false; - rb_encoding* string_encoding = rb_enc_from_index(ENCODING_GET(value)); - if (type == UPB_TYPE_STRING) { - bad_encoding = - string_encoding != kRubyStringUtf8Encoding && - string_encoding != kRubyStringASCIIEncoding; - } else { - bad_encoding = - string_encoding != kRubyString8bitEncoding; - } - // Check that encoding is UTF-8 or ASCII (for string fields) or ASCII-8BIT - // (for bytes fields). - if (bad_encoding) { - rb_raise(rb_eTypeError, "Encoding for '%s' fields must be %s (was %s)", - (type == UPB_TYPE_STRING) ? "string" : "bytes", - (type == UPB_TYPE_STRING) ? "UTF-8 or ASCII" : "ASCII-8BIT", - rb_enc_name(string_encoding)); +VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) { + rb_encoding* desired_encoding = (type == UPB_TYPE_STRING) ? + kRubyStringUtf8Encoding : kRubyString8bitEncoding; + VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding); + + // Note: this will not duplicate underlying string data unless necessary. + value = rb_str_encode(value, desired_encoding_value, 0, Qnil); + + if (type == UPB_TYPE_STRING && + rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncodingError, "String is invalid UTF-8"); } + + // Ensure the data remains valid. Since we called #encode a moment ago, + // this does not freeze the string the user assigned. + rb_obj_freeze(value); + + return value; } void native_slot_set(upb_fieldtype_t type, VALUE type_class, @@ -181,8 +180,8 @@ void native_slot_set_value_and_case(upb_fieldtype_t type, VALUE type_class, if (CLASS_OF(value) != rb_cString) { rb_raise(rb_eTypeError, "Invalid argument for string field."); } - native_slot_validate_string_encoding(type, value); - DEREF(memory, VALUE) = value; + + DEREF(memory, VALUE) = native_slot_encode_and_freeze_string(type, value); break; } case UPB_TYPE_MESSAGE: { diff --git a/ruby/ext/google/protobuf_c/upb.c b/ruby/ext/google/protobuf_c/upb.c index 74a2a1db..976a3934 100644 --- a/ruby/ext/google/protobuf_c/upb.c +++ b/ruby/ext/google/protobuf_c/upb.c @@ -11076,8 +11076,8 @@ static bool end_stringval(upb_json_parser *p) { case UPB_TYPE_STRING: { upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); - upb_sink_endstr(&p->top->sink, sel); p->top--; + upb_sink_endstr(&p->top->sink, sel); break; } diff --git a/ruby/tests/basic.rb b/ruby/tests/basic.rb index fee07e33..c7d8cdd1 100644 --- a/ruby/tests/basic.rb +++ b/ruby/tests/basic.rb @@ -255,14 +255,17 @@ module BasicTest m = TestMessage.new # Assigning a normal (ASCII or UTF8) string to a bytes field, or - # ASCII-8BIT to a string field, raises an error. - assert_raise TypeError do - m.optional_bytes = "Test string ASCII".encode!('ASCII') - end - assert_raise TypeError do + # ASCII-8BIT to a string field will convert to the proper encoding. + m.optional_bytes = "Test string ASCII".encode!('ASCII') + assert m.optional_bytes.frozen? + assert_equal Encoding::ASCII_8BIT, m.optional_bytes.encoding + assert_equal "Test string ASCII", m.optional_bytes + + assert_raise Encoding::UndefinedConversionError do m.optional_bytes = "Test string UTF-8 \u0100".encode!('UTF-8') end - assert_raise TypeError do + + assert_raise Encoding::UndefinedConversionError do m.optional_string = ["FFFF"].pack('H*') end @@ -270,11 +273,10 @@ module BasicTest m.optional_bytes = ["FFFF"].pack('H*') m.optional_string = "\u0100" - # strings are mutable so we can do this, but serialize should catch it. + # strings are immutable so we can't do this, but serialize should catch it. m.optional_string = "asdf".encode!('UTF-8') - m.optional_string.encode!('ASCII-8BIT') - assert_raise TypeError do - data = TestMessage.encode(m) + assert_raise RuntimeError do + m.optional_string.encode!('ASCII-8BIT') end end @@ -558,7 +560,7 @@ module BasicTest assert_raise TypeError do m[1] = 1 end - assert_raise TypeError do + assert_raise Encoding::UndefinedConversionError do bytestring = ["FFFF"].pack("H*") m[bytestring] = 1 end @@ -566,9 +568,8 @@ module BasicTest m = Google::Protobuf::Map.new(:bytes, :int32) bytestring = ["FFFF"].pack("H*") m[bytestring] = 1 - assert_raise TypeError do - m["asdf"] = 1 - end + # Allowed -- we will automatically convert to ASCII-8BIT. + m["asdf"] = 1 assert_raise TypeError do m[1] = 1 end @@ -853,15 +854,20 @@ module BasicTest def test_encode_decode_helpers m = TestMessage.new(:optional_string => 'foo', :repeated_string => ['bar1', 'bar2']) + assert_equal 'foo', m.optional_string + assert_equal ['bar1', 'bar2'], m.repeated_string + json = m.to_json m2 = TestMessage.decode_json(json) - assert m2.optional_string == 'foo' - assert m2.repeated_string == ['bar1', 'bar2'] + assert_equal 'foo', m2.optional_string + assert_equal ['bar1', 'bar2'], m2.repeated_string + assert m2.optional_string.frozen? + assert m2.repeated_string[0].frozen? proto = m.to_proto m2 = TestMessage.decode(proto) - assert m2.optional_string == 'foo' - assert m2.repeated_string == ['bar1', 'bar2'] + assert_equal 'foo', m2.optional_string + assert_equal ['bar1', 'bar2'], m2.repeated_string end def test_protobuf_encode_decode_helpers |