diff options
24 files changed, 699 insertions, 160 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index fe90cb9a..ea3252bb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,4 @@ -2017-11-09 version 3.5.0 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) +2017-11-13 version 3.5.0 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript) Planned Future Changes * Make C++ implementation C++11 only: we plan to require C++11 to build protobuf code starting from 3.6.0 release. Please join this github issue: @@ -25,7 +25,7 @@ Java * Proto3 messages are now preserving unknown fields by default. If you’d like - to drop unknown fields, please use the DiscardUnknownFieldsParser API. For + to drop unknown fields, please use the DiscardUnknownFieldsParser API. For example: Parser<Foo> parser = DiscardUnknownFieldsParser.wrap(Foo.parser()); Foo foo = parser.parseFrom(input); @@ -168,7 +168,7 @@ * Note: AppEngine 2.5 is deprecated on June 2017 that AppEngine 2.5 will never update protobuf runtime. Users who depend on AppEngine 2.5 should use old protoc. - + PHP * Support PHP generic services. Specify file option php_generic_service=true to enable generating service interface. diff --git a/Makefile.am b/Makefile.am index cd1cfc3e..cba02e3d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -907,6 +907,7 @@ ruby_EXTRA_DIST= \ ruby/src/main/java/google/ProtobufJavaService.java \ ruby/src/main/sentinel.proto \ ruby/tests/basic.rb \ + ruby/tests/encode_decode_test.rb \ ruby/tests/gc_test.rb \ ruby/tests/repeated_field_test.rb \ ruby/tests/stress.rb \ diff --git a/php/ext/google/protobuf/encode_decode.c b/php/ext/google/protobuf/encode_decode.c index 7e2b3ae6..b98121bb 100644 --- a/php/ext/google/protobuf/encode_decode.c +++ b/php/ext/google/protobuf/encode_decode.c @@ -1402,7 +1402,6 @@ static void putarray(zval* array, const upb_fielddef* f, upb_sink* sink, RepeatedField* intern = UNBOX(RepeatedField, array); HashTable *ht = PHP_PROTO_HASH_OF(intern->array); size = zend_hash_num_elements(ht); - // size = zend_hash_num_elements(PHP_PROTO_HASH_OF(intern->array)); if (size == 0) return; upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink); @@ -1614,3 +1613,102 @@ PHP_METHOD(Message, mergeFromJsonString) { stackenv_uninit(&se); } } + +// TODO(teboring): refactoring with putrawmsg +static void discard_unknown_fields(MessageHeader* msg) { + upb_msg_field_iter it; + + stringsink* unknown = DEREF(message_data(msg), 0, stringsink*); + if (unknown != NULL) { + stringsink_uninit(unknown); + DEREF(message_data(msg), 0, stringsink*) = NULL; + } + + // Recursively discard unknown fields of submessages. + Descriptor* desc = msg->descriptor; + TSRMLS_FETCH(); + for (upb_msg_field_begin(&it, desc->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + upb_fielddef* f = upb_msg_iter_field(&it); + uint32_t offset = desc->layout->fields[upb_fielddef_index(f)].offset; + bool containing_oneof = false; + + if (upb_fielddef_containingoneof(f)) { + uint32_t oneof_case_offset = + desc->layout->fields[upb_fielddef_index(f)].case_offset; + // For a oneof, check that this field is actually present -- skip all the + // below if not. + if (DEREF(message_data(msg), oneof_case_offset, uint32_t) != + upb_fielddef_number(f)) { + continue; + } + // Otherwise, fall through to the appropriate singular-field handler + // below. + containing_oneof = true; + } + + if (is_map_field(f)) { + MapIter map_it; + int len, size; + const upb_fielddef* value_field; + + value_field = map_field_value(f); + if (!upb_fielddef_issubmsg(value_field)) continue; + + zval* map_php = CACHED_PTR_TO_ZVAL_PTR( + DEREF(message_data(msg), offset, CACHED_VALUE*)); + if (map_php == NULL) continue; + + Map* intern = UNBOX(Map, map_php); + for (map_begin(map_php, &map_it TSRMLS_CC); + !map_done(&map_it); map_next(&map_it)) { + upb_value value = map_iter_value(&map_it, &len); + void* memory = raw_value(upb_value_memory(&value), value_field); +#if PHP_MAJOR_VERSION < 7 + MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory); +#else + MessageHeader *submsg = + (MessageHeader*)((char*)(Z_OBJ_P((zval*)memory)) - + XtOffsetOf(MessageHeader, std)); +#endif + discard_unknown_fields(submsg); + } + } else if (upb_fielddef_isseq(f)) { + if (!upb_fielddef_issubmsg(f)) continue; + + zval* array_php = CACHED_PTR_TO_ZVAL_PTR( + DEREF(message_data(msg), offset, CACHED_VALUE*)); + if (array_php == NULL) continue; + + int size, i; + RepeatedField* intern = UNBOX(RepeatedField, array_php); + HashTable *ht = PHP_PROTO_HASH_OF(intern->array); + size = zend_hash_num_elements(ht); + if (size == 0) continue; + + for (i = 0; i < size; i++) { + void* memory = repeated_field_index_native(intern, i TSRMLS_CC); +#if PHP_MAJOR_VERSION < 7 + MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory); +#else + MessageHeader *submsg = + (MessageHeader*)((char*)(Z_OBJ_P((zval*)memory)) - + XtOffsetOf(MessageHeader, std)); +#endif + discard_unknown_fields(submsg); + } + } else if (upb_fielddef_issubmsg(f)) { + zval* submsg_php = CACHED_PTR_TO_ZVAL_PTR( + DEREF(message_data(msg), offset, CACHED_VALUE*)); + if (Z_TYPE_P(submsg_php) == IS_NULL) continue; + MessageHeader* submsg = UNBOX(MessageHeader, submsg_php); + discard_unknown_fields(submsg); + } + } +} + +PHP_METHOD(Message, discardUnknownFields) { + MessageHeader* msg = UNBOX(MessageHeader, getThis()); + discard_unknown_fields(msg); +} diff --git a/php/ext/google/protobuf/message.c b/php/ext/google/protobuf/message.c index cf75d979..b14c1f0c 100644 --- a/php/ext/google/protobuf/message.c +++ b/php/ext/google/protobuf/message.c @@ -42,6 +42,7 @@ static void hex_to_binary(const char* hex, char** binary, int* binary_len); static zend_function_entry message_methods[] = { PHP_ME(Message, clear, NULL, ZEND_ACC_PUBLIC) + PHP_ME(Message, discardUnknownFields, NULL, ZEND_ACC_PUBLIC) PHP_ME(Message, serializeToString, NULL, ZEND_ACC_PUBLIC) PHP_ME(Message, mergeFromString, NULL, ZEND_ACC_PUBLIC) PHP_ME(Message, serializeToJsonString, NULL, ZEND_ACC_PUBLIC) diff --git a/php/ext/google/protobuf/package.xml b/php/ext/google/protobuf/package.xml index 0107ebc2..626eb46b 100644 --- a/php/ext/google/protobuf/package.xml +++ b/php/ext/google/protobuf/package.xml @@ -10,11 +10,11 @@ <email>protobuf-opensource@google.com</email> <active>yes</active> </lead> - <date>2017-12-06</date> + <date>2017-12-11</date> <time>11:02:07</time> <version> - <release>3.5.0.1</release> - <api>3.5.0.1</api> + <release>3.5.1</release> + <api>3.5.1</api> </version> <stability> <release>stable</release> @@ -170,6 +170,22 @@ GA release. </release> <release> <version> + <release>3.5.0</release> + <api>3.5.0</api> + </version> + <stability> + <release>stable</release> + <api>stable</api> + </stability> + <date>2017-11-15</date> + <time>11:02:07</time> + <license uri="https://opensource.org/licenses/BSD-3-Clause">3-Clause BSD License</license> + <notes> +GA release. + </notes> + </release> + <release> + <version> <release>3.5.0.1</release> <api>3.5.0.1</api> </version> @@ -184,5 +200,21 @@ GA release. GA release. </notes> </release> + <release> + <version> + <release>3.5.1</release> + <api>3.5.1</api> + </version> + <stability> + <release>stable</release> + <api>stable</api> + </stability> + <date>2017-12-11</date> + <time>11:02:07</time> + <license uri="https://opensource.org/licenses/BSD-3-Clause">3-Clause BSD License</license> + <notes> +GA release. + </notes> + </release> </changelog> </package> diff --git a/php/ext/google/protobuf/protobuf.h b/php/ext/google/protobuf/protobuf.h index 8289eb2b..f299b415 100644 --- a/php/ext/google/protobuf/protobuf.h +++ b/php/ext/google/protobuf/protobuf.h @@ -37,7 +37,7 @@ #include "upb.h" #define PHP_PROTOBUF_EXTNAME "protobuf" -#define PHP_PROTOBUF_VERSION "3.5.0.1" +#define PHP_PROTOBUF_VERSION "3.5.1" #define MAX_LENGTH_OF_INT64 20 #define SIZEOF_INT64 8 @@ -957,6 +957,7 @@ PHP_METHOD(Message, serializeToString); PHP_METHOD(Message, mergeFromString); PHP_METHOD(Message, serializeToJsonString); PHP_METHOD(Message, mergeFromJsonString); +PHP_METHOD(Message, discardUnknownFields); // ----------------------------------------------------------------------------- // Type check / conversion. diff --git a/php/src/Google/Protobuf/Any.php b/php/src/Google/Protobuf/Any.php index 91ba4bd5..f027b05d 100644 --- a/php/src/Google/Protobuf/Any.php +++ b/php/src/Google/Protobuf/Any.php @@ -4,7 +4,6 @@ namespace Google\Protobuf; -use Google\Protobuf\Internal\DescriptorPool; use Google\Protobuf\Internal\GPBType; use Google\Protobuf\Internal\GPBUtil; use Google\Protobuf\Internal\Message; @@ -217,7 +216,7 @@ class Any extends \Google\Protobuf\Internal\Message substr($this->type_url, $url_prifix_len); // Create message according to fully qualified name. - $pool = DescriptorPool::getGeneratedPool(); + $pool = \Google\Protobuf\Internal\DescriptorPool::getGeneratedPool(); $desc = $pool->getDescriptorByProtoName( ".".$fully_qualifed_name); if (is_null($desc)) { throw new \Exception("Class ".$fully_qualifed_name @@ -248,7 +247,7 @@ class Any extends \Google\Protobuf\Internal\Message $this->value = $msg->serializeToString(); // Set type url. - $pool = DescriptorPool::getGeneratedPool(); + $pool = \Google\Protobuf\Internal\DescriptorPool::getGeneratedPool(); $desc = $pool->getDescriptorByClassName(get_class($msg)); $fully_qualifed_name = $desc->getFullName(); $this->type_url = GPBUtil::TYPE_URL_PREFIX.substr( @@ -262,7 +261,7 @@ class Any extends \Google\Protobuf\Internal\Message */ public function is($klass) { - $pool = DescriptorPool::getGeneratedPool(); + $pool = \Google\Protobuf\Internal\DescriptorPool::getGeneratedPool(); $desc = $pool->getDescriptorByClassName($klass); $fully_qualifed_name = $desc->getFullName(); $type_url = GPBUtil::TYPE_URL_PREFIX.substr( diff --git a/php/src/Google/Protobuf/Internal/Message.php b/php/src/Google/Protobuf/Internal/Message.php index a7a4f272..93e81c01 100644 --- a/php/src/Google/Protobuf/Internal/Message.php +++ b/php/src/Google/Protobuf/Internal/Message.php @@ -577,6 +577,43 @@ class Message } /** + * Clear all unknown fields previously parsed. + * @return null. + */ + public function discardUnknownFields() + { + $this->unknown = ""; + foreach ($this->desc->getField() as $field) { + if ($field->getType() != GPBType::MESSAGE) { + continue; + } + if ($field->isMap()) { + $value_field = $field->getMessageType()->getFieldByNumber(2); + if ($value_field->getType() != GPBType::MESSAGE) { + continue; + } + $getter = $field->getGetter(); + $map = $this->$getter(); + foreach ($map as $key => $value) { + $value->discardUnknownFields(); + } + } else if ($field->getLabel() === GPBLabel::REPEATED) { + $getter = $field->getGetter(); + $arr = $this->$getter(); + foreach ($arr as $sub) { + $sub->discardUnknownFields(); + } + } else if ($field->getLabel() === GPBLabel::OPTIONAL) { + $getter = $field->getGetter(); + $sub = $this->$getter(); + if (!is_null($sub)) { + $sub->discardUnknownFields(); + } + } + } + } + + /** * Merges the contents of the specified message into current message. * * This method merges the contents of the specified message into the diff --git a/php/tests/encode_decode_test.php b/php/tests/encode_decode_test.php index 4dca922b..74d5526d 100644 --- a/php/tests/encode_decode_test.php +++ b/php/tests/encode_decode_test.php @@ -443,29 +443,74 @@ class EncodeDecodeTest extends TestBase public function testUnknown() { + // Test preserve unknown for varint. $m = new TestMessage(); - $from = hex2bin('F80601'); + $from = hex2bin('F80601'); // TODO(teboring): Add a util to encode + // varint for better readability $m->mergeFromString($from); $to = $m->serializeToString(); $this->assertSame(bin2hex($from), bin2hex($to)); + // Test preserve unknown for 64-bit. $m = new TestMessage(); $from = hex2bin('F9060000000000000000'); $m->mergeFromString($from); $to = $m->serializeToString(); $this->assertSame(bin2hex($from), bin2hex($to)); + // Test preserve unknown for length delimited. $m = new TestMessage(); $from = hex2bin('FA0600'); $m->mergeFromString($from); $to = $m->serializeToString(); $this->assertSame(bin2hex($from), bin2hex($to)); + // Test preserve unknown for 32-bit. $m = new TestMessage(); $from = hex2bin('FD0600000000'); $m->mergeFromString($from); $to = $m->serializeToString(); $this->assertSame(bin2hex($from), bin2hex($to)); + + // Test discard unknown in message. + $m = new TestMessage(); + $from = hex2bin('F80601'); + $m->mergeFromString($from); + $m->discardUnknownFields(); + $to = $m->serializeToString(); + $this->assertSame("", bin2hex($to)); + + // Test discard unknown for singular message field. + $m = new TestMessage(); + $from = hex2bin('8A0103F80601'); + $m->mergeFromString($from); + $m->discardUnknownFields(); + $to = $m->serializeToString(); + $this->assertSame("8a0100", bin2hex($to)); + + // Test discard unknown for repeated message field. + $m = new TestMessage(); + $from = hex2bin('FA0203F80601'); + $m->mergeFromString($from); + $m->discardUnknownFields(); + $to = $m->serializeToString(); + $this->assertSame("fa0200", bin2hex($to)); + + // Test discard unknown for map message value field. + $m = new TestMessage(); + $from = hex2bin("BA050708011203F80601"); + $m->mergeFromString($from); + $m->discardUnknownFields(); + $to = $m->serializeToString(); + $this->assertSame("ba050408011200", bin2hex($to)); + + // Test discard unknown for singular message field. + $m = new TestMessage(); + $from = hex2bin('9A0403F80601'); + $m->mergeFromString($from); + $m->discardUnknownFields(); + $to = $m->serializeToString(); + $this->assertSame("9a0400", bin2hex($to)); } public function testJsonEncode() diff --git a/post_process_dist.sh b/post_process_dist.sh index a5f95800..eb5f584d 100755 --- a/post_process_dist.sh +++ b/post_process_dist.sh @@ -27,7 +27,7 @@ fi set -ex -LANGUAGES="cpp csharp java javanano js objectivec python ruby php" +LANGUAGES="cpp csharp java javanano js objectivec python ruby php all" BASENAME=`basename $1 .tar.gz` VERSION=${BASENAME:9} diff --git a/protoc-artifacts/README.md b/protoc-artifacts/README.md index 17eb77f8..fc8ef586 100644 --- a/protoc-artifacts/README.md +++ b/protoc-artifacts/README.md @@ -70,9 +70,9 @@ support. DO NOT close the staging repository until you have done the deployment for all platforms. Currently the following platforms are supported: - Linux (x86_32 and x86_64) - Windows (x86_32 and x86_64) with - - Cygwin64 with MinGW compilers (x86_64) - - MSYS with MinGW32 (x86_32) - - Cross compile in Linux with MinGW-w64 (x86_32, x86_64) + - Cygwin64 with MinGW compilers (x86_64) + - MSYS with MinGW32 (x86_32) + - Cross compile in Linux with MinGW-w64 (x86_32, x86_64) - MacOSX (x86_32 and x86_64) As for MSYS2/MinGW64 for Windows: protoc will build, but it insists on @@ -174,8 +174,9 @@ stored: ### Tested build environments We have successfully built artifacts on the following environments: - Linux x86_32 and x86_64: - - Centos 6.6 (within Docker 1.6.1) - - Ubuntu 14.04.2 64-bit + - Centos 6.6 (within Docker 1.6.1) + - Ubuntu 14.04.2 64-bit +- Linux aarch_64: Cross compiled with `g++-aarch64-linux-gnu` on Ubuntu 14.04.2 64-bit - Windows x86_32: MSYS with ``mingw32-gcc-g++ 4.8.1-4`` on Windows 7 64-bit - Windows x86_32: Cross compile with ``i686-w64-mingw32-g++ 4.8.2`` on Ubuntu 14.04.2 64-bit - Windows x86_64: Cygwin64 with ``mingw64-x86_64-gcc-g++ 4.8.3-1`` on Windows 7 64-bit diff --git a/python/google/protobuf/__init__.py b/python/google/protobuf/__init__.py index 76cb51c3..0f4d63b0 100755 --- a/python/google/protobuf/__init__.py +++ b/python/google/protobuf/__init__.py @@ -30,7 +30,7 @@ # Copyright 2007 Google Inc. All Rights Reserved. -__version__ = '3.5.0' +__version__ = '3.5.0.post1' if __name__ != '__main__': try: diff --git a/python/google/protobuf/descriptor_pool.py b/python/google/protobuf/descriptor_pool.py index cb7146b6..f4c533a4 100644 --- a/python/google/protobuf/descriptor_pool.py +++ b/python/google/protobuf/descriptor_pool.py @@ -127,6 +127,9 @@ class DescriptorPool(object): self._service_descriptors = {} self._file_descriptors = {} self._toplevel_extensions = {} + # TODO(jieluo): Remove _file_desc_by_toplevel_extension after + # maybe year 2020 for compatibility issue (with 3.4.1 only). + self._file_desc_by_toplevel_extension = {} # We store extensions in two two-level mappings: The first key is the # descriptor of the message being extended, the second key is the extension # full name or its tag number. @@ -252,6 +255,12 @@ class DescriptorPool(object): """ self._AddFileDescriptor(file_desc) + # TODO(jieluo): This is a temporary solution for FieldDescriptor.file. + # FieldDescriptor.file is added in code gen. Remove this solution after + # maybe 2020 for compatibility reason (with 3.4.1 only). + for extension in file_desc.extensions_by_name.values(): + self._file_desc_by_toplevel_extension[ + extension.full_name] = file_desc def _AddFileDescriptor(self, file_desc): """Adds a FileDescriptor to the pool, non-recursively. @@ -331,7 +340,7 @@ class DescriptorPool(object): pass try: - return self._toplevel_extensions[symbol].file + return self._file_desc_by_toplevel_extension[symbol] except KeyError: pass diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c index 34d9663a..d9d2ebac 100644 --- a/ruby/ext/google/protobuf_c/defs.c +++ b/ruby/ext/google/protobuf_c/defs.c @@ -76,7 +76,7 @@ static upb_enumdef* check_enum_notfrozen(const upb_enumdef* def) { // ----------------------------------------------------------------------------- #define DEFINE_CLASS(name, string_name) \ - VALUE c ## name; \ + VALUE c ## name = Qnil; \ const rb_data_type_t _ ## name ## _type = { \ string_name, \ { name ## _mark, name ## _free, NULL }, \ @@ -126,11 +126,11 @@ void DescriptorPool_register(VALUE module) { rb_define_method(klass, "lookup", DescriptorPool_lookup, 1); rb_define_singleton_method(klass, "generated_pool", DescriptorPool_generated_pool, 0); - cDescriptorPool = klass; rb_gc_register_address(&cDescriptorPool); + cDescriptorPool = klass; - generated_pool = rb_class_new_instance(0, NULL, klass); rb_gc_register_address(&generated_pool); + generated_pool = rb_class_new_instance(0, NULL, klass); } static void add_descriptor_to_pool(DescriptorPool* self, @@ -299,8 +299,8 @@ void Descriptor_register(VALUE module) { rb_define_method(klass, "name", Descriptor_name, 0); rb_define_method(klass, "name=", Descriptor_name_set, 1); rb_include_module(klass, rb_mEnumerable); - cDescriptor = klass; rb_gc_register_address(&cDescriptor); + cDescriptor = klass; } /* @@ -518,8 +518,8 @@ void FieldDescriptor_register(VALUE module) { rb_define_method(klass, "subtype", FieldDescriptor_subtype, 0); rb_define_method(klass, "get", FieldDescriptor_get, 1); rb_define_method(klass, "set", FieldDescriptor_set, 2); - cFieldDescriptor = klass; rb_gc_register_address(&cFieldDescriptor); + cFieldDescriptor = klass; } /* @@ -916,8 +916,8 @@ void OneofDescriptor_register(VALUE module) { rb_define_method(klass, "add_field", OneofDescriptor_add_field, 1); rb_define_method(klass, "each", OneofDescriptor_each, 0); rb_include_module(klass, rb_mEnumerable); - cOneofDescriptor = klass; rb_gc_register_address(&cOneofDescriptor); + cOneofDescriptor = klass; } /* @@ -1037,8 +1037,8 @@ void EnumDescriptor_register(VALUE module) { rb_define_method(klass, "each", EnumDescriptor_each, 0); rb_define_method(klass, "enummodule", EnumDescriptor_enummodule, 0); rb_include_module(klass, rb_mEnumerable); - cEnumDescriptor = klass; rb_gc_register_address(&cEnumDescriptor); + cEnumDescriptor = klass; } /* @@ -1202,8 +1202,8 @@ void MessageBuilderContext_register(VALUE module) { rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1); rb_define_method(klass, "map", MessageBuilderContext_map, -1); rb_define_method(klass, "oneof", MessageBuilderContext_oneof, 1); - cMessageBuilderContext = klass; rb_gc_register_address(&cMessageBuilderContext); + cMessageBuilderContext = klass; } /* @@ -1491,8 +1491,8 @@ void OneofBuilderContext_register(VALUE module) { rb_define_method(klass, "initialize", OneofBuilderContext_initialize, 2); rb_define_method(klass, "optional", OneofBuilderContext_optional, -1); - cOneofBuilderContext = klass; rb_gc_register_address(&cOneofBuilderContext); + cOneofBuilderContext = klass; } /* @@ -1569,8 +1569,8 @@ void EnumBuilderContext_register(VALUE module) { rb_define_method(klass, "initialize", EnumBuilderContext_initialize, 1); rb_define_method(klass, "value", EnumBuilderContext_value, 2); - cEnumBuilderContext = klass; rb_gc_register_address(&cEnumBuilderContext); + cEnumBuilderContext = klass; } /* @@ -1645,8 +1645,8 @@ void Builder_register(VALUE module) { rb_define_method(klass, "add_enum", Builder_add_enum, 1); rb_define_method(klass, "initialize", Builder_initialize, 0); rb_define_method(klass, "finalize_to_pool", Builder_finalize_to_pool, 1); - cBuilder = klass; rb_gc_register_address(&cBuilder); + cBuilder = klass; } /* diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c index d1b6e89e..12080d03 100644 --- a/ruby/ext/google/protobuf_c/encode_decode.c +++ b/ruby/ext/google/protobuf_c/encode_decode.c @@ -1305,3 +1305,91 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) { } } +static void discard_unknown(VALUE msg_rb, const Descriptor* desc) { + MessageHeader* msg; + upb_msg_field_iter it; + + TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg); + + stringsink* unknown = msg->unknown_fields; + if (unknown != NULL) { + stringsink_uninit(unknown); + msg->unknown_fields = NULL; + } + + for (upb_msg_field_begin(&it, desc->msgdef); + !upb_msg_field_done(&it); + upb_msg_field_next(&it)) { + upb_fielddef *f = upb_msg_iter_field(&it); + uint32_t offset = + desc->layout->fields[upb_fielddef_index(f)].offset + + sizeof(MessageHeader); + + if (upb_fielddef_containingoneof(f)) { + uint32_t oneof_case_offset = + desc->layout->fields[upb_fielddef_index(f)].case_offset + + sizeof(MessageHeader); + // For a oneof, check that this field is actually present -- skip all the + // below if not. + if (DEREF(msg, oneof_case_offset, uint32_t) != + upb_fielddef_number(f)) { + continue; + } + // Otherwise, fall through to the appropriate singular-field handler + // below. + } + + if (!upb_fielddef_issubmsg(f)) { + continue; + } + + if (is_map_field(f)) { + if (!upb_fielddef_issubmsg(map_field_value(f))) continue; + VALUE map = DEREF(msg, offset, VALUE); + if (map == Qnil) continue; + Map_iter map_it; + for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) { + VALUE submsg = Map_iter_value(&map_it); + VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); + const Descriptor* subdesc = ruby_to_Descriptor(descriptor); + discard_unknown(submsg, subdesc); + } + } else if (upb_fielddef_isseq(f)) { + VALUE ary = DEREF(msg, offset, VALUE); + if (ary == Qnil) continue; + int size = NUM2INT(RepeatedField_length(ary)); + for (int i = 0; i < size; i++) { + void* memory = RepeatedField_index_native(ary, i); + VALUE submsg = *((VALUE *)memory); + VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); + const Descriptor* subdesc = ruby_to_Descriptor(descriptor); + discard_unknown(submsg, subdesc); + } + } else { + VALUE submsg = DEREF(msg, offset, VALUE); + if (submsg == Qnil) continue; + VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned); + const Descriptor* subdesc = ruby_to_Descriptor(descriptor); + discard_unknown(submsg, subdesc); + } + } +} + +/* + * call-seq: + * Google::Protobuf.discard_unknown(msg) + * + * Discard unknown fields in the given message object and recursively discard + * unknown fields in submessages. + */ +VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) { + VALUE klass = CLASS_OF(msg_rb); + VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned); + Descriptor* desc = ruby_to_Descriptor(descriptor); + if (klass == cRepeatedField || klass == cMap) { + rb_raise(rb_eArgError, "Expected proto msg for discard unknown."); + } else { + discard_unknown(msg_rb, desc); + } + return Qnil; +} diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c index 26e22dc7..8c2f6424 100644 --- a/ruby/ext/google/protobuf_c/map.c +++ b/ruby/ext/google/protobuf_c/map.c @@ -825,8 +825,8 @@ VALUE Map_iter_value(Map_iter* iter) { void Map_register(VALUE module) { VALUE klass = rb_define_class_under(module, "Map", rb_cObject); rb_define_alloc_func(klass, Map_alloc); - cMap = klass; rb_gc_register_address(&cMap); + cMap = klass; rb_define_method(klass, "initialize", Map_init, -1); rb_define_method(klass, "each", Map_each, 0); diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c index c7750c44..db696426 100644 --- a/ruby/ext/google/protobuf_c/protobuf.c +++ b/ruby/ext/google/protobuf_c/protobuf.c @@ -103,6 +103,8 @@ void Init_protobuf_c() { cError = rb_const_get(protobuf, rb_intern("Error")); cParseError = rb_const_get(protobuf, rb_intern("ParseError")); + rb_define_singleton_method(protobuf, "discard_unknown", + Google_Protobuf_discard_unknown, 1); rb_define_singleton_method(protobuf, "deep_copy", Google_Protobuf_deep_copy, 1); diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h index 1291ac59..5266aa8d 100644 --- a/ruby/ext/google/protobuf_c/protobuf.h +++ b/ruby/ext/google/protobuf_c/protobuf.h @@ -515,6 +515,7 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb); VALUE Message_decode_json(VALUE klass, VALUE data); VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass); +VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb); VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj); VALUE build_module_from_enumdesc(EnumDescriptor* enumdef); diff --git a/ruby/ext/google/protobuf_c/repeated_field.c b/ruby/ext/google/protobuf_c/repeated_field.c index 1c651c19..c6620ee6 100644 --- a/ruby/ext/google/protobuf_c/repeated_field.c +++ b/ruby/ext/google/protobuf_c/repeated_field.c @@ -626,8 +626,8 @@ void RepeatedField_register(VALUE module) { VALUE klass = rb_define_class_under( module, "RepeatedField", rb_cObject); rb_define_alloc_func(klass, RepeatedField_alloc); - cRepeatedField = klass; rb_gc_register_address(&cRepeatedField); + cRepeatedField = klass; rb_define_method(klass, "initialize", RepeatedField_init, -1); diff --git a/ruby/tests/encode_decode_test.rb b/ruby/tests/encode_decode_test.rb new file mode 100644 index 00000000..09581ab0 --- /dev/null +++ b/ruby/tests/encode_decode_test.rb @@ -0,0 +1,63 @@ +#!/usr/bin/ruby + +# generated_code.rb is in the same directory as this test. +$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) + +require 'generated_code_pb' +require 'test/unit' + +def hex2bin(s) + s.scan(/../).map { |x| x.hex.chr }.join +end + +class EncodeDecodeTest < Test::Unit::TestCase + def test_discard_unknown + # Test discard unknown in message. + unknown_msg = A::B::C::TestUnknown.new(:unknown_field => 1) + from = A::B::C::TestUnknown.encode(unknown_msg) + m = A::B::C::TestMessage.decode(from) + Google::Protobuf.discard_unknown(m) + to = A::B::C::TestMessage.encode(m) + assert_equal '', to + + # Test discard unknown for singular message field. + unknown_msg = A::B::C::TestUnknown.new( + :optional_unknown => + A::B::C::TestUnknown.new(:unknown_field => 1)) + from = A::B::C::TestUnknown.encode(unknown_msg) + m = A::B::C::TestMessage.decode(from) + Google::Protobuf.discard_unknown(m) + to = A::B::C::TestMessage.encode(m.optional_msg) + assert_equal '', to + + # Test discard unknown for repeated message field. + unknown_msg = A::B::C::TestUnknown.new( + :repeated_unknown => + [A::B::C::TestUnknown.new(:unknown_field => 1)]) + from = A::B::C::TestUnknown.encode(unknown_msg) + m = A::B::C::TestMessage.decode(from) + Google::Protobuf.discard_unknown(m) + to = A::B::C::TestMessage.encode(m.repeated_msg[0]) + assert_equal '', to + + # Test discard unknown for map value message field. + unknown_msg = A::B::C::TestUnknown.new( + :map_unknown => + {"" => A::B::C::TestUnknown.new(:unknown_field => 1)}) + from = A::B::C::TestUnknown.encode(unknown_msg) + m = A::B::C::TestMessage.decode(from) + Google::Protobuf.discard_unknown(m) + to = A::B::C::TestMessage.encode(m.map_string_msg['']) + assert_equal '', to + + # Test discard unknown for oneof message field. + unknown_msg = A::B::C::TestUnknown.new( + :oneof_unknown => + A::B::C::TestUnknown.new(:unknown_field => 1)) + from = A::B::C::TestUnknown.encode(unknown_msg) + m = A::B::C::TestMessage.decode(from) + Google::Protobuf.discard_unknown(m) + to = A::B::C::TestMessage.encode(m.oneof_msg) + assert_equal '', to + end +end diff --git a/ruby/tests/generated_code.proto b/ruby/tests/generated_code.proto index 62fd83ed..3b934bd6 100644 --- a/ruby/tests/generated_code.proto +++ b/ruby/tests/generated_code.proto @@ -57,6 +57,9 @@ message TestMessage { } NestedMessage nested_message = 80; + + // Reserved for non-existing field test. + // int32 non_exist = 89; } enum TestEnum { @@ -65,3 +68,13 @@ enum TestEnum { B = 2; C = 3; } + +message TestUnknown { + TestUnknown optional_unknown = 11; + repeated TestUnknown repeated_unknown = 31; + oneof my_oneof { + TestUnknown oneof_unknown = 51; + } + map<string, TestUnknown> map_unknown = 67; + int32 unknown_field = 89; +} diff --git a/src/google/protobuf/stubs/io_win32.cc b/src/google/protobuf/stubs/io_win32.cc index fa2cb8b1..ad2d2d26 100644 --- a/src/google/protobuf/stubs/io_win32.cc +++ b/src/google/protobuf/stubs/io_win32.cc @@ -30,10 +30,11 @@ // Author: laszlocsomor@google.com (Laszlo Csomor) // -// Implementation for long-path-aware open/mkdir/etc. on Windows. +// Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well +// as for the supporting utility functions. // // These functions convert the input path to an absolute Windows path -// with "\\?\" prefix if necessary, then pass that to _wopen/_wmkdir/etc. +// with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc. // (declared in <io.h>) respectively. This allows working with files/directories // whose paths are longer than MAX_PATH (260 chars). // @@ -59,7 +60,6 @@ #include <google/protobuf/stubs/io_win32.h> #include <google/protobuf/stubs/scoped_ptr.h> -#include <cassert> #include <memory> #include <sstream> #include <string> @@ -89,6 +89,11 @@ struct CharTraits<wchar_t> { static bool is_alpha(wchar_t ch) { return iswalpha(ch); } }; +template <typename char_type> +bool null_or_empty(const char_type* s) { + return s == nullptr || *s == 0; +} + // Returns true if the path starts with a drive letter, e.g. "c:". // Note that this won't check for the "\" after the drive letter, so this also // returns true for "c:foo" (which is "c:\${PWD}\foo"). @@ -121,16 +126,7 @@ bool is_drive_relative(const char_type* path) { return has_drive_letter(path) && (path[2] == 0 || !is_separator(path[2])); } -template <typename char_type> -void replace_directory_separators(char_type* p) { - for (; *p; ++p) { - if (*p == '/') { - *p = '\\'; - } - } -} - -string join_paths(const string& path1, const string& path2) { +wstring join_paths(const wstring& path1, const wstring& path2) { if (path1.empty() || is_path_absolute(path2.c_str()) || has_longpath_prefix(path2.c_str())) { return path2; @@ -144,23 +140,23 @@ string join_paths(const string& path1, const string& path2) { : (path1 + path2); } else { return is_separator(path2[0]) ? (path1 + path2) - : (path1 + '\\' + path2); + : (path1 + L'\\' + path2); } } -string normalize(string path) { +wstring normalize(wstring path) { if (has_longpath_prefix(path.c_str())) { path = path.substr(4); } - static const string dot("."); - static const string dotdot(".."); + static const wstring dot(L"."); + static const wstring dotdot(L".."); - std::vector<string> segments; + std::vector<wstring> segments; int segment_start = -1; // Find the path segments in `path` (separated by "/"). for (int i = 0;; ++i) { - if (!is_separator(path[i]) && path[i] != '\0') { + if (!is_separator(path[i]) && path[i] != L'\0') { // The current character does not end a segment, so start one unless it's // already started. if (segment_start < 0) { @@ -169,7 +165,7 @@ string normalize(string path) { } else if (segment_start >= 0 && i > segment_start) { // The current character is "/" or "\0", so this ends a segment. // Add that to `segments` if there's anything to add; handle "." and "..". - string segment(path, segment_start, i - segment_start); + wstring segment(path, segment_start, i - segment_start); segment_start = -1; if (segment == dotdot) { if (!segments.empty() && @@ -180,7 +176,7 @@ string normalize(string path) { segments.push_back(segment); } } - if (path[i] == '\0') { + if (path[i] == L'\0') { break; } } @@ -189,64 +185,62 @@ string normalize(string path) { // form of it, e.g. "c:\.."). if (segments.size() == 1 && segments[0].size() == 2 && has_drive_letter(segments[0].c_str())) { - return segments[0] + '\\'; + return segments[0] + L'\\'; } // Join all segments. bool first = true; - std::ostringstream result; + std::wstringstream result; for (int i = 0; i < segments.size(); ++i) { if (!first) { - result << '\\'; + result << L'\\'; } first = false; result << segments[i]; } // Preserve trailing separator if the input contained it. if (!path.empty() && is_separator(path[path.size() - 1])) { - result << '\\'; + result << L'\\'; } return result.str(); } -WCHAR* as_wstring(const string& s) { - int len = ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), NULL, 0); - WCHAR* result = new WCHAR[len + 1]; - ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), result, len + 1); - result[len] = 0; - return result; -} - -void as_wchar_path(const string& path, wstring* wchar_path) { - scoped_array<WCHAR> wbuf(as_wstring(path)); - replace_directory_separators(wbuf.get()); - wchar_path->assign(wbuf.get()); -} - -bool as_windows_path(const string& path, wstring* result) { - if (path.empty()) { +bool as_windows_path(const char* path, wstring* result) { + if (null_or_empty(path)) { result->clear(); return true; } - if (is_separator(path[0]) || is_drive_relative(path.c_str())) { + wstring wpath; + if (!strings::utf8_to_wcs(path, &wpath)) { + return false; + } + if (has_longpath_prefix(wpath.c_str())) { + *result = wpath; + return true; + } + if (is_separator(path[0]) || is_drive_relative(path)) { return false; } - string mutable_path = path; - if (!is_path_absolute(mutable_path.c_str()) && - !has_longpath_prefix(mutable_path.c_str())) { - char cwd[MAX_PATH]; - ::GetCurrentDirectoryA(MAX_PATH, cwd); - mutable_path = join_paths(cwd, mutable_path); + + if (!is_path_absolute(wpath.c_str())) { + int size = ::GetCurrentDirectoryW(0, NULL); + if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return false; + } + scoped_array<WCHAR> wcwd(new WCHAR[size]); + ::GetCurrentDirectoryW(size, wcwd.get()); + wpath = join_paths(wcwd.get(), wpath); } - as_wchar_path(normalize(mutable_path), result); - if (!has_longpath_prefix(result->c_str())) { + wpath = normalize(wpath); + if (!has_longpath_prefix(wpath.c_str())) { // Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API // from processing the path and "helpfully" removing trailing dots from the // path, for example. // See https://github.com/bazelbuild/bazel/issues/2935 - *result = wstring(L"\\\\?\\") + *result; + wpath = wstring(L"\\\\?\\") + wpath; } + *result = wpath; return true; } @@ -319,13 +313,21 @@ int stat(const char* path, struct _stat* buffer) { FILE* fopen(const char* path, const char* mode) { #ifdef SUPPORT_LONGPATHS + if (null_or_empty(path)) { + errno = EINVAL; + return NULL; + } wstring wpath; if (!as_windows_path(path, &wpath)) { errno = ENOENT; return NULL; } - scoped_array<WCHAR> wmode(as_wstring(mode)); - return ::_wfopen(wpath.c_str(), wmode.get()); + wstring wmode; + if (!strings::utf8_to_wcs(mode, &wmode)) { + errno = EINVAL; + return NULL; + } + return ::_wfopen(wpath.c_str(), wmode.c_str()); #else return ::fopen(path, mode); #endif @@ -347,16 +349,65 @@ int write(int fd, const void* buffer, size_t size) { return ::_write(fd, buffer, size); } -wstring testonly_path_to_winpath(const string& path) { +wstring testonly_utf8_to_winpath(const char* path) { wstring wpath; - as_windows_path(path, &wpath); - return wpath; + return as_windows_path(path, &wpath) ? wpath : wstring(); +} + +namespace strings { + +bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) { + if (null_or_empty(s)) { + out->clear(); + return true; + } + BOOL usedDefaultChar = FALSE; + SetLastError(0); + int size = WideCharToMultiByte( + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0, NULL, + outUtf8 ? NULL : &usedDefaultChar); + if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) + || usedDefaultChar) { + return false; + } + scoped_array<CHAR> astr(new CHAR[size]); + WideCharToMultiByte( + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, NULL, NULL); + out->assign(astr.get()); + return true; +} + +bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) { + if (null_or_empty(s)) { + out->clear(); + return true; + } + + SetLastError(0); + int size = + MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0); + if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return false; + } + scoped_array<WCHAR> wstr(new WCHAR[size]); + MultiByteToWideChar( + inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, wstr.get(), size + 1); + out->assign(wstr.get()); + return true; } +bool utf8_to_wcs(const char* input, wstring* out) { + return mbs_to_wcs(input, out, true); +} + +bool wcs_to_utf8(const wchar_t* input, string* out) { + return wcs_to_mbs(input, out, true); +} + +} // namespace strings } // namespace win32 } // namespace internal } // namespace protobuf } // namespace google #endif // defined(_WIN32) - diff --git a/src/google/protobuf/stubs/io_win32.h b/src/google/protobuf/stubs/io_win32.h index 53160089..9e17d253 100644 --- a/src/google/protobuf/stubs/io_win32.h +++ b/src/google/protobuf/stubs/io_win32.h @@ -69,8 +69,25 @@ LIBPROTOBUF_EXPORT int read(int fd, void* buffer, size_t size); LIBPROTOBUF_EXPORT int setmode(int fd, int mode); LIBPROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer); LIBPROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size); -LIBPROTOBUF_EXPORT std::wstring testonly_path_to_winpath( - const std::string& path); +LIBPROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path); + +namespace strings { + +// Convert from UTF-16 to Active-Code-Page-encoded or to UTF-8-encoded text. +LIBPROTOBUF_EXPORT bool wcs_to_mbs( + const wchar_t* s, std::string* out, bool outUtf8); + +// Convert from Active-Code-Page-encoded or UTF-8-encoded text to UTF-16. +LIBPROTOBUF_EXPORT bool mbs_to_wcs( + const char* s, std::wstring* out, bool inUtf8); + +// Convert from UTF-8-encoded text to UTF-16. +LIBPROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out); + +// Convert from UTF-16-encoded text to UTF-8. +LIBPROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out); + +} // namespace strings } // namespace win32 } // namespace internal diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc index ce6f7162..b216aece 100644 --- a/src/google/protobuf/stubs/io_win32_unittest.cc +++ b/src/google/protobuf/stubs/io_win32_unittest.cc @@ -30,7 +30,8 @@ // Author: laszlocsomor@google.com (Laszlo Csomor) // -// Unit tests for long-path-aware open/mkdir/access on Windows. +// Unit tests for long-path-aware open/mkdir/access/etc. on Windows, as well as +// for the supporting utility functions. // // This file is only used on Windows, it's empty on other platforms. @@ -48,7 +49,6 @@ #include <google/protobuf/stubs/io_win32.h> #include <google/protobuf/stubs/scoped_ptr.h> -#include <google/protobuf/testing/googletest.h> #include <gtest/gtest.h> #include <memory> @@ -61,6 +61,27 @@ namespace internal { namespace win32 { namespace { +const char kUtf8Text[] = { + 'h', 'i', ' ', + // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F + 0xd0, 0x9f, + // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440 + 0xd1, 0x80, + // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438 + 0xd0, 0xb8, + // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432 + 0xd0, 0xb2, + // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435 + 0xd0, 0xb5, + // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442 + 0xd1, 0x82, 0 +}; + +const wchar_t kUtf16Text[] = { + L'h', L'i', L' ', + L'\x41f', L'\x440', L'\x438', L'\x432', L'\x435', L'\x442', 0 +}; + using std::string; using std::wstring; @@ -73,6 +94,7 @@ class IoWin32Test : public ::testing::Test { bool CreateAllUnder(wstring path); bool DeleteAllUnder(wstring path); + WCHAR working_directory[MAX_PATH]; string test_tmpdir; wstring wtest_tmpdir; }; @@ -89,71 +111,88 @@ void StripTrailingSlashes(string* str) { for (; i >= 0 && ((*str)[i] == '/' || (*str)[i] == '\\'); --i) {} str->resize(i+1); } + +bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { + DWORD size = ::GetEnvironmentVariableW(name, NULL, 0); + if (size > 0 && GetLastError() != ERROR_ENVVAR_NOT_FOUND) { + scoped_array<WCHAR> wcs(new WCHAR[size]); + ::GetEnvironmentVariableW(name, wcs.get(), size); + // GetEnvironmentVariableA retrieves an Active-Code-Page-encoded text which + // we'd first need to convert to UTF-16 then to UTF-8, because there seems + // to be no API function to do that conversion directly. + // GetEnvironmentVariableW retrieves an UTF-16-encoded text, which we need + // to convert to UTF-8. + return strings::wcs_to_utf8(wcs.get(), result); + } else { + return false; + } +} + +bool GetCwdAsUtf8(string* result) { + DWORD size = ::GetCurrentDirectoryW(0, NULL); + if (size > 0) { + scoped_array<WCHAR> wcs(new WCHAR[size]); + ::GetCurrentDirectoryW(size, wcs.get()); + // GetCurrentDirectoryA retrieves an Active-Code-Page-encoded text which + // we'd first need to convert to UTF-16 then to UTF-8, because there seems + // to be no API function to do that conversion directly. + // GetCurrentDirectoryW retrieves an UTF-16-encoded text, which we need + // to convert to UTF-8. + return strings::wcs_to_utf8(wcs.get(), result); + } else { + return false; + } +} + } // namespace void IoWin32Test::SetUp() { - test_tmpdir = string(TestTempDir()); + test_tmpdir.clear(); wtest_tmpdir.clear(); - if (test_tmpdir.empty()) { - const char* test_tmpdir_env = getenv("TEST_TMPDIR"); - if (test_tmpdir_env != NULL && *test_tmpdir_env) { - test_tmpdir = string(test_tmpdir_env); - } - - // Only Bazel defines TEST_TMPDIR, CMake does not, so look for other - // suitable environment variables. - if (test_tmpdir.empty()) { - static const char* names[] = {"TEMP", "TMP"}; - for (int i = 0; i < sizeof(names)/sizeof(names[0]); ++i) { - const char* name = names[i]; - test_tmpdir_env = getenv(name); - if (test_tmpdir_env != NULL && *test_tmpdir_env) { - test_tmpdir = string(test_tmpdir_env); - break; - } - } - } + EXPECT_GT(::GetCurrentDirectoryW(MAX_PATH, working_directory), 0); - // No other temp directory was found. Use the current director - if (test_tmpdir.empty()) { - char buffer[MAX_PATH]; - // Use GetCurrentDirectoryA instead of GetCurrentDirectoryW, because the - // current working directory must always be shorter than MAX_PATH, even - // with - // "\\?\" prefix (except on Windows 10 version 1607 and beyond, after - // opting in to long paths by default [1]). - // - // [1] https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath - DWORD result = ::GetCurrentDirectoryA(MAX_PATH, buffer); - if (result > 0) { - test_tmpdir = string(buffer); - } else { - // Using assertions in SetUp/TearDown seems to confuse the test - // framework, so just leave the member variables empty in case of - // failure. - GOOGLE_CHECK_OK(false); - return; - } - } + string tmp; + bool ok = false; + if (!ok) { + // Bazel sets this environment variable when it runs tests. + ok = GetEnvVarAsUtf8(L"TEST_TMPDIR", &tmp); } - - StripTrailingSlashes(&test_tmpdir); - test_tmpdir += "\\io_win32_unittest.tmp"; - - // CreateDirectoryA's limit is 248 chars, see MSDN. - // https://msdn.microsoft.com/en-us/library/windows/desktop/aa363855(v=vs.85).aspx - wtest_tmpdir = testonly_path_to_winpath(test_tmpdir); - if (!DeleteAllUnder(wtest_tmpdir) || !CreateAllUnder(wtest_tmpdir)) { - GOOGLE_CHECK_OK(false); - test_tmpdir.clear(); - wtest_tmpdir.clear(); + if (!ok) { + // Bazel 0.8.0 sets this environment for every build and test action. + ok = GetEnvVarAsUtf8(L"TEMP", &tmp); } + if (!ok) { + // Bazel 0.8.0 sets this environment for every build and test action. + ok = GetEnvVarAsUtf8(L"TMP", &tmp); + } + if (!ok) { + // Fall back to using the current directory. + ok = GetCwdAsUtf8(&tmp); + } + if (!ok || tmp.empty()) { + FAIL() << "Cannot find a temp directory."; + } + + StripTrailingSlashes(&tmp); + std::stringstream result; + // Deleting files and directories is asynchronous on Windows, and if TearDown + // just deleted the previous temp directory, sometimes we cannot recreate the + // same directory. + // Use a counter so every test method gets its own temp directory. + static unsigned int counter = 0; + result << tmp << "\\w32tst" << counter++ << ".tmp"; + test_tmpdir = result.str(); + wtest_tmpdir = testonly_utf8_to_winpath(test_tmpdir.c_str()); + ASSERT_FALSE(wtest_tmpdir.empty()); + ASSERT_TRUE(DeleteAllUnder(wtest_tmpdir)); + ASSERT_TRUE(CreateAllUnder(wtest_tmpdir)); } void IoWin32Test::TearDown() { if (!wtest_tmpdir.empty()) { DeleteAllUnder(wtest_tmpdir); } + ::SetCurrentDirectoryW(working_directory); } bool IoWin32Test::CreateAllUnder(wstring path) { @@ -191,8 +230,8 @@ bool IoWin32Test::DeleteAllUnder(wstring path) { path = wstring(L"\\\\?\\") + path; } // Append "\" if necessary. - if (path[path.size() - 1] != '\\') { - path.push_back('\\'); + if (path[path.size() - 1] != L'\\') { + path.push_back(L'\\'); } WIN32_FIND_DATAW metadata; @@ -309,13 +348,24 @@ TEST_F(IoWin32Test, MkdirTest) { ASSERT_EQ(errno, ENOENT); } +TEST_F(IoWin32Test, MkdirTestNonAscii) { + ASSERT_INITIALIZED; + + // Create a non-ASCII path. + // Ensure that we can create the directory using SetCurrentDirectoryW. + EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1").c_str(), NULL)); + EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1\\" + kUtf16Text).c_str(), NULL)); + // Ensure that we can create a very similarly named directory using mkdir. + // We don't attemp to delete and recreate the same directory, because on + // Windows, deleting files and directories seems to be asynchronous. + EXPECT_EQ(mkdir((test_tmpdir + "\\2").c_str(), 0644), 0); + EXPECT_EQ(mkdir((test_tmpdir + "\\2\\" + kUtf8Text).c_str(), 0644), 0); +} + TEST_F(IoWin32Test, ChdirTest) { - char owd[MAX_PATH]; - EXPECT_GT(::GetCurrentDirectoryA(MAX_PATH, owd), 0); string path("C:\\"); EXPECT_EQ(access(path.c_str(), F_OK), 0); ASSERT_EQ(chdir(path.c_str()), 0); - EXPECT_TRUE(::SetCurrentDirectoryA(owd)); // Do not try to chdir into the test_tmpdir, it may already contain directory // names with trailing dots. @@ -330,17 +380,37 @@ TEST_F(IoWin32Test, ChdirTest) { ASSERT_NE(chdir(path.c_str()), 0); } +TEST_F(IoWin32Test, ChdirTestNonAscii) { + ASSERT_INITIALIZED; + + // Create a directory with a non-ASCII path and ensure we can cd into it. + wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text); + string nonAscii; + EXPECT_TRUE(strings::wcs_to_utf8(wNonAscii.c_str(), &nonAscii)); + EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), NULL)); + WCHAR cwd[MAX_PATH]; + EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd)); + // Ensure that we can cd into the path using SetCurrentDirectoryW. + EXPECT_TRUE(SetCurrentDirectoryW(wNonAscii.c_str())); + EXPECT_TRUE(SetCurrentDirectoryW(cwd)); + // Ensure that we can cd into the path using chdir. + ASSERT_EQ(chdir(nonAscii.c_str()), 0); + // Ensure that the GetCurrentDirectoryW returns the desired path. + EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd)); + ASSERT_EQ(wNonAscii, cwd); +} + TEST_F(IoWin32Test, AsWindowsPathTest) { DWORD size = GetCurrentDirectoryW(0, NULL); scoped_array<wchar_t> cwd_str(new wchar_t[size]); EXPECT_GT(GetCurrentDirectoryW(size, cwd_str.get()), 0); wstring cwd = wstring(L"\\\\?\\") + cwd_str.get(); - ASSERT_EQ(testonly_path_to_winpath("relative_mkdirtest"), + ASSERT_EQ(testonly_utf8_to_winpath("relative_mkdirtest"), cwd + L"\\relative_mkdirtest"); - ASSERT_EQ(testonly_path_to_winpath("preserve//\\trailing///"), + ASSERT_EQ(testonly_utf8_to_winpath("preserve//\\trailing///"), cwd + L"\\preserve\\trailing\\"); - ASSERT_EQ(testonly_path_to_winpath("./normalize_me\\/../blah"), + ASSERT_EQ(testonly_utf8_to_winpath("./normalize_me\\/../blah"), cwd + L"\\blah"); std::ostringstream relpath; for (wchar_t* p = cwd_str.get(); *p; ++p) { @@ -349,18 +419,28 @@ TEST_F(IoWin32Test, AsWindowsPathTest) { } } relpath << ".\\/../\\./beyond-toplevel"; - ASSERT_EQ(testonly_path_to_winpath(relpath.str()), + ASSERT_EQ(testonly_utf8_to_winpath(relpath.str().c_str()), wstring(L"\\\\?\\") + cwd_str.get()[0] + L":\\beyond-toplevel"); // Absolute unix paths lack drive letters, driveless absolute windows paths // do too. Neither can be converted to a drive-specifying absolute Windows // path. - ASSERT_EQ(testonly_path_to_winpath("/absolute/unix/path"), L""); + ASSERT_EQ(testonly_utf8_to_winpath("/absolute/unix/path"), L""); // Though valid on Windows, we also don't support UNC paths (\\UNC\\blah). - ASSERT_EQ(testonly_path_to_winpath("\\driveless\\absolute"), L""); + ASSERT_EQ(testonly_utf8_to_winpath("\\driveless\\absolute"), L""); // Though valid in cmd.exe, drive-relative paths are not supported. - ASSERT_EQ(testonly_path_to_winpath("c:foo"), L""); - ASSERT_EQ(testonly_path_to_winpath("c:/foo"), L"\\\\?\\c:\\foo"); + ASSERT_EQ(testonly_utf8_to_winpath("c:foo"), L""); + ASSERT_EQ(testonly_utf8_to_winpath("c:/foo"), L"\\\\?\\c:\\foo"); + ASSERT_EQ(testonly_utf8_to_winpath("\\\\?\\C:\\foo"), L"\\\\?\\C:\\foo"); +} + +TEST_F(IoWin32Test, Utf8Utf16ConversionTest) { + string mbs; + wstring wcs; + ASSERT_TRUE(strings::utf8_to_wcs(kUtf8Text, &wcs)); + ASSERT_TRUE(strings::wcs_to_utf8(kUtf16Text, &mbs)); + ASSERT_EQ(wcs, kUtf16Text); + ASSERT_EQ(mbs, kUtf8Text); } } // namespace |