aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--CHANGES.txt6
-rw-r--r--Makefile.am1
-rw-r--r--php/ext/google/protobuf/encode_decode.c100
-rw-r--r--php/ext/google/protobuf/message.c1
-rw-r--r--php/ext/google/protobuf/package.xml38
-rw-r--r--php/ext/google/protobuf/protobuf.h3
-rw-r--r--php/src/Google/Protobuf/Any.php7
-rw-r--r--php/src/Google/Protobuf/Internal/Message.php37
-rw-r--r--php/tests/encode_decode_test.php47
-rwxr-xr-xpost_process_dist.sh2
-rw-r--r--protoc-artifacts/README.md11
-rwxr-xr-xpython/google/protobuf/__init__.py2
-rw-r--r--python/google/protobuf/descriptor_pool.py11
-rw-r--r--ruby/ext/google/protobuf_c/defs.c22
-rw-r--r--ruby/ext/google/protobuf_c/encode_decode.c88
-rw-r--r--ruby/ext/google/protobuf_c/map.c2
-rw-r--r--ruby/ext/google/protobuf_c/protobuf.c2
-rw-r--r--ruby/ext/google/protobuf_c/protobuf.h1
-rw-r--r--ruby/ext/google/protobuf_c/repeated_field.c2
-rw-r--r--ruby/tests/encode_decode_test.rb63
-rw-r--r--ruby/tests/generated_code.proto13
-rw-r--r--src/google/protobuf/stubs/io_win32.cc165
-rw-r--r--src/google/protobuf/stubs/io_win32.h21
-rw-r--r--src/google/protobuf/stubs/io_win32_unittest.cc214
24 files changed, 699 insertions, 160 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index fe90cb9a..ea3252bb 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,4 @@
-2017-11-09 version 3.5.0 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript)
+2017-11-13 version 3.5.0 (C++/Java/Python/PHP/Objective-C/C#/Ruby/JavaScript)
Planned Future Changes
* Make C++ implementation C++11 only: we plan to require C++11 to build
protobuf code starting from 3.6.0 release. Please join this github issue:
@@ -25,7 +25,7 @@
Java
* Proto3 messages are now preserving unknown fields by default. If you’d like
- to drop unknown fields, please use the DiscardUnknownFieldsParser  API. For
+ to drop unknown fields, please use the DiscardUnknownFieldsParser API. For
example:
Parser<Foo> parser = DiscardUnknownFieldsParser.wrap(Foo.parser());
Foo foo = parser.parseFrom(input);
@@ -168,7 +168,7 @@
* Note: AppEngine 2.5 was deprecated in June 2017, so AppEngine 2.5 will
never update its protobuf runtime. Users who depend on AppEngine 2.5 should
use an old protoc.
-
+
PHP
* Support PHP generic services. Specify file option php_generic_service=true
to enable generating service interface.
diff --git a/Makefile.am b/Makefile.am
index cd1cfc3e..cba02e3d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -907,6 +907,7 @@ ruby_EXTRA_DIST= \
ruby/src/main/java/google/ProtobufJavaService.java \
ruby/src/main/sentinel.proto \
ruby/tests/basic.rb \
+ ruby/tests/encode_decode_test.rb \
ruby/tests/gc_test.rb \
ruby/tests/repeated_field_test.rb \
ruby/tests/stress.rb \
diff --git a/php/ext/google/protobuf/encode_decode.c b/php/ext/google/protobuf/encode_decode.c
index 7e2b3ae6..b98121bb 100644
--- a/php/ext/google/protobuf/encode_decode.c
+++ b/php/ext/google/protobuf/encode_decode.c
@@ -1402,7 +1402,6 @@ static void putarray(zval* array, const upb_fielddef* f, upb_sink* sink,
RepeatedField* intern = UNBOX(RepeatedField, array);
HashTable *ht = PHP_PROTO_HASH_OF(intern->array);
size = zend_hash_num_elements(ht);
- // size = zend_hash_num_elements(PHP_PROTO_HASH_OF(intern->array));
if (size == 0) return;
upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
@@ -1614,3 +1613,102 @@ PHP_METHOD(Message, mergeFromJsonString) {
stackenv_uninit(&se);
}
}
+
+// TODO(teboring): Refactor to share code with putrawmsg.
+static void discard_unknown_fields(MessageHeader* msg) {
+ upb_msg_field_iter it;
+
+ stringsink* unknown = DEREF(message_data(msg), 0, stringsink*);
+ if (unknown != NULL) {
+ stringsink_uninit(unknown);
+ DEREF(message_data(msg), 0, stringsink*) = NULL;
+ }
+
+ // Recursively discard unknown fields of submessages.
+ Descriptor* desc = msg->descriptor;
+ TSRMLS_FETCH();
+ for (upb_msg_field_begin(&it, desc->msgdef);
+ !upb_msg_field_done(&it);
+ upb_msg_field_next(&it)) {
+ upb_fielddef* f = upb_msg_iter_field(&it);
+ uint32_t offset = desc->layout->fields[upb_fielddef_index(f)].offset;
+ bool containing_oneof = false;
+
+ if (upb_fielddef_containingoneof(f)) {
+ uint32_t oneof_case_offset =
+ desc->layout->fields[upb_fielddef_index(f)].case_offset;
+ // For a oneof, check that this field is actually present -- skip all the
+ // below if not.
+ if (DEREF(message_data(msg), oneof_case_offset, uint32_t) !=
+ upb_fielddef_number(f)) {
+ continue;
+ }
+ // Otherwise, fall through to the appropriate singular-field handler
+ // below.
+ containing_oneof = true;
+ }
+
+ if (is_map_field(f)) {
+ MapIter map_it;
+ int len, size;
+ const upb_fielddef* value_field;
+
+ value_field = map_field_value(f);
+ if (!upb_fielddef_issubmsg(value_field)) continue;
+
+ zval* map_php = CACHED_PTR_TO_ZVAL_PTR(
+ DEREF(message_data(msg), offset, CACHED_VALUE*));
+ if (map_php == NULL) continue;
+
+ Map* intern = UNBOX(Map, map_php);
+ for (map_begin(map_php, &map_it TSRMLS_CC);
+ !map_done(&map_it); map_next(&map_it)) {
+ upb_value value = map_iter_value(&map_it, &len);
+ void* memory = raw_value(upb_value_memory(&value), value_field);
+#if PHP_MAJOR_VERSION < 7
+ MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory);
+#else
+ MessageHeader *submsg =
+ (MessageHeader*)((char*)(Z_OBJ_P((zval*)memory)) -
+ XtOffsetOf(MessageHeader, std));
+#endif
+ discard_unknown_fields(submsg);
+ }
+ } else if (upb_fielddef_isseq(f)) {
+ if (!upb_fielddef_issubmsg(f)) continue;
+
+ zval* array_php = CACHED_PTR_TO_ZVAL_PTR(
+ DEREF(message_data(msg), offset, CACHED_VALUE*));
+ if (array_php == NULL) continue;
+
+ int size, i;
+ RepeatedField* intern = UNBOX(RepeatedField, array_php);
+ HashTable *ht = PHP_PROTO_HASH_OF(intern->array);
+ size = zend_hash_num_elements(ht);
+ if (size == 0) continue;
+
+ for (i = 0; i < size; i++) {
+ void* memory = repeated_field_index_native(intern, i TSRMLS_CC);
+#if PHP_MAJOR_VERSION < 7
+ MessageHeader *submsg = UNBOX(MessageHeader, *(zval**)memory);
+#else
+ MessageHeader *submsg =
+ (MessageHeader*)((char*)(Z_OBJ_P((zval*)memory)) -
+ XtOffsetOf(MessageHeader, std));
+#endif
+ discard_unknown_fields(submsg);
+ }
+ } else if (upb_fielddef_issubmsg(f)) {
+ zval* submsg_php = CACHED_PTR_TO_ZVAL_PTR(
+ DEREF(message_data(msg), offset, CACHED_VALUE*));
+ if (Z_TYPE_P(submsg_php) == IS_NULL) continue;
+ MessageHeader* submsg = UNBOX(MessageHeader, submsg_php);
+ discard_unknown_fields(submsg);
+ }
+ }
+}
+
+PHP_METHOD(Message, discardUnknownFields) {
+ MessageHeader* msg = UNBOX(MessageHeader, getThis());
+ discard_unknown_fields(msg);
+}
diff --git a/php/ext/google/protobuf/message.c b/php/ext/google/protobuf/message.c
index cf75d979..b14c1f0c 100644
--- a/php/ext/google/protobuf/message.c
+++ b/php/ext/google/protobuf/message.c
@@ -42,6 +42,7 @@ static void hex_to_binary(const char* hex, char** binary, int* binary_len);
static zend_function_entry message_methods[] = {
PHP_ME(Message, clear, NULL, ZEND_ACC_PUBLIC)
+ PHP_ME(Message, discardUnknownFields, NULL, ZEND_ACC_PUBLIC)
PHP_ME(Message, serializeToString, NULL, ZEND_ACC_PUBLIC)
PHP_ME(Message, mergeFromString, NULL, ZEND_ACC_PUBLIC)
PHP_ME(Message, serializeToJsonString, NULL, ZEND_ACC_PUBLIC)
diff --git a/php/ext/google/protobuf/package.xml b/php/ext/google/protobuf/package.xml
index 0107ebc2..626eb46b 100644
--- a/php/ext/google/protobuf/package.xml
+++ b/php/ext/google/protobuf/package.xml
@@ -10,11 +10,11 @@
<email>protobuf-opensource@google.com</email>
<active>yes</active>
</lead>
- <date>2017-12-06</date>
+ <date>2017-12-11</date>
<time>11:02:07</time>
<version>
- <release>3.5.0.1</release>
- <api>3.5.0.1</api>
+ <release>3.5.1</release>
+ <api>3.5.1</api>
</version>
<stability>
<release>stable</release>
@@ -170,6 +170,22 @@ GA release.
</release>
<release>
<version>
+ <release>3.5.0</release>
+ <api>3.5.0</api>
+ </version>
+ <stability>
+ <release>stable</release>
+ <api>stable</api>
+ </stability>
+ <date>2017-11-15</date>
+ <time>11:02:07</time>
+ <license uri="https://opensource.org/licenses/BSD-3-Clause">3-Clause BSD License</license>
+ <notes>
+GA release.
+ </notes>
+ </release>
+ <release>
+ <version>
<release>3.5.0.1</release>
<api>3.5.0.1</api>
</version>
@@ -184,5 +200,21 @@ GA release.
GA release.
</notes>
</release>
+ <release>
+ <version>
+ <release>3.5.1</release>
+ <api>3.5.1</api>
+ </version>
+ <stability>
+ <release>stable</release>
+ <api>stable</api>
+ </stability>
+ <date>2017-12-11</date>
+ <time>11:02:07</time>
+ <license uri="https://opensource.org/licenses/BSD-3-Clause">3-Clause BSD License</license>
+ <notes>
+GA release.
+ </notes>
+ </release>
</changelog>
</package>
diff --git a/php/ext/google/protobuf/protobuf.h b/php/ext/google/protobuf/protobuf.h
index 8289eb2b..f299b415 100644
--- a/php/ext/google/protobuf/protobuf.h
+++ b/php/ext/google/protobuf/protobuf.h
@@ -37,7 +37,7 @@
#include "upb.h"
#define PHP_PROTOBUF_EXTNAME "protobuf"
-#define PHP_PROTOBUF_VERSION "3.5.0.1"
+#define PHP_PROTOBUF_VERSION "3.5.1"
#define MAX_LENGTH_OF_INT64 20
#define SIZEOF_INT64 8
@@ -957,6 +957,7 @@ PHP_METHOD(Message, serializeToString);
PHP_METHOD(Message, mergeFromString);
PHP_METHOD(Message, serializeToJsonString);
PHP_METHOD(Message, mergeFromJsonString);
+PHP_METHOD(Message, discardUnknownFields);
// -----------------------------------------------------------------------------
// Type check / conversion.
diff --git a/php/src/Google/Protobuf/Any.php b/php/src/Google/Protobuf/Any.php
index 91ba4bd5..f027b05d 100644
--- a/php/src/Google/Protobuf/Any.php
+++ b/php/src/Google/Protobuf/Any.php
@@ -4,7 +4,6 @@
namespace Google\Protobuf;
-use Google\Protobuf\Internal\DescriptorPool;
use Google\Protobuf\Internal\GPBType;
use Google\Protobuf\Internal\GPBUtil;
use Google\Protobuf\Internal\Message;
@@ -217,7 +216,7 @@ class Any extends \Google\Protobuf\Internal\Message
substr($this->type_url, $url_prifix_len);
// Create message according to fully qualified name.
- $pool = DescriptorPool::getGeneratedPool();
+ $pool = \Google\Protobuf\Internal\DescriptorPool::getGeneratedPool();
$desc = $pool->getDescriptorByProtoName( ".".$fully_qualifed_name);
if (is_null($desc)) {
throw new \Exception("Class ".$fully_qualifed_name
@@ -248,7 +247,7 @@ class Any extends \Google\Protobuf\Internal\Message
$this->value = $msg->serializeToString();
// Set type url.
- $pool = DescriptorPool::getGeneratedPool();
+ $pool = \Google\Protobuf\Internal\DescriptorPool::getGeneratedPool();
$desc = $pool->getDescriptorByClassName(get_class($msg));
$fully_qualifed_name = $desc->getFullName();
$this->type_url = GPBUtil::TYPE_URL_PREFIX.substr(
@@ -262,7 +261,7 @@ class Any extends \Google\Protobuf\Internal\Message
*/
public function is($klass)
{
- $pool = DescriptorPool::getGeneratedPool();
+ $pool = \Google\Protobuf\Internal\DescriptorPool::getGeneratedPool();
$desc = $pool->getDescriptorByClassName($klass);
$fully_qualifed_name = $desc->getFullName();
$type_url = GPBUtil::TYPE_URL_PREFIX.substr(
diff --git a/php/src/Google/Protobuf/Internal/Message.php b/php/src/Google/Protobuf/Internal/Message.php
index a7a4f272..93e81c01 100644
--- a/php/src/Google/Protobuf/Internal/Message.php
+++ b/php/src/Google/Protobuf/Internal/Message.php
@@ -577,6 +577,43 @@ class Message
}
/**
+ * Clear all unknown fields previously parsed, including those of submessages.
+ * @return null.
+ */
+ public function discardUnknownFields()
+ {
+ $this->unknown = "";
+ foreach ($this->desc->getField() as $field) {
+ if ($field->getType() != GPBType::MESSAGE) {
+ continue;
+ }
+ if ($field->isMap()) {
+ $value_field = $field->getMessageType()->getFieldByNumber(2);
+ if ($value_field->getType() != GPBType::MESSAGE) {
+ continue;
+ }
+ $getter = $field->getGetter();
+ $map = $this->$getter();
+ foreach ($map as $key => $value) {
+ $value->discardUnknownFields();
+ }
+ } else if ($field->getLabel() === GPBLabel::REPEATED) {
+ $getter = $field->getGetter();
+ $arr = $this->$getter();
+ foreach ($arr as $sub) {
+ $sub->discardUnknownFields();
+ }
+ } else if ($field->getLabel() === GPBLabel::OPTIONAL) {
+ $getter = $field->getGetter();
+ $sub = $this->$getter();
+ if (!is_null($sub)) {
+ $sub->discardUnknownFields();
+ }
+ }
+ }
+ }
+
+ /**
* Merges the contents of the specified message into current message.
*
* This method merges the contents of the specified message into the
diff --git a/php/tests/encode_decode_test.php b/php/tests/encode_decode_test.php
index 4dca922b..74d5526d 100644
--- a/php/tests/encode_decode_test.php
+++ b/php/tests/encode_decode_test.php
@@ -443,29 +443,74 @@ class EncodeDecodeTest extends TestBase
public function testUnknown()
{
+ // Test preserve unknown for varint.
$m = new TestMessage();
- $from = hex2bin('F80601');
+ $from = hex2bin('F80601'); // TODO(teboring): Add a util to encode
+ // varint for better readability
$m->mergeFromString($from);
$to = $m->serializeToString();
$this->assertSame(bin2hex($from), bin2hex($to));
+ // Test preserve unknown for 64-bit.
$m = new TestMessage();
$from = hex2bin('F9060000000000000000');
$m->mergeFromString($from);
$to = $m->serializeToString();
$this->assertSame(bin2hex($from), bin2hex($to));
+ // Test preserve unknown for length delimited.
$m = new TestMessage();
$from = hex2bin('FA0600');
$m->mergeFromString($from);
$to = $m->serializeToString();
$this->assertSame(bin2hex($from), bin2hex($to));
+ // Test preserve unknown for 32-bit.
$m = new TestMessage();
$from = hex2bin('FD0600000000');
$m->mergeFromString($from);
$to = $m->serializeToString();
$this->assertSame(bin2hex($from), bin2hex($to));
+
+ // Test discard unknown in message.
+ $m = new TestMessage();
+ $from = hex2bin('F80601');
+ $m->mergeFromString($from);
+ $m->discardUnknownFields();
+ $to = $m->serializeToString();
+ $this->assertSame("", bin2hex($to));
+
+ // Test discard unknown for singular message field.
+ $m = new TestMessage();
+ $from = hex2bin('8A0103F80601');
+ $m->mergeFromString($from);
+ $m->discardUnknownFields();
+ $to = $m->serializeToString();
+ $this->assertSame("8a0100", bin2hex($to));
+
+ // Test discard unknown for repeated message field.
+ $m = new TestMessage();
+ $from = hex2bin('FA0203F80601');
+ $m->mergeFromString($from);
+ $m->discardUnknownFields();
+ $to = $m->serializeToString();
+ $this->assertSame("fa0200", bin2hex($to));
+
+ // Test discard unknown for map message value field.
+ $m = new TestMessage();
+ $from = hex2bin("BA050708011203F80601");
+ $m->mergeFromString($from);
+ $m->discardUnknownFields();
+ $to = $m->serializeToString();
+ $this->assertSame("ba050408011200", bin2hex($to));
+
+ // Test discard unknown for oneof message field.
+ $m = new TestMessage();
+ $from = hex2bin('9A0403F80601');
+ $m->mergeFromString($from);
+ $m->discardUnknownFields();
+ $to = $m->serializeToString();
+ $this->assertSame("9a0400", bin2hex($to));
}
public function testJsonEncode()
diff --git a/post_process_dist.sh b/post_process_dist.sh
index a5f95800..eb5f584d 100755
--- a/post_process_dist.sh
+++ b/post_process_dist.sh
@@ -27,7 +27,7 @@ fi
set -ex
-LANGUAGES="cpp csharp java javanano js objectivec python ruby php"
+LANGUAGES="cpp csharp java javanano js objectivec python ruby php all"
BASENAME=`basename $1 .tar.gz`
VERSION=${BASENAME:9}
diff --git a/protoc-artifacts/README.md b/protoc-artifacts/README.md
index 17eb77f8..fc8ef586 100644
--- a/protoc-artifacts/README.md
+++ b/protoc-artifacts/README.md
@@ -70,9 +70,9 @@ support. DO NOT close the staging repository until you have done the
deployment for all platforms. Currently the following platforms are supported:
- Linux (x86_32 and x86_64)
- Windows (x86_32 and x86_64) with
- - Cygwin64 with MinGW compilers (x86_64)
- - MSYS with MinGW32 (x86_32)
- - Cross compile in Linux with MinGW-w64 (x86_32, x86_64)
+ - Cygwin64 with MinGW compilers (x86_64)
+ - MSYS with MinGW32 (x86_32)
+ - Cross compile in Linux with MinGW-w64 (x86_32, x86_64)
- MacOSX (x86_32 and x86_64)
As for MSYS2/MinGW64 for Windows: protoc will build, but it insists on
@@ -174,8 +174,9 @@ stored:
### Tested build environments
We have successfully built artifacts on the following environments:
- Linux x86_32 and x86_64:
- - Centos 6.6 (within Docker 1.6.1)
- - Ubuntu 14.04.2 64-bit
+ - Centos 6.6 (within Docker 1.6.1)
+ - Ubuntu 14.04.2 64-bit
+- Linux aarch_64: Cross compiled with `g++-aarch64-linux-gnu` on Ubuntu 14.04.2 64-bit
- Windows x86_32: MSYS with ``mingw32-gcc-g++ 4.8.1-4`` on Windows 7 64-bit
- Windows x86_32: Cross compile with ``i686-w64-mingw32-g++ 4.8.2`` on Ubuntu 14.04.2 64-bit
- Windows x86_64: Cygwin64 with ``mingw64-x86_64-gcc-g++ 4.8.3-1`` on Windows 7 64-bit
diff --git a/python/google/protobuf/__init__.py b/python/google/protobuf/__init__.py
index 76cb51c3..0f4d63b0 100755
--- a/python/google/protobuf/__init__.py
+++ b/python/google/protobuf/__init__.py
@@ -30,7 +30,7 @@
# Copyright 2007 Google Inc. All Rights Reserved.
-__version__ = '3.5.0'
+__version__ = '3.5.0.post1'
if __name__ != '__main__':
try:
diff --git a/python/google/protobuf/descriptor_pool.py b/python/google/protobuf/descriptor_pool.py
index cb7146b6..f4c533a4 100644
--- a/python/google/protobuf/descriptor_pool.py
+++ b/python/google/protobuf/descriptor_pool.py
@@ -127,6 +127,9 @@ class DescriptorPool(object):
self._service_descriptors = {}
self._file_descriptors = {}
self._toplevel_extensions = {}
+ # TODO(jieluo): Remove _file_desc_by_toplevel_extension (kept only for
+ # compatibility with 3.4.1), maybe after year 2020.
+ self._file_desc_by_toplevel_extension = {}
# We store extensions in two two-level mappings: The first key is the
# descriptor of the message being extended, the second key is the extension
# full name or its tag number.
@@ -252,6 +255,12 @@ class DescriptorPool(object):
"""
self._AddFileDescriptor(file_desc)
+ # TODO(jieluo): This is a temporary workaround for FieldDescriptor.file,
+ # which is added in code gen. Remove it, maybe after 2020; it is kept only
+ # for compatibility with 3.4.1.
+ for extension in file_desc.extensions_by_name.values():
+ self._file_desc_by_toplevel_extension[
+ extension.full_name] = file_desc
def _AddFileDescriptor(self, file_desc):
"""Adds a FileDescriptor to the pool, non-recursively.
@@ -331,7 +340,7 @@ class DescriptorPool(object):
pass
try:
- return self._toplevel_extensions[symbol].file
+ return self._file_desc_by_toplevel_extension[symbol]
except KeyError:
pass
diff --git a/ruby/ext/google/protobuf_c/defs.c b/ruby/ext/google/protobuf_c/defs.c
index 34d9663a..d9d2ebac 100644
--- a/ruby/ext/google/protobuf_c/defs.c
+++ b/ruby/ext/google/protobuf_c/defs.c
@@ -76,7 +76,7 @@ static upb_enumdef* check_enum_notfrozen(const upb_enumdef* def) {
// -----------------------------------------------------------------------------
#define DEFINE_CLASS(name, string_name) \
- VALUE c ## name; \
+ VALUE c ## name = Qnil; \
const rb_data_type_t _ ## name ## _type = { \
string_name, \
{ name ## _mark, name ## _free, NULL }, \
@@ -126,11 +126,11 @@ void DescriptorPool_register(VALUE module) {
rb_define_method(klass, "lookup", DescriptorPool_lookup, 1);
rb_define_singleton_method(klass, "generated_pool",
DescriptorPool_generated_pool, 0);
- cDescriptorPool = klass;
rb_gc_register_address(&cDescriptorPool);
+ cDescriptorPool = klass;
- generated_pool = rb_class_new_instance(0, NULL, klass);
rb_gc_register_address(&generated_pool);
+ generated_pool = rb_class_new_instance(0, NULL, klass);
}
static void add_descriptor_to_pool(DescriptorPool* self,
@@ -299,8 +299,8 @@ void Descriptor_register(VALUE module) {
rb_define_method(klass, "name", Descriptor_name, 0);
rb_define_method(klass, "name=", Descriptor_name_set, 1);
rb_include_module(klass, rb_mEnumerable);
- cDescriptor = klass;
rb_gc_register_address(&cDescriptor);
+ cDescriptor = klass;
}
/*
@@ -518,8 +518,8 @@ void FieldDescriptor_register(VALUE module) {
rb_define_method(klass, "subtype", FieldDescriptor_subtype, 0);
rb_define_method(klass, "get", FieldDescriptor_get, 1);
rb_define_method(klass, "set", FieldDescriptor_set, 2);
- cFieldDescriptor = klass;
rb_gc_register_address(&cFieldDescriptor);
+ cFieldDescriptor = klass;
}
/*
@@ -916,8 +916,8 @@ void OneofDescriptor_register(VALUE module) {
rb_define_method(klass, "add_field", OneofDescriptor_add_field, 1);
rb_define_method(klass, "each", OneofDescriptor_each, 0);
rb_include_module(klass, rb_mEnumerable);
- cOneofDescriptor = klass;
rb_gc_register_address(&cOneofDescriptor);
+ cOneofDescriptor = klass;
}
/*
@@ -1037,8 +1037,8 @@ void EnumDescriptor_register(VALUE module) {
rb_define_method(klass, "each", EnumDescriptor_each, 0);
rb_define_method(klass, "enummodule", EnumDescriptor_enummodule, 0);
rb_include_module(klass, rb_mEnumerable);
- cEnumDescriptor = klass;
rb_gc_register_address(&cEnumDescriptor);
+ cEnumDescriptor = klass;
}
/*
@@ -1202,8 +1202,8 @@ void MessageBuilderContext_register(VALUE module) {
rb_define_method(klass, "repeated", MessageBuilderContext_repeated, -1);
rb_define_method(klass, "map", MessageBuilderContext_map, -1);
rb_define_method(klass, "oneof", MessageBuilderContext_oneof, 1);
- cMessageBuilderContext = klass;
rb_gc_register_address(&cMessageBuilderContext);
+ cMessageBuilderContext = klass;
}
/*
@@ -1491,8 +1491,8 @@ void OneofBuilderContext_register(VALUE module) {
rb_define_method(klass, "initialize",
OneofBuilderContext_initialize, 2);
rb_define_method(klass, "optional", OneofBuilderContext_optional, -1);
- cOneofBuilderContext = klass;
rb_gc_register_address(&cOneofBuilderContext);
+ cOneofBuilderContext = klass;
}
/*
@@ -1569,8 +1569,8 @@ void EnumBuilderContext_register(VALUE module) {
rb_define_method(klass, "initialize",
EnumBuilderContext_initialize, 1);
rb_define_method(klass, "value", EnumBuilderContext_value, 2);
- cEnumBuilderContext = klass;
rb_gc_register_address(&cEnumBuilderContext);
+ cEnumBuilderContext = klass;
}
/*
@@ -1645,8 +1645,8 @@ void Builder_register(VALUE module) {
rb_define_method(klass, "add_enum", Builder_add_enum, 1);
rb_define_method(klass, "initialize", Builder_initialize, 0);
rb_define_method(klass, "finalize_to_pool", Builder_finalize_to_pool, 1);
- cBuilder = klass;
rb_gc_register_address(&cBuilder);
+ cBuilder = klass;
}
/*
diff --git a/ruby/ext/google/protobuf_c/encode_decode.c b/ruby/ext/google/protobuf_c/encode_decode.c
index d1b6e89e..12080d03 100644
--- a/ruby/ext/google/protobuf_c/encode_decode.c
+++ b/ruby/ext/google/protobuf_c/encode_decode.c
@@ -1305,3 +1305,91 @@ VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
}
}
+static void discard_unknown(VALUE msg_rb, const Descriptor* desc) {
+ MessageHeader* msg;
+ upb_msg_field_iter it;
+
+ TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
+
+ stringsink* unknown = msg->unknown_fields;
+ if (unknown != NULL) {
+ stringsink_uninit(unknown);
+ msg->unknown_fields = NULL;
+ }
+
+ for (upb_msg_field_begin(&it, desc->msgdef);
+ !upb_msg_field_done(&it);
+ upb_msg_field_next(&it)) {
+ upb_fielddef *f = upb_msg_iter_field(&it);
+ uint32_t offset =
+ desc->layout->fields[upb_fielddef_index(f)].offset +
+ sizeof(MessageHeader);
+
+ if (upb_fielddef_containingoneof(f)) {
+ uint32_t oneof_case_offset =
+ desc->layout->fields[upb_fielddef_index(f)].case_offset +
+ sizeof(MessageHeader);
+ // For a oneof, check that this field is actually present -- skip all the
+ // below if not.
+ if (DEREF(msg, oneof_case_offset, uint32_t) !=
+ upb_fielddef_number(f)) {
+ continue;
+ }
+ // Otherwise, fall through to the appropriate singular-field handler
+ // below.
+ }
+
+ if (!upb_fielddef_issubmsg(f)) {
+ continue;
+ }
+
+ if (is_map_field(f)) {
+ if (!upb_fielddef_issubmsg(map_field_value(f))) continue;
+ VALUE map = DEREF(msg, offset, VALUE);
+ if (map == Qnil) continue;
+ Map_iter map_it;
+ for (Map_begin(map, &map_it); !Map_done(&map_it); Map_next(&map_it)) {
+ VALUE submsg = Map_iter_value(&map_it);
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+ discard_unknown(submsg, subdesc);
+ }
+ } else if (upb_fielddef_isseq(f)) {
+ VALUE ary = DEREF(msg, offset, VALUE);
+ if (ary == Qnil) continue;
+ int size = NUM2INT(RepeatedField_length(ary));
+ for (int i = 0; i < size; i++) {
+ void* memory = RepeatedField_index_native(ary, i);
+ VALUE submsg = *((VALUE *)memory);
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+ discard_unknown(submsg, subdesc);
+ }
+ } else {
+ VALUE submsg = DEREF(msg, offset, VALUE);
+ if (submsg == Qnil) continue;
+ VALUE descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
+ const Descriptor* subdesc = ruby_to_Descriptor(descriptor);
+ discard_unknown(submsg, subdesc);
+ }
+ }
+}
+
+/*
+ * call-seq:
+ * Google::Protobuf.discard_unknown(msg)
+ *
+ * Discard unknown fields in the given message object and recursively discard
+ * unknown fields in submessages.
+ */
+VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb) {
+ VALUE klass = CLASS_OF(msg_rb);
+ VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
+ Descriptor* desc = ruby_to_Descriptor(descriptor);
+ if (klass == cRepeatedField || klass == cMap) {
+ rb_raise(rb_eArgError, "Expected proto msg for discard unknown.");
+ } else {
+ discard_unknown(msg_rb, desc);
+ }
+ return Qnil;
+}
diff --git a/ruby/ext/google/protobuf_c/map.c b/ruby/ext/google/protobuf_c/map.c
index 26e22dc7..8c2f6424 100644
--- a/ruby/ext/google/protobuf_c/map.c
+++ b/ruby/ext/google/protobuf_c/map.c
@@ -825,8 +825,8 @@ VALUE Map_iter_value(Map_iter* iter) {
void Map_register(VALUE module) {
VALUE klass = rb_define_class_under(module, "Map", rb_cObject);
rb_define_alloc_func(klass, Map_alloc);
- cMap = klass;
rb_gc_register_address(&cMap);
+ cMap = klass;
rb_define_method(klass, "initialize", Map_init, -1);
rb_define_method(klass, "each", Map_each, 0);
diff --git a/ruby/ext/google/protobuf_c/protobuf.c b/ruby/ext/google/protobuf_c/protobuf.c
index c7750c44..db696426 100644
--- a/ruby/ext/google/protobuf_c/protobuf.c
+++ b/ruby/ext/google/protobuf_c/protobuf.c
@@ -103,6 +103,8 @@ void Init_protobuf_c() {
cError = rb_const_get(protobuf, rb_intern("Error"));
cParseError = rb_const_get(protobuf, rb_intern("ParseError"));
+ rb_define_singleton_method(protobuf, "discard_unknown",
+ Google_Protobuf_discard_unknown, 1);
rb_define_singleton_method(protobuf, "deep_copy",
Google_Protobuf_deep_copy, 1);
diff --git a/ruby/ext/google/protobuf_c/protobuf.h b/ruby/ext/google/protobuf_c/protobuf.h
index 1291ac59..5266aa8d 100644
--- a/ruby/ext/google/protobuf_c/protobuf.h
+++ b/ruby/ext/google/protobuf_c/protobuf.h
@@ -515,6 +515,7 @@ VALUE Message_encode(VALUE klass, VALUE msg_rb);
VALUE Message_decode_json(VALUE klass, VALUE data);
VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass);
+VALUE Google_Protobuf_discard_unknown(VALUE self, VALUE msg_rb);
VALUE Google_Protobuf_deep_copy(VALUE self, VALUE obj);
VALUE build_module_from_enumdesc(EnumDescriptor* enumdef);
diff --git a/ruby/ext/google/protobuf_c/repeated_field.c b/ruby/ext/google/protobuf_c/repeated_field.c
index 1c651c19..c6620ee6 100644
--- a/ruby/ext/google/protobuf_c/repeated_field.c
+++ b/ruby/ext/google/protobuf_c/repeated_field.c
@@ -626,8 +626,8 @@ void RepeatedField_register(VALUE module) {
VALUE klass = rb_define_class_under(
module, "RepeatedField", rb_cObject);
rb_define_alloc_func(klass, RepeatedField_alloc);
- cRepeatedField = klass;
rb_gc_register_address(&cRepeatedField);
+ cRepeatedField = klass;
rb_define_method(klass, "initialize",
RepeatedField_init, -1);
diff --git a/ruby/tests/encode_decode_test.rb b/ruby/tests/encode_decode_test.rb
new file mode 100644
index 00000000..09581ab0
--- /dev/null
+++ b/ruby/tests/encode_decode_test.rb
@@ -0,0 +1,63 @@
+#!/usr/bin/ruby
+
+# generated_code_pb.rb is in the same directory as this test.
+$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
+
+require 'generated_code_pb'
+require 'test/unit'
+
+def hex2bin(s)
+ s.scan(/../).map { |x| x.hex.chr }.join
+end
+
+class EncodeDecodeTest < Test::Unit::TestCase
+ def test_discard_unknown
+ # Test discard unknown in message.
+ unknown_msg = A::B::C::TestUnknown.new(:unknown_field => 1)
+ from = A::B::C::TestUnknown.encode(unknown_msg)
+ m = A::B::C::TestMessage.decode(from)
+ Google::Protobuf.discard_unknown(m)
+ to = A::B::C::TestMessage.encode(m)
+ assert_equal '', to
+
+ # Test discard unknown for singular message field.
+ unknown_msg = A::B::C::TestUnknown.new(
+ :optional_unknown =>
+ A::B::C::TestUnknown.new(:unknown_field => 1))
+ from = A::B::C::TestUnknown.encode(unknown_msg)
+ m = A::B::C::TestMessage.decode(from)
+ Google::Protobuf.discard_unknown(m)
+ to = A::B::C::TestMessage.encode(m.optional_msg)
+ assert_equal '', to
+
+ # Test discard unknown for repeated message field.
+ unknown_msg = A::B::C::TestUnknown.new(
+ :repeated_unknown =>
+ [A::B::C::TestUnknown.new(:unknown_field => 1)])
+ from = A::B::C::TestUnknown.encode(unknown_msg)
+ m = A::B::C::TestMessage.decode(from)
+ Google::Protobuf.discard_unknown(m)
+ to = A::B::C::TestMessage.encode(m.repeated_msg[0])
+ assert_equal '', to
+
+ # Test discard unknown for map value message field.
+ unknown_msg = A::B::C::TestUnknown.new(
+ :map_unknown =>
+ {"" => A::B::C::TestUnknown.new(:unknown_field => 1)})
+ from = A::B::C::TestUnknown.encode(unknown_msg)
+ m = A::B::C::TestMessage.decode(from)
+ Google::Protobuf.discard_unknown(m)
+ to = A::B::C::TestMessage.encode(m.map_string_msg[''])
+ assert_equal '', to
+
+ # Test discard unknown for oneof message field.
+ unknown_msg = A::B::C::TestUnknown.new(
+ :oneof_unknown =>
+ A::B::C::TestUnknown.new(:unknown_field => 1))
+ from = A::B::C::TestUnknown.encode(unknown_msg)
+ m = A::B::C::TestMessage.decode(from)
+ Google::Protobuf.discard_unknown(m)
+ to = A::B::C::TestMessage.encode(m.oneof_msg)
+ assert_equal '', to
+ end
+end
diff --git a/ruby/tests/generated_code.proto b/ruby/tests/generated_code.proto
index 62fd83ed..3b934bd6 100644
--- a/ruby/tests/generated_code.proto
+++ b/ruby/tests/generated_code.proto
@@ -57,6 +57,9 @@ message TestMessage {
}
NestedMessage nested_message = 80;
+
+ // Reserved for non-existing field test.
+ // int32 non_exist = 89;
}
enum TestEnum {
@@ -65,3 +68,13 @@ enum TestEnum {
B = 2;
C = 3;
}
+
+message TestUnknown {
+ TestUnknown optional_unknown = 11;
+ repeated TestUnknown repeated_unknown = 31;
+ oneof my_oneof {
+ TestUnknown oneof_unknown = 51;
+ }
+ map<string, TestUnknown> map_unknown = 67;
+ int32 unknown_field = 89;
+}
diff --git a/src/google/protobuf/stubs/io_win32.cc b/src/google/protobuf/stubs/io_win32.cc
index fa2cb8b1..ad2d2d26 100644
--- a/src/google/protobuf/stubs/io_win32.cc
+++ b/src/google/protobuf/stubs/io_win32.cc
@@ -30,10 +30,11 @@
// Author: laszlocsomor@google.com (Laszlo Csomor)
//
-// Implementation for long-path-aware open/mkdir/etc. on Windows.
+// Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well
+// as for the supporting utility functions.
//
// These functions convert the input path to an absolute Windows path
-// with "\\?\" prefix if necessary, then pass that to _wopen/_wmkdir/etc.
+// with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc.
// (declared in <io.h>) respectively. This allows working with files/directories
// whose paths are longer than MAX_PATH (260 chars).
//
@@ -59,7 +60,6 @@
#include <google/protobuf/stubs/io_win32.h>
#include <google/protobuf/stubs/scoped_ptr.h>
-#include <cassert>
#include <memory>
#include <sstream>
#include <string>
@@ -89,6 +89,11 @@ struct CharTraits<wchar_t> {
static bool is_alpha(wchar_t ch) { return iswalpha(ch); }
};
+template <typename char_type>
+bool null_or_empty(const char_type* s) {
+ return s == nullptr || *s == 0;
+}
+
// Returns true if the path starts with a drive letter, e.g. "c:".
// Note that this won't check for the "\" after the drive letter, so this also
// returns true for "c:foo" (which is "c:\${PWD}\foo").
@@ -121,16 +126,7 @@ bool is_drive_relative(const char_type* path) {
return has_drive_letter(path) && (path[2] == 0 || !is_separator(path[2]));
}
-template <typename char_type>
-void replace_directory_separators(char_type* p) {
- for (; *p; ++p) {
- if (*p == '/') {
- *p = '\\';
- }
- }
-}
-
-string join_paths(const string& path1, const string& path2) {
+wstring join_paths(const wstring& path1, const wstring& path2) {
if (path1.empty() || is_path_absolute(path2.c_str()) ||
has_longpath_prefix(path2.c_str())) {
return path2;
@@ -144,23 +140,23 @@ string join_paths(const string& path1, const string& path2) {
: (path1 + path2);
} else {
return is_separator(path2[0]) ? (path1 + path2)
- : (path1 + '\\' + path2);
+ : (path1 + L'\\' + path2);
}
}
-string normalize(string path) {
+wstring normalize(wstring path) {
if (has_longpath_prefix(path.c_str())) {
path = path.substr(4);
}
- static const string dot(".");
- static const string dotdot("..");
+ static const wstring dot(L".");
+ static const wstring dotdot(L"..");
- std::vector<string> segments;
+ std::vector<wstring> segments;
int segment_start = -1;
// Find the path segments in `path` (separated by "/").
for (int i = 0;; ++i) {
- if (!is_separator(path[i]) && path[i] != '\0') {
+ if (!is_separator(path[i]) && path[i] != L'\0') {
// The current character does not end a segment, so start one unless it's
// already started.
if (segment_start < 0) {
@@ -169,7 +165,7 @@ string normalize(string path) {
} else if (segment_start >= 0 && i > segment_start) {
// The current character is "/" or "\0", so this ends a segment.
// Add that to `segments` if there's anything to add; handle "." and "..".
- string segment(path, segment_start, i - segment_start);
+ wstring segment(path, segment_start, i - segment_start);
segment_start = -1;
if (segment == dotdot) {
if (!segments.empty() &&
@@ -180,7 +176,7 @@ string normalize(string path) {
segments.push_back(segment);
}
}
- if (path[i] == '\0') {
+ if (path[i] == L'\0') {
break;
}
}
@@ -189,64 +185,62 @@ string normalize(string path) {
// form of it, e.g. "c:\..").
if (segments.size() == 1 && segments[0].size() == 2 &&
has_drive_letter(segments[0].c_str())) {
- return segments[0] + '\\';
+ return segments[0] + L'\\';
}
// Join all segments.
bool first = true;
- std::ostringstream result;
+ std::wstringstream result;
for (int i = 0; i < segments.size(); ++i) {
if (!first) {
- result << '\\';
+ result << L'\\';
}
first = false;
result << segments[i];
}
// Preserve trailing separator if the input contained it.
if (!path.empty() && is_separator(path[path.size() - 1])) {
- result << '\\';
+ result << L'\\';
}
return result.str();
}
-WCHAR* as_wstring(const string& s) {
- int len = ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), NULL, 0);
- WCHAR* result = new WCHAR[len + 1];
- ::MultiByteToWideChar(CP_UTF8, 0, s.c_str(), s.size(), result, len + 1);
- result[len] = 0;
- return result;
-}
-
-void as_wchar_path(const string& path, wstring* wchar_path) {
- scoped_array<WCHAR> wbuf(as_wstring(path));
- replace_directory_separators(wbuf.get());
- wchar_path->assign(wbuf.get());
-}
-
-bool as_windows_path(const string& path, wstring* result) {
- if (path.empty()) {
+bool as_windows_path(const char* path, wstring* result) {
+ if (null_or_empty(path)) {
result->clear();
return true;
}
- if (is_separator(path[0]) || is_drive_relative(path.c_str())) {
+ wstring wpath;
+ if (!strings::utf8_to_wcs(path, &wpath)) {
+ return false;
+ }
+ if (has_longpath_prefix(wpath.c_str())) {
+ *result = wpath;
+ return true;
+ }
+ if (is_separator(path[0]) || is_drive_relative(path)) {
return false;
}
- string mutable_path = path;
- if (!is_path_absolute(mutable_path.c_str()) &&
- !has_longpath_prefix(mutable_path.c_str())) {
- char cwd[MAX_PATH];
- ::GetCurrentDirectoryA(MAX_PATH, cwd);
- mutable_path = join_paths(cwd, mutable_path);
+
+ if (!is_path_absolute(wpath.c_str())) {
+ int size = ::GetCurrentDirectoryW(0, NULL);
+ if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+ return false;
+ }
+ scoped_array<WCHAR> wcwd(new WCHAR[size]);
+ ::GetCurrentDirectoryW(size, wcwd.get());
+ wpath = join_paths(wcwd.get(), wpath);
}
- as_wchar_path(normalize(mutable_path), result);
- if (!has_longpath_prefix(result->c_str())) {
+ wpath = normalize(wpath);
+ if (!has_longpath_prefix(wpath.c_str())) {
// Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API
// from processing the path and "helpfully" removing trailing dots from the
// path, for example.
// See https://github.com/bazelbuild/bazel/issues/2935
- *result = wstring(L"\\\\?\\") + *result;
+ wpath = wstring(L"\\\\?\\") + wpath;
}
+ *result = wpath;
return true;
}
@@ -319,13 +313,21 @@ int stat(const char* path, struct _stat* buffer) {
FILE* fopen(const char* path, const char* mode) {
#ifdef SUPPORT_LONGPATHS
+ if (null_or_empty(path)) {
+ errno = EINVAL;
+ return NULL;
+ }
wstring wpath;
if (!as_windows_path(path, &wpath)) {
errno = ENOENT;
return NULL;
}
- scoped_array<WCHAR> wmode(as_wstring(mode));
- return ::_wfopen(wpath.c_str(), wmode.get());
+ wstring wmode;
+ if (!strings::utf8_to_wcs(mode, &wmode)) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return ::_wfopen(wpath.c_str(), wmode.c_str());
#else
return ::fopen(path, mode);
#endif
@@ -347,16 +349,65 @@ int write(int fd, const void* buffer, size_t size) {
return ::_write(fd, buffer, size);
}
-wstring testonly_path_to_winpath(const string& path) {
+wstring testonly_utf8_to_winpath(const char* path) {
wstring wpath;
- as_windows_path(path, &wpath);
- return wpath;
+ return as_windows_path(path, &wpath) ? wpath : wstring();
+}
+
+namespace strings {
+
+// Converts the NUL-terminated UTF-16 string `s` to UTF-8 (if `outUtf8` is
+// true) or to the Active Code Page encoding (otherwise) and stores the result
+// in `*out`.
+//
+// A null or empty `s` yields an empty `*out` and counts as success. Returns
+// false if the conversion fails, or if the Active Code Page conversion had to
+// substitute a default character for one it could not represent.
+bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) {
+  if (null_or_empty(s)) {
+    out->clear();
+    return true;
+  }
+  const UINT code_page = outUtf8 ? CP_UTF8 : CP_ACP;
+  BOOL usedDefaultChar = FALSE;
+  // First call: query the required buffer size in bytes. The input length is
+  // passed as -1, so the size includes the terminating NUL. The
+  // used-default-char output must be NULL when converting to UTF-8.
+  int size = WideCharToMultiByte(
+      code_page, 0, s, -1, NULL, 0, NULL, outUtf8 ? NULL : &usedDefaultChar);
+  // A non-empty input needs at least one byte, so 0 always means failure.
+  if (size <= 0 || usedDefaultChar) {
+    return false;
+  }
+  scoped_array<CHAR> astr(new CHAR[size]);
+  // Second call: perform the conversion. The buffer must not be read if this
+  // fails, because its contents would be uninitialized.
+  if (WideCharToMultiByte(
+          code_page, 0, s, -1, astr.get(), size, NULL, NULL) == 0) {
+    return false;
+  }
+  out->assign(astr.get());
+  return true;
+}
+
+// Converts the NUL-terminated string `s`, encoded as UTF-8 (if `inUtf8` is
+// true) or in the Active Code Page (otherwise), to UTF-16 and stores the
+// result in `*out`.
+//
+// A null or empty `s` yields an empty `*out` and counts as success. Returns
+// false if the conversion fails.
+bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) {
+  if (null_or_empty(s)) {
+    out->clear();
+    return true;
+  }
+
+  const UINT code_page = inUtf8 ? CP_UTF8 : CP_ACP;
+  // First call: query the required size in WCHARs. The input length is passed
+  // as -1, so the size includes the terminating NUL.
+  int size = MultiByteToWideChar(code_page, 0, s, -1, NULL, 0);
+  // A non-empty input needs at least one WCHAR, so 0 always means failure.
+  if (size <= 0) {
+    return false;
+  }
+  scoped_array<WCHAR> wstr(new WCHAR[size]);
+  // Second call: perform the conversion. Pass the real capacity of the buffer
+  // (`size`, not `size + 1`) so the API is never told it may write past the
+  // end of the allocation.
+  if (MultiByteToWideChar(code_page, 0, s, -1, wstr.get(), size) == 0) {
+    return false;
+  }
+  out->assign(wstr.get());
+  return true;
}
+// Converts UTF-8-encoded text to UTF-16; same contract as mbs_to_wcs.
+bool utf8_to_wcs(const char* input, wstring* out) {
+  return mbs_to_wcs(input, out, true);
+}
+
+// Converts UTF-16-encoded text to UTF-8; same contract as wcs_to_mbs.
+bool wcs_to_utf8(const wchar_t* input, string* out) {
+  return wcs_to_mbs(input, out, true);
+}
+
+}  // namespace strings
} // namespace win32
} // namespace internal
} // namespace protobuf
} // namespace google
#endif // defined(_WIN32)
-
diff --git a/src/google/protobuf/stubs/io_win32.h b/src/google/protobuf/stubs/io_win32.h
index 53160089..9e17d253 100644
--- a/src/google/protobuf/stubs/io_win32.h
+++ b/src/google/protobuf/stubs/io_win32.h
@@ -69,8 +69,25 @@ LIBPROTOBUF_EXPORT int read(int fd, void* buffer, size_t size);
LIBPROTOBUF_EXPORT int setmode(int fd, int mode);
LIBPROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer);
LIBPROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size);
-LIBPROTOBUF_EXPORT std::wstring testonly_path_to_winpath(
- const std::string& path);
+LIBPROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path);
+
+namespace strings {
+
+// Convert from UTF-16 to Active-Code-Page-encoded or to UTF-8-encoded text.
+//
+// `s` is a NUL-terminated UTF-16 string. The output is UTF-8 if `outUtf8` is
+// true and Active-Code-Page-encoded otherwise. A null or empty `s` clears
+// `*out` and returns true. Returns false if the conversion fails, or if the
+// Active Code Page conversion had to use a default character.
+LIBPROTOBUF_EXPORT bool wcs_to_mbs(
+ const wchar_t* s, std::string* out, bool outUtf8);
+
+// Convert from Active-Code-Page-encoded or UTF-8-encoded text to UTF-16.
+//
+// `s` is a NUL-terminated string, interpreted as UTF-8 if `inUtf8` is true
+// and as Active-Code-Page-encoded otherwise. A null or empty `s` clears
+// `*out` and returns true. Returns false if the conversion fails.
+LIBPROTOBUF_EXPORT bool mbs_to_wcs(
+ const char* s, std::wstring* out, bool inUtf8);
+
+// Convert from UTF-8-encoded text to UTF-16.
+// Equivalent to mbs_to_wcs(input, out, true).
+LIBPROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out);
+
+// Convert from UTF-16-encoded text to UTF-8.
+// Equivalent to wcs_to_mbs(input, out, true).
+LIBPROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out);
+
+} // namespace strings
} // namespace win32
} // namespace internal
diff --git a/src/google/protobuf/stubs/io_win32_unittest.cc b/src/google/protobuf/stubs/io_win32_unittest.cc
index ce6f7162..b216aece 100644
--- a/src/google/protobuf/stubs/io_win32_unittest.cc
+++ b/src/google/protobuf/stubs/io_win32_unittest.cc
@@ -30,7 +30,8 @@
// Author: laszlocsomor@google.com (Laszlo Csomor)
//
-// Unit tests for long-path-aware open/mkdir/access on Windows.
+// Unit tests for long-path-aware open/mkdir/access/etc. on Windows, as well as
+// for the supporting utility functions.
//
// This file is only used on Windows, it's empty on other platforms.
@@ -48,7 +49,6 @@
#include <google/protobuf/stubs/io_win32.h>
#include <google/protobuf/stubs/scoped_ptr.h>
-#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
#include <memory>
@@ -61,6 +61,27 @@ namespace internal {
namespace win32 {
namespace {
+// UTF-8 encoding of the text "hi " followed by six Cyrillic letters
+// (U+041F U+0440 U+0438 U+0432 U+0435 U+0442), NUL-terminated.
+//
+// Bytes >= 0x80 are cast explicitly: where plain `char` is signed they do not
+// fit in a `char`, and an implicit int -> char conversion of such a value
+// inside a braced initializer is a narrowing conversion.
+const char kUtf8Text[] = {
+    'h', 'i', ' ',
+    // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F
+    static_cast<char>(0xd0), static_cast<char>(0x9f),
+    // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440
+    static_cast<char>(0xd1), static_cast<char>(0x80),
+    // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438
+    static_cast<char>(0xd0), static_cast<char>(0xb8),
+    // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432
+    static_cast<char>(0xd0), static_cast<char>(0xb2),
+    // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435
+    static_cast<char>(0xd0), static_cast<char>(0xb5),
+    // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442
+    static_cast<char>(0xd1), static_cast<char>(0x82), 0
+};
+
+// UTF-16 counterpart of kUtf8Text: "hi " followed by the code units
+// 0x041F 0x0440 0x0438 0x0432 0x0435 0x0442, NUL-terminated.
+const wchar_t kUtf16Text[] = {
+ L'h', L'i', L' ',
+ L'\x41f', L'\x440', L'\x438', L'\x432', L'\x435', L'\x442', 0
+};
+
using std::string;
using std::wstring;
@@ -73,6 +94,7 @@ class IoWin32Test : public ::testing::Test {
bool CreateAllUnder(wstring path);
bool DeleteAllUnder(wstring path);
+ WCHAR working_directory[MAX_PATH];
string test_tmpdir;
wstring wtest_tmpdir;
};
@@ -89,71 +111,88 @@ void StripTrailingSlashes(string* str) {
for (; i >= 0 && ((*str)[i] == '/' || (*str)[i] == '\\'); --i) {}
str->resize(i+1);
}
+
+// Reads the environment variable `name` and stores its value, converted from
+// UTF-16 to UTF-8, in `*result`. Returns false if the variable cannot be read
+// (for example because it is not set) or if the conversion fails.
+bool GetEnvVarAsUtf8(const WCHAR* name, string* result) {
+  // With a zero-sized buffer the call returns the required size in WCHARs,
+  // including the terminating NUL, or 0 on failure. The last-error code is
+  // only meaningful after a failure, so it is not consulted on success: it
+  // could be stale from an earlier, failed lookup of another variable.
+  DWORD size = ::GetEnvironmentVariableW(name, NULL, 0);
+  if (size == 0) {
+    return false;
+  }
+  scoped_array<WCHAR> wcs(new WCHAR[size]);
+  // Start from an empty string so the buffer is never read uninitialized.
+  wcs.get()[0] = 0;
+  // On success the return value excludes the terminating NUL and is therefore
+  // smaller than `size`. A larger value means the variable grew between the
+  // two calls and the buffer was not filled.
+  if (::GetEnvironmentVariableW(name, wcs.get(), size) >= size) {
+    return false;
+  }
+  // GetEnvironmentVariableW retrieves UTF-16-encoded text, which needs a
+  // single conversion to UTF-8. GetEnvironmentVariableA would retrieve
+  // Active-Code-Page-encoded text, which would first have to be converted to
+  // UTF-16 and then to UTF-8, because there seems to be no API function to do
+  // that conversion directly.
+  return strings::wcs_to_utf8(wcs.get(), result);
+}
+
+// Stores the current working directory, converted from UTF-16 to UTF-8, in
+// `*result`. Returns false if the directory cannot be retrieved or if the
+// conversion fails.
+bool GetCwdAsUtf8(string* result) {
+  // With a zero-sized buffer the call returns the required size in WCHARs,
+  // including the terminating NUL, or 0 on failure.
+  DWORD size = ::GetCurrentDirectoryW(0, NULL);
+  if (size == 0) {
+    return false;
+  }
+  scoped_array<WCHAR> wcs(new WCHAR[size]);
+  // On success the return value excludes the terminating NUL and is therefore
+  // smaller than `size`. 0 means failure, and a larger value means the
+  // directory changed between the two calls and the buffer was not filled;
+  // in both cases the buffer must not be read.
+  DWORD written = ::GetCurrentDirectoryW(size, wcs.get());
+  if (written == 0 || written >= size) {
+    return false;
+  }
+  // GetCurrentDirectoryW retrieves UTF-16-encoded text, which needs a single
+  // conversion to UTF-8. GetCurrentDirectoryA would retrieve
+  // Active-Code-Page-encoded text, which would first have to be converted to
+  // UTF-16 and then to UTF-8, because there seems to be no API function to do
+  // that conversion directly.
+  return strings::wcs_to_utf8(wcs.get(), result);
+}
+
} // namespace
void IoWin32Test::SetUp() {
- test_tmpdir = string(TestTempDir());
+ test_tmpdir.clear();
wtest_tmpdir.clear();
- if (test_tmpdir.empty()) {
- const char* test_tmpdir_env = getenv("TEST_TMPDIR");
- if (test_tmpdir_env != NULL && *test_tmpdir_env) {
- test_tmpdir = string(test_tmpdir_env);
- }
-
- // Only Bazel defines TEST_TMPDIR, CMake does not, so look for other
- // suitable environment variables.
- if (test_tmpdir.empty()) {
- static const char* names[] = {"TEMP", "TMP"};
- for (int i = 0; i < sizeof(names)/sizeof(names[0]); ++i) {
- const char* name = names[i];
- test_tmpdir_env = getenv(name);
- if (test_tmpdir_env != NULL && *test_tmpdir_env) {
- test_tmpdir = string(test_tmpdir_env);
- break;
- }
- }
- }
+ EXPECT_GT(::GetCurrentDirectoryW(MAX_PATH, working_directory), 0);
- // No other temp directory was found. Use the current director
- if (test_tmpdir.empty()) {
- char buffer[MAX_PATH];
- // Use GetCurrentDirectoryA instead of GetCurrentDirectoryW, because the
- // current working directory must always be shorter than MAX_PATH, even
- // with
- // "\\?\" prefix (except on Windows 10 version 1607 and beyond, after
- // opting in to long paths by default [1]).
- //
- // [1] https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
- DWORD result = ::GetCurrentDirectoryA(MAX_PATH, buffer);
- if (result > 0) {
- test_tmpdir = string(buffer);
- } else {
- // Using assertions in SetUp/TearDown seems to confuse the test
- // framework, so just leave the member variables empty in case of
- // failure.
- GOOGLE_CHECK_OK(false);
- return;
- }
- }
+ string tmp;
+ bool ok = false;
+ if (!ok) {
+ // Bazel sets this environment variable when it runs tests.
+ ok = GetEnvVarAsUtf8(L"TEST_TMPDIR", &tmp);
}
-
- StripTrailingSlashes(&test_tmpdir);
- test_tmpdir += "\\io_win32_unittest.tmp";
-
- // CreateDirectoryA's limit is 248 chars, see MSDN.
- // https://msdn.microsoft.com/en-us/library/windows/desktop/aa363855(v=vs.85).aspx
- wtest_tmpdir = testonly_path_to_winpath(test_tmpdir);
- if (!DeleteAllUnder(wtest_tmpdir) || !CreateAllUnder(wtest_tmpdir)) {
- GOOGLE_CHECK_OK(false);
- test_tmpdir.clear();
- wtest_tmpdir.clear();
+ if (!ok) {
+ // Bazel 0.8.0 sets this variable for every build and test action.
+ ok = GetEnvVarAsUtf8(L"TEMP", &tmp);
}
+ if (!ok) {
+ // Bazel 0.8.0 sets this variable for every build and test action.
+ ok = GetEnvVarAsUtf8(L"TMP", &tmp);
+ }
+ if (!ok) {
+ // Fall back to using the current directory.
+ ok = GetCwdAsUtf8(&tmp);
+ }
+ if (!ok || tmp.empty()) {
+ FAIL() << "Cannot find a temp directory.";
+ }
+
+ StripTrailingSlashes(&tmp);
+ std::stringstream result;
+ // Deleting files and directories is asynchronous on Windows, and if TearDown
+ // just deleted the previous temp directory, sometimes we cannot recreate the
+ // same directory.
+ // Use a counter so every test method gets its own temp directory.
+ static unsigned int counter = 0;
+ result << tmp << "\\w32tst" << counter++ << ".tmp";
+ test_tmpdir = result.str();
+ wtest_tmpdir = testonly_utf8_to_winpath(test_tmpdir.c_str());
+ ASSERT_FALSE(wtest_tmpdir.empty());
+ ASSERT_TRUE(DeleteAllUnder(wtest_tmpdir));
+ ASSERT_TRUE(CreateAllUnder(wtest_tmpdir));
}
void IoWin32Test::TearDown() {
if (!wtest_tmpdir.empty()) {
DeleteAllUnder(wtest_tmpdir);
}
+ ::SetCurrentDirectoryW(working_directory);
}
bool IoWin32Test::CreateAllUnder(wstring path) {
@@ -191,8 +230,8 @@ bool IoWin32Test::DeleteAllUnder(wstring path) {
path = wstring(L"\\\\?\\") + path;
}
// Append "\" if necessary.
- if (path[path.size() - 1] != '\\') {
- path.push_back('\\');
+ if (path[path.size() - 1] != L'\\') {
+ path.push_back(L'\\');
}
WIN32_FIND_DATAW metadata;
@@ -309,13 +348,24 @@ TEST_F(IoWin32Test, MkdirTest) {
ASSERT_EQ(errno, ENOENT);
}
+// Checks that mkdir() accepts a UTF-8 path with non-ASCII characters, after
+// first creating a similarly named directory directly with CreateDirectoryW.
+TEST_F(IoWin32Test, MkdirTestNonAscii) {
+ ASSERT_INITIALIZED;
+
+ // Create a non-ASCII path.
+ // Ensure that we can create the directory using CreateDirectoryW.
+ EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1").c_str(), NULL));
+ EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1\\" + kUtf16Text).c_str(), NULL));
+ // Ensure that we can create a very similarly named directory using mkdir.
+ // We don't attempt to delete and recreate the same directory, because on
+ // Windows, deleting files and directories seems to be asynchronous.
+ EXPECT_EQ(mkdir((test_tmpdir + "\\2").c_str(), 0644), 0);
+ EXPECT_EQ(mkdir((test_tmpdir + "\\2\\" + kUtf8Text).c_str(), 0644), 0);
+}
+
TEST_F(IoWin32Test, ChdirTest) {
- char owd[MAX_PATH];
- EXPECT_GT(::GetCurrentDirectoryA(MAX_PATH, owd), 0);
string path("C:\\");
EXPECT_EQ(access(path.c_str(), F_OK), 0);
ASSERT_EQ(chdir(path.c_str()), 0);
- EXPECT_TRUE(::SetCurrentDirectoryA(owd));
// Do not try to chdir into the test_tmpdir, it may already contain directory
// names with trailing dots.
@@ -330,17 +380,37 @@ TEST_F(IoWin32Test, ChdirTest) {
ASSERT_NE(chdir(path.c_str()), 0);
}
+// Checks that chdir() accepts a UTF-8 path with non-ASCII characters and that
+// the resulting current directory matches the UTF-16 form of that path.
+TEST_F(IoWin32Test, ChdirTestNonAscii) {
+ ASSERT_INITIALIZED;
+
+ // Create a directory with a non-ASCII path and ensure we can cd into it.
+ wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text);
+ string nonAscii;
+ EXPECT_TRUE(strings::wcs_to_utf8(wNonAscii.c_str(), &nonAscii));
+ EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), NULL));
+ // Remember the current directory so we can return to it below.
+ WCHAR cwd[MAX_PATH];
+ EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd));
+ // Ensure that we can cd into the path using SetCurrentDirectoryW.
+ EXPECT_TRUE(SetCurrentDirectoryW(wNonAscii.c_str()));
+ // Go back, so that the chdir call below actually has to change directory.
+ EXPECT_TRUE(SetCurrentDirectoryW(cwd));
+ // Ensure that we can cd into the path using chdir.
+ ASSERT_EQ(chdir(nonAscii.c_str()), 0);
+ // Ensure that GetCurrentDirectoryW returns the desired path.
+ EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd));
+ ASSERT_EQ(wNonAscii, cwd);
+}
+
TEST_F(IoWin32Test, AsWindowsPathTest) {
DWORD size = GetCurrentDirectoryW(0, NULL);
scoped_array<wchar_t> cwd_str(new wchar_t[size]);
EXPECT_GT(GetCurrentDirectoryW(size, cwd_str.get()), 0);
wstring cwd = wstring(L"\\\\?\\") + cwd_str.get();
- ASSERT_EQ(testonly_path_to_winpath("relative_mkdirtest"),
+ ASSERT_EQ(testonly_utf8_to_winpath("relative_mkdirtest"),
cwd + L"\\relative_mkdirtest");
- ASSERT_EQ(testonly_path_to_winpath("preserve//\\trailing///"),
+ ASSERT_EQ(testonly_utf8_to_winpath("preserve//\\trailing///"),
cwd + L"\\preserve\\trailing\\");
- ASSERT_EQ(testonly_path_to_winpath("./normalize_me\\/../blah"),
+ ASSERT_EQ(testonly_utf8_to_winpath("./normalize_me\\/../blah"),
cwd + L"\\blah");
std::ostringstream relpath;
for (wchar_t* p = cwd_str.get(); *p; ++p) {
@@ -349,18 +419,28 @@ TEST_F(IoWin32Test, AsWindowsPathTest) {
}
}
relpath << ".\\/../\\./beyond-toplevel";
- ASSERT_EQ(testonly_path_to_winpath(relpath.str()),
+ ASSERT_EQ(testonly_utf8_to_winpath(relpath.str().c_str()),
wstring(L"\\\\?\\") + cwd_str.get()[0] + L":\\beyond-toplevel");
// Absolute unix paths lack drive letters, driveless absolute windows paths
// do too. Neither can be converted to a drive-specifying absolute Windows
// path.
- ASSERT_EQ(testonly_path_to_winpath("/absolute/unix/path"), L"");
+ ASSERT_EQ(testonly_utf8_to_winpath("/absolute/unix/path"), L"");
// Though valid on Windows, we also don't support UNC paths (\\UNC\\blah).
- ASSERT_EQ(testonly_path_to_winpath("\\driveless\\absolute"), L"");
+ ASSERT_EQ(testonly_utf8_to_winpath("\\driveless\\absolute"), L"");
// Though valid in cmd.exe, drive-relative paths are not supported.
- ASSERT_EQ(testonly_path_to_winpath("c:foo"), L"");
- ASSERT_EQ(testonly_path_to_winpath("c:/foo"), L"\\\\?\\c:\\foo");
+ ASSERT_EQ(testonly_utf8_to_winpath("c:foo"), L"");
+ ASSERT_EQ(testonly_utf8_to_winpath("c:/foo"), L"\\\\?\\c:\\foo");
+ ASSERT_EQ(testonly_utf8_to_winpath("\\\\?\\C:\\foo"), L"\\\\?\\C:\\foo");
+}
+
+// Checks that utf8_to_wcs and wcs_to_utf8 convert kUtf8Text and kUtf16Text
+// into each other.
+TEST_F(IoWin32Test, Utf8Utf16ConversionTest) {
+ string mbs;
+ wstring wcs;
+ ASSERT_TRUE(strings::utf8_to_wcs(kUtf8Text, &wcs));
+ ASSERT_TRUE(strings::wcs_to_utf8(kUtf16Text, &mbs));
+ ASSERT_EQ(wcs, kUtf16Text);
+ ASSERT_EQ(mbs, kUtf8Text);
}
} // namespace