Never use strlen on utf8 runs so null characters work.

Fixes https://github.com/google/protobuf/issues/1933 Add a new test that forces strings into two different implementations from the NSString class cluster to help confirm we're exercising both paths by which CodedOutputStream will extract data from an NSString. Move the old +load test (that was flawed because the behavior really depends on the type of string from the NSString class cluster); into a unittest that targets the specific case we're adding a behavior confirmation on. As a bonus, improve the TextFormat generation of string characters < 0x20.
author: Thomas Van Lenten <thomasvl@google.com> 2016-08-08 18:02:43 -0400
committer: Thomas Van Lenten <thomasvl@google.com> 2016-08-09 10:37:16 -0400
commit: 1a6c1d092df6431c86b74a059214ab76942b8b33 (patch)
tree: 8a2d418467e7bcf2985243f90aa1900441f70c56 /objectivec/Tests/GPBCodedOuputStreamTests.m
parent: 237f321e338b50503eb38728afa6ad29bea6076a (diff)
1 files changed, 86 insertions, 0 deletions
diff --git a/objectivec/Tests/GPBCodedOuputStreamTests.m b/objectivec/Tests/GPBCodedOuputStreamTests.m
index 0723b645..2ad326be 100644
--- a/objectivec/Tests/GPBCodedOuputStreamTests.m
+++ b/objectivec/Tests/GPBCodedOuputStreamTests.m
@@ -193,6 +193,32 @@
   }
 }
 
+- (void)assertWriteStringNoTag:(NSData*)data
+                         value:(NSString *)value
+                       context:(NSString *)contextMessage {
+  NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory];
+  GPBCodedOutputStream* output =
+      [GPBCodedOutputStream streamWithOutputStream:rawOutput];
+  [output writeStringNoTag:value];
+  [output flush];
+
+  NSData* actual =
+      [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
+  XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
+
+  // Try different block sizes.
+  for (int blockSize = 1; blockSize <= 16; blockSize *= 2) {
+    rawOutput = [NSOutputStream outputStreamToMemory];
+    output = [GPBCodedOutputStream streamWithOutputStream:rawOutput
+                                               bufferSize:blockSize];
+    [output writeStringNoTag:value];
+    [output flush];
+
+    actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
+    XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
+  }
+}
+
 - (void)testWriteVarint1 {
   [self assertWriteVarint:bytes(0x00) value:0];
 }
@@ -337,4 +363,64 @@
   XCTAssertEqualObjects(rawBytes, goldenData);
 }
 
+- (void)testCFStringGetCStringPtrAndStringsWithNullChars {
+  // This test exists to verify that CFStrings with embedded NULLs still expose
+  // their raw buffer if they are backed by UTF8 storage. If this fails, the
+  // quick/direct access paths in GPBCodedOutputStream that depend on
+  // CFStringGetCStringPtr need to be re-evalutated (maybe just removed).
+  // And yes, we do get NULLs in strings from some servers.
+
+  char zeroTest[] = "\0Test\0String";
+  // Note: there is a \0 at the end of this since it is a c-string.
+  NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest
+                                                  length:sizeof(zeroTest)
+                                                encoding:NSUTF8StringEncoding];
+  const char *cString =
+      CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8);
+  XCTAssertTrue(cString != NULL);
+  // Again, if the above assert fails, then it means NSString no longer exposes
+  // the raw utf8 storage of a string created from utf8 input, so the code using
+  // CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take
+  // a different code path); but the optimizations for when
+  // CFStringGetCStringPtr does work could possibly go away.
+
+  XCTAssertEqual(sizeof(zeroTest),
+                 [asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
+  XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest)));
+  [asNSString release];
+}
+
+- (void)testWriteStringsWithZeroChar {
+  // Unicode allows `\0` as a character, and NSString is a class cluster, so
+  // there are a few different classes that could end up beind a given string.
+  // Historically, we've seen differences based on constant strings in code and
+  // strings built via the NSString apis. So this round trips them to ensure
+  // they are acting as expected.
+
+  NSArray<NSString *> *strs = @[
+    @"\0at start",
+    @"in\0middle",
+    @"at end\0",
+  ];
+  int i = 0;
+  for (NSString *str in strs) {
+    NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding];
+    NSMutableData *expected = [NSMutableData data];
+    uint8_t lengthByte = (uint8_t)asUTF8.length;
+    [expected appendBytes:&lengthByte length:1];
+    [expected appendData:asUTF8];
+
+    NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i];
+    [self assertWriteStringNoTag:expected value:str context:context];
+
+    // Force a new string to be built which gets a different class from the
+    // NSString class cluster than the literal did.
+    NSString *str2 = [NSString stringWithFormat:@"%@", str];
+    context = [NSString stringWithFormat:@"Loop %d - Built", i];
+    [self assertWriteStringNoTag:expected value:str2 context:context];
+
+    ++i;
+  }
+}
+
 @end
author	Thomas Van Lenten <thomasvl@google.com>	2016-08-08 18:02:43 -0400
committer	Thomas Van Lenten <thomasvl@google.com>	2016-08-09 10:37:16 -0400
commit	1a6c1d092df6431c86b74a059214ab76942b8b33 (patch)
tree	8a2d418467e7bcf2985243f90aa1900441f70c56 /objectivec/Tests/GPBCodedOuputStreamTests.m
parent	237f321e338b50503eb38728afa6ad29bea6076a (diff)