aboutsummaryrefslogtreecommitdiffhomepage
path: root/java/core/src/test
diff options
context:
space:
mode:
author Anuraag Agrawal <rag@istellar.jp> 2017-10-31 15:12:11 +0900
committer Anuraag Agrawal <anuraaga@gmail.com> 2017-11-29 15:44:58 +0900
commit 3e944aec9ebdf5043780fba751d604c0a55511f2 (patch)
tree 4dcf8fa933b44c4c306b1b06724b1658ce568432 /java/core/src/test
parent 3c6fd3f7f9ebd5bdd909e0915f79f5a118773d46 (diff)
Add a UTF-8 decoder that uses Unsafe to directly decode a byte buffer.
Diffstat (limited to 'java/core/src/test')
-rw-r--r-- java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java 325
-rw-r--r-- java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java 9
2 files changed, 334 insertions, 0 deletions
diff --git a/java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java b/java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java
new file mode 100644
index 00000000..359d4d74
--- /dev/null
+++ b/java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java
@@ -0,0 +1,325 @@
+package com.google.protobuf;
+
+import com.google.protobuf.Utf8.Processor;
+import com.google.protobuf.Utf8.SafeProcessor;
+import com.google.protobuf.Utf8.UnsafeProcessor;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+import junit.framework.TestCase;
+
+public class DecodeUtf8Test extends TestCase {
+ // Only used to report progress from the exhaustive three-byte test.
+ private static final Logger logger = Logger.getLogger(DecodeUtf8Test.class.getName());
+
+ // The assertion helpers below run every input through both implementations.
+ private static final Processor SAFE_PROCESSOR = new SafeProcessor();
+ private static final Processor UNSAFE_PROCESSOR = new UnsafeProcessor();
+
+ /**
+ * Round-trips every code point from {@link Character#MIN_CODE_POINT} through
+ * {@link Character#MAX_CODE_POINT} inclusive, skipping the surrogate range
+ * ({@link Character#MIN_SURROGATE} to {@link Character#MAX_SURROGATE}).
+ */
+ public void testRoundTripAllValidChars() throws Exception {
+ // Use <= so that U+10FFFF, the last code point, is exercised as well.
+ for (int i = Character.MIN_CODE_POINT; i <= Character.MAX_CODE_POINT; i++) {
+ if (i < Character.MIN_SURROGATE || i > Character.MAX_SURROGATE) {
+ String str = new String(Character.toChars(i));
+ assertRoundTrips(str);
+ }
+ }
+ }
+
+ // Test every invalid 1-, 2- and 3-byte combination. Valid ones were covered above.
+
+ /**
+ * Tries every possible single byte: those that {@link ByteString#isValidUtf8()} reports as
+ * invalid must be rejected by the decoders, and the number of valid ones must match the
+ * expected count.
+ */
+ public void testOneByte() throws Exception {
+ int roundTrippable = 0;
+ for (int value = Byte.MIN_VALUE; value <= Byte.MAX_VALUE; value++) {
+ ByteString candidate = ByteString.copyFrom(new byte[] { (byte) value });
+ if (candidate.isValidUtf8()) {
+ roundTrippable++;
+ continue;
+ }
+ assertInvalid(candidate.toByteArray());
+ }
+ assertEquals(IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, roundTrippable);
+ }
+
+ /**
+ * Tries every possible two-byte sequence: those that {@link ByteString#isValidUtf8()} reports
+ * as invalid must be rejected by the decoders, and the number of valid ones must match the
+ * expected count.
+ */
+ public void testTwoBytes() throws Exception {
+ int roundTrippable = 0;
+ for (int first = Byte.MIN_VALUE; first <= Byte.MAX_VALUE; first++) {
+ for (int second = Byte.MIN_VALUE; second <= Byte.MAX_VALUE; second++) {
+ byte[] pair = {(byte) first, (byte) second};
+ ByteString candidate = ByteString.copyFrom(pair);
+ if (candidate.isValidUtf8()) {
+ roundTrippable++;
+ continue;
+ }
+ assertInvalid(candidate.toByteArray());
+ }
+ }
+ assertEquals(IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT, roundTrippable);
+ }
+
+ /**
+ * Tries every possible three-byte sequence: those that {@link ByteString#isValidUtf8()}
+ * reports as invalid must be rejected by the decoders, and the number of valid ones must match
+ * the expected count. Does nothing when the TRAVIS environment variable is set.
+ */
+ public void testThreeBytes() throws Exception {
+ // Travis' OOM killer doesn't like this test
+ if (System.getenv("TRAVIS") != null) {
+ return;
+ }
+ int processed = 0;
+ int roundTrippable = 0;
+ for (int first = Byte.MIN_VALUE; first <= Byte.MAX_VALUE; first++) {
+ for (int second = Byte.MIN_VALUE; second <= Byte.MAX_VALUE; second++) {
+ for (int third = Byte.MIN_VALUE; third <= Byte.MAX_VALUE; third++) {
+ byte[] triple = {(byte) first, (byte) second, (byte) third};
+ if (ByteString.copyFrom(triple).isValidUtf8()) {
+ roundTrippable++;
+ } else {
+ assertInvalid(triple);
+ }
+ processed++;
+ // Emit a progress line once per million sequences.
+ if (processed % 1000000L == 0) {
+ logger.info("Processed " + (processed / 1000000L) + " million characters");
+ }
+ }
+ }
+ }
+ assertEquals(IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT, roundTrippable);
+ }
+
+ /**
+ * Tests that a sample of malformed four-byte sequences is rejected by the decoders.
+ */
+ public void testInvalid_4BytesSamples() throws Exception {
+ // Bad trailing bytes: the fourth byte is not a continuation byte (0x80-0xBF)
+ assertInvalid(0xF0, 0xA4, 0xAD, 0x7F);
+ assertInvalid(0xF0, 0xA4, 0xAD, 0xC0);
+
+ // Special cases for byte2: after 0xF0 it must be >= 0x90 (otherwise overlong), and after
+ // 0xF4 it must be <= 0x8F (otherwise beyond U+10FFFF)
+ assertInvalid(0xF0, 0x8F, 0xAD, 0xA2);
+ assertInvalid(0xF4, 0x90, 0xAD, 0xA2);
+ }
+
+ /**
+ * Round-trips sample text in several scripts, including characters that need four bytes in
+ * UTF-8 (written as surrogate pairs in the Java literals).
+ */
+ public void testRealStrings() throws Exception {
+ // English
+ assertRoundTrips("The quick brown fox jumps over the lazy dog");
+ // Danish
+ assertRoundTrips("Quizdeltagerne spiste jordb\u00e6r med fl\u00f8de, mens cirkusklovnen");
+ // Japanese
+ assertRoundTrips(
+ "\u3044\u308d\u306f\u306b\u307b\u3078\u3068\u3061\u308a\u306c\u308b\u3092");
+ // Hebrew
+ assertRoundTrips(
+ "\u05d3\u05d2 \u05e1\u05e7\u05e8\u05df \u05e9\u05d8 \u05d1\u05d9\u05dd "
+ + "\u05de\u05d0\u05d5\u05db\u05d6\u05d1 \u05d5\u05dc\u05e4\u05ea\u05e2"
+ + " \u05de\u05e6\u05d0 \u05dc\u05d5 \u05d7\u05d1\u05e8\u05d4 "
+ + "\u05d0\u05d9\u05da \u05d4\u05e7\u05dc\u05d9\u05d8\u05d4");
+ // Thai
+ assertRoundTrips(
+ " \u0e08\u0e07\u0e1d\u0e48\u0e32\u0e1f\u0e31\u0e19\u0e1e\u0e31\u0e12"
+ + "\u0e19\u0e32\u0e27\u0e34\u0e0a\u0e32\u0e01\u0e32\u0e23");
+ // Chinese
+ assertRoundTrips(
+ "\u8fd4\u56de\u94fe\u4e2d\u7684\u4e0b\u4e00\u4e2a\u4ee3\u7406\u9879\u9009\u62e9\u5668");
+ // Chinese with 4-byte chars
+ assertRoundTrips("\uD841\uDF0E\uD841\uDF31\uD841\uDF79\uD843\uDC53\uD843\uDC78"
+ + "\uD843\uDC96\uD843\uDCCF\uD843\uDCD5\uD843\uDD15\uD843\uDD7C\uD843\uDD7F"
+ + "\uD843\uDE0E\uD843\uDE0F\uD843\uDE77\uD843\uDE9D\uD843\uDEA2");
+ // Mixed
+ assertRoundTrips(
+ "The quick brown \u3044\u308d\u306f\u306b\u307b\u3078\u8fd4\u56de\u94fe"
+ + "\u4e2d\u7684\u4e0b\u4e00");
+ }
+
+ /**
+ * Tests that overlong encodings (a code point written with more bytes than its shortest
+ * UTF-8 form) are rejected in their two-, three- and four-byte variants.
+ */
+ public void testOverlong() throws Exception {
+ // Overlong encodings of U+002F ('/')
+ assertInvalid(0xc0, 0xaf);
+ assertInvalid(0xe0, 0x80, 0xaf);
+ assertInvalid(0xf0, 0x80, 0x80, 0xaf);
+
+ // Max overlong: U+007F, U+07FF and U+FFFF, each written with one byte too many
+ assertInvalid(0xc1, 0xbf);
+ assertInvalid(0xe0, 0x9f, 0xbf);
+ assertInvalid(0xf0 ,0x8f, 0xbf, 0xbf);
+
+ // null overlong: U+0000 written with two, three and four bytes
+ assertInvalid(0xc0, 0x80);
+ assertInvalid(0xe0, 0x80, 0x80);
+ assertInvalid(0xf0, 0x80, 0x80, 0x80);
+ }
+
+ /**
+ * Tests that three-byte sequences encoding surrogate code points (U+D800 to U+DFFF) are
+ * rejected, both on their own and when a high and a low surrogate are encoded back to back.
+ */
+ public void testIllegalCodepoints() throws Exception {
+ // Single surrogate (0xed 0xa0 0x80 is U+D800, 0xed 0xbf 0xbf is U+DFFF)
+ assertInvalid(0xed, 0xa0, 0x80);
+ assertInvalid(0xed, 0xad, 0xbf);
+ assertInvalid(0xed, 0xae, 0x80);
+ assertInvalid(0xed, 0xaf, 0xbf);
+ assertInvalid(0xed, 0xb0, 0x80);
+ assertInvalid(0xed, 0xbe, 0x80);
+ assertInvalid(0xed, 0xbf, 0xbf);
+
+ // Paired surrogates: a high surrogate followed by a low surrogate, three bytes each
+ assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80);
+ assertInvalid(0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf);
+ assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80);
+ assertInvalid(0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf);
+ assertInvalid(0xed, 0xae, 0x80, 0xed, 0xb0, 0x80);
+ assertInvalid(0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf);
+ assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80);
+ assertInvalid(0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf);
+ }
+
+ public void testBufferSlice() throws Exception {
+ String str = "The quick brown fox jumps over the lazy dog";
+ assertRoundTrips(str, 10, 4);
+ assertRoundTrips(str, str.length(), 0);
+ }
+
+ /**
+ * Checks that (index, size) ranges reaching past the end of the encoded data are rejected
+ * with {@link ArrayIndexOutOfBoundsException}.
+ */
+ public void testInvalidBufferSlice() throws Exception {
+ byte[] encoded = "The quick brown fox jumps over the lazy dog".getBytes(Internal.UTF_8);
+ int length = encoded.length;
+ assertInvalidSlice(encoded, length - 3, 4);
+ assertInvalidSlice(encoded, length, 1);
+ assertInvalidSlice(encoded, length + 1, 0);
+ assertInvalidSlice(encoded, 0, length + 1);
+ }
+
+ /**
+ * Convenience overload that lets callers write bytes as unsigned int literals; each value is
+ * narrowed to a {@code byte} before delegating to the {@code byte[]} overload.
+ */
+ private void assertInvalid(int... bytesAsInt) throws Exception {
+ byte[] narrowed = new byte[bytesAsInt.length];
+ int next = 0;
+ for (int value : bytesAsInt) {
+ narrowed[next++] = (byte) value;
+ }
+ assertInvalid(narrowed);
+ }
+
+ /**
+ * Asserts that both processors throw {@link InvalidProtocolBufferException} when decoding all
+ * of {@code bytes} from a byte array, a direct {@link ByteBuffer} and a heap
+ * {@link ByteBuffer}. A failure names the processor and input kind that accepted the bytes.
+ *
+ * @param bytes the complete input that the decoders are expected to reject
+ */
+ private void assertInvalid(byte[] bytes) throws Exception {
+ Processor[] processors = {UNSAFE_PROCESSOR, SAFE_PROCESSOR};
+
+ for (Processor processor : processors) {
+ try {
+ processor.decodeUtf8(bytes, 0, bytes.length);
+ // The message is only built on failure, so the exhaustive tests pay nothing for it.
+ fail(processor.getClass().getSimpleName() + " decoded invalid UTF-8 from byte[]: "
+ + java.util.Arrays.toString(bytes));
+ } catch (InvalidProtocolBufferException expected) {
+ // Expected.
+ }
+ }
+
+ ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
+ direct.put(bytes);
+ direct.flip();
+ ByteBuffer heap = ByteBuffer.allocate(bytes.length);
+ heap.put(bytes);
+ heap.flip();
+
+ // Direct buffer first, then heap, with the unsafe processor first for each.
+ for (ByteBuffer buffer : new ByteBuffer[] {direct, heap}) {
+ for (Processor processor : processors) {
+ try {
+ processor.decodeUtf8(buffer, 0, bytes.length);
+ fail(processor.getClass().getSimpleName() + " decoded invalid UTF-8 from "
+ + (buffer.isDirect() ? "direct" : "heap") + " ByteBuffer: "
+ + java.util.Arrays.toString(bytes));
+ } catch (InvalidProtocolBufferException expected) {
+ // Expected.
+ }
+ }
+ }
+ }
+
+ /**
+ * Asserts that both processors throw {@link ArrayIndexOutOfBoundsException} when asked to
+ * decode the range ({@code index}, {@code size}) of {@code bytes} from a byte array, a direct
+ * {@link ByteBuffer} and a heap {@link ByteBuffer}. A failure names the processor and input
+ * kind that accepted the range.
+ *
+ * @param bytes the backing data
+ * @param index start offset passed to the decoder
+ * @param size number of bytes passed to the decoder
+ */
+ private void assertInvalidSlice(byte[] bytes, int index, int size) throws Exception {
+ Processor[] processors = {UNSAFE_PROCESSOR, SAFE_PROCESSOR};
+ String range = "index=" + index + ", size=" + size + ", length=" + bytes.length;
+
+ for (Processor processor : processors) {
+ try {
+ processor.decodeUtf8(bytes, index, size);
+ fail(processor.getClass().getSimpleName()
+ + " accepted out-of-bounds slice of byte[]: " + range);
+ } catch (ArrayIndexOutOfBoundsException expected) {
+ // Expected.
+ }
+ }
+
+ ByteBuffer direct = ByteBuffer.allocateDirect(bytes.length);
+ direct.put(bytes);
+ direct.flip();
+ ByteBuffer heap = ByteBuffer.allocate(bytes.length);
+ heap.put(bytes);
+ heap.flip();
+
+ // Direct buffer first, then heap, with the unsafe processor first for each.
+ for (ByteBuffer buffer : new ByteBuffer[] {direct, heap}) {
+ for (Processor processor : processors) {
+ try {
+ processor.decodeUtf8(buffer, index, size);
+ fail(processor.getClass().getSimpleName() + " accepted out-of-bounds slice of "
+ + (buffer.isDirect() ? "direct" : "heap") + " ByteBuffer: " + range);
+ } catch (ArrayIndexOutOfBoundsException expected) {
+ // Expected.
+ }
+ }
+ }
+ }
+
+ /** Round-trips the whole of {@code str}; a size of -1 selects the full encoded length. */
+ private void assertRoundTrips(String str) throws Exception {
+ assertRoundTrips(str, 0, -1);
+ }
+
+ /**
+ * Encodes {@code str} as UTF-8 and checks that both processors decode the byte range
+ * ({@code index}, {@code size}) to the same string the JDK produces, reading from a byte
+ * array, a direct {@link ByteBuffer} and a heap {@link ByteBuffer}.
+ *
+ * @param str the text to encode
+ * @param index byte offset at which decoding starts
+ * @param size number of bytes to decode, or -1 for the full encoded length
+ */
+ private void assertRoundTrips(String str, int index, int size) throws Exception {
+ byte[] encoded = str.getBytes(Internal.UTF_8);
+ int length = (size == -1) ? encoded.length : size;
+ // Reference result from the JDK decoder, shared by all six comparisons.
+ String expected = new String(encoded, index, length, Internal.UTF_8);
+
+ assertDecode(expected, UNSAFE_PROCESSOR.decodeUtf8(encoded, index, length));
+ assertDecode(expected, SAFE_PROCESSOR.decodeUtf8(encoded, index, length));
+
+ ByteBuffer directBuffer = ByteBuffer.allocateDirect(encoded.length);
+ directBuffer.put(encoded);
+ directBuffer.flip();
+ assertDecode(expected, UNSAFE_PROCESSOR.decodeUtf8(directBuffer, index, length));
+ assertDecode(expected, SAFE_PROCESSOR.decodeUtf8(directBuffer, index, length));
+
+ ByteBuffer heapBuffer = ByteBuffer.allocate(encoded.length);
+ heapBuffer.put(encoded);
+ heapBuffer.flip();
+ assertDecode(expected, UNSAFE_PROCESSOR.decodeUtf8(heapBuffer, index, length));
+ assertDecode(expected, SAFE_PROCESSOR.decodeUtf8(heapBuffer, index, length));
+ }
+
+ /**
+ * Fails the test when {@code actual} differs from {@code expected}, reporting both strings as
+ * lists of hexadecimal char values.
+ */
+ private void assertDecode(String expected, String actual) {
+ if (expected.equals(actual)) {
+ return;
+ }
+ fail("Failure: Expected (" + codepoints(expected) + ") Actual (" + codepoints(actual) + ")");
+ }
+
+ /**
+ * Returns the hexadecimal value of every {@code char} in {@code str}, in order. Despite the
+ * name, the entries are UTF-16 code units, so a supplementary character yields two entries.
+ */
+ private List<String> codepoints(String str) {
+ List<String> hexUnits = new ArrayList<String>();
+ for (char unit : str.toCharArray()) {
+ hexUnits.add(Integer.toHexString(unit));
+ }
+ return hexUnits;
+ }
+
+}
diff --git a/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
index 16a808bf..1bcf63e7 100644
--- a/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
+++ b/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
@@ -273,6 +273,15 @@ final class IsValidUtf8TestUtil {
assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes));
assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes, 0, numBytes));
+ try {
+ assertEquals(s, Utf8.decodeUtf8(bytes, 0, numBytes));
+ } catch (InvalidProtocolBufferException e) {
+ if (isRoundTrippable) {
+ System.out.println("Could not decode utf-8");
+ outputFailure(byteChar, bytes, bytesReencoded);
+ }
+ }
+
// Test partial sequences.
// Partition numBytes into three segments (not necessarily non-empty).
int i = rnd.nextInt(numBytes);