diff options
author | Viktor Szathmáry <phraktle@gmail.com> | 2014-09-09 16:31:51 +0200 |
---|---|---|
committer | Tamir Duberstein <tamird@gmail.com> | 2015-04-02 14:48:43 -0700 |
commit | e84893f6768f136cc86e2db69fc1d40ff2be7e3b (patch) | |
tree | c36057efe7fc3c3bf50381c96bae16cf73234fa5 /java | |
parent | 7139d1eff739682a088ea2c2dbdfef2f108321f8 (diff) |
perf: String#getBytes(Charset) vs getBytes(String)
Diffstat (limited to 'java')
7 files changed, 117 insertions, 28 deletions
diff --git a/java/src/main/java/com/google/protobuf/ByteString.java b/java/src/main/java/com/google/protobuf/ByteString.java index 637df5f4..ee2eddb0 100644 --- a/java/src/main/java/com/google/protobuf/ByteString.java +++ b/java/src/main/java/com/google/protobuf/ByteString.java @@ -37,6 +37,8 @@ import java.io.OutputStream; import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -76,8 +78,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k - // Defined by java.nio.charset.Charset - protected static final String UTF_8 = "UTF-8"; + protected static final Charset UTF_8 = Charset.forName("UTF-8"); /** * Empty {@code ByteString}. @@ -269,11 +270,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * @return new {@code ByteString} */ public static ByteString copyFromUtf8(String text) { - try { - return new LiteralByteString(text.getBytes(UTF_8)); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException("UTF-8 not supported?", e); - } + return new LiteralByteString(text.getBytes(UTF_8)); } // ================================================================= @@ -612,8 +609,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * @return new string * @throws UnsupportedEncodingException if charset isn't recognized */ - public abstract String toString(String charsetName) - throws UnsupportedEncodingException; + public String toString(String charsetName) + throws UnsupportedEncodingException { + try { + return toString(Charset.forName(charsetName)); + } catch (UnsupportedCharsetException e) { + UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName); + exception.initCause(e); + throw exception; + } + } + + /** + * Constructs a new {@code String} by decoding the bytes using the + * specified charset. Returns the same empty String if empty. + * + * @param charset encode using this charset + * @return new string + */ + public String toString(Charset charset) { + return size() == 0 ? "" : toStringInternal(charset); + } + + /** + * Constructs a new {@code String} by decoding the bytes using the + * specified charset. + * + * @param charset encode using this charset + * @return new string + */ + protected abstract String toStringInternal(Charset charset); // ================================================================= // UTF-8 decoding @@ -624,11 +649,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * @return new string using UTF-8 encoding */ public String toStringUtf8() { - try { - return toString(UTF_8); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException("UTF-8 not supported?", e); - } + return toString(UTF_8); } /** diff --git a/java/src/main/java/com/google/protobuf/LiteralByteString.java b/java/src/main/java/com/google/protobuf/LiteralByteString.java index 81d8e74b..3462c395 100644 --- a/java/src/main/java/com/google/protobuf/LiteralByteString.java +++ b/java/src/main/java/com/google/protobuf/LiteralByteString.java @@ -36,6 +36,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import java.util.NoSuchElementException; @@ -152,13 +153,8 @@ class LiteralByteString extends ByteString { } @Override - public String toString(String charsetName) - throws UnsupportedEncodingException { - // Optimize for empty strings, but ensure we don't silently ignore invalid - // encodings. - return size() == 0 && UTF_8.equals(charsetName) - ? "" - : new String(bytes, getOffsetIntoBytes(), size(), charsetName); + protected String toStringInternal(Charset charset) { + return new String(bytes, getOffsetIntoBytes(), size(), charset); } // ================================================================= diff --git a/java/src/main/java/com/google/protobuf/RopeByteString.java b/java/src/main/java/com/google/protobuf/RopeByteString.java index 168bcce2..0900a042 100644 --- a/java/src/main/java/com/google/protobuf/RopeByteString.java +++ b/java/src/main/java/com/google/protobuf/RopeByteString.java @@ -38,6 +38,7 @@ import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.io.ByteArrayInputStream; import java.nio.ByteBuffer; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; @@ -418,13 +419,8 @@ class RopeByteString extends ByteString { } @Override - public String toString(String charsetName) - throws UnsupportedEncodingException { - // Optimize for empty strings, but ensure we don't silently ignore invalid - // encodings. - return size() == 0 && UTF_8.equals(charsetName) - ? "" - : new String(toByteArray(), charsetName); + protected String toStringInternal(Charset charset) { + return new String(toByteArray(), charset); } // ================================================================= diff --git a/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java b/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java index 6c9596ca..a11bef2e 100644 --- a/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java +++ b/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java @@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest { testString.substring(2, testString.length() - 6), roundTripString); } + @Override + public void testCharsetToString() throws UnsupportedEncodingException { + String testString = "I love unicode \u1234\u5678 characters"; + LiteralByteString unicode = new LiteralByteString(testString.getBytes(ByteString.UTF_8)); + ByteString chopped = unicode.substring(2, unicode.size() - 6); + assertEquals(classUnderTest + ".substring() must have the expected type", + classUnderTest, getActualClassName(chopped)); + + String roundTripString = chopped.toString(ByteString.UTF_8); + assertEquals(classUnderTest + " unicode bytes must match", + testString.substring(2, testString.length() - 6), roundTripString); + } + public void testJavaSerialization() throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(out); diff --git a/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java b/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java index f3ad774f..8607040e 100644 --- a/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java +++ b/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java @@ -298,6 +298,13 @@ public class LiteralByteStringTest extends TestCase { assertEquals(classUnderTest + " unicode must match", testString, roundTripString); } + public void testCharsetToString() throws UnsupportedEncodingException { + String testString = "I love unicode \u1234\u5678 characters"; + LiteralByteString unicode = new LiteralByteString(testString.getBytes(ByteString.UTF_8)); + String roundTripString = unicode.toString(ByteString.UTF_8); + assertEquals(classUnderTest + " unicode must match", testString, roundTripString); + } + public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{ assertSame(classUnderTest + " must be the same string references", ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8)); diff --git a/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java b/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java index 8106201d..43872d1d 100644 --- a/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java +++ b/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java @@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest { assertEquals(classUnderTest + " string must must have same hashCode as the flat string", flatString.hashCode(), unicode.hashCode()); } + + @Override + public void testCharsetToString() throws UnsupportedEncodingException { + String sourceString = "I love unicode \u1234\u5678 characters"; + ByteString sourceByteString = ByteString.copyFromUtf8(sourceString); + int copies = 250; + + // By building the RopeByteString by concatenating, this is actually a fairly strenuous test. + StringBuilder builder = new StringBuilder(copies * sourceString.length()); + ByteString unicode = ByteString.EMPTY; + for (int i = 0; i < copies; ++i) { + builder.append(sourceString); + unicode = RopeByteString.concatenate(unicode, sourceByteString); + } + String testString = builder.toString(); + + // Do the substring part + testString = testString.substring(2, testString.length() - 6); + unicode = unicode.substring(2, unicode.size() - 6); + + assertEquals(classUnderTest + " from string must have the expected type", + classUnderTest, getActualClassName(unicode)); + String roundTripString = unicode.toString(ByteString.UTF_8); + assertEquals(classUnderTest + " unicode bytes must match", + testString, roundTripString); + ByteString flatString = ByteString.copyFromUtf8(testString); + assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode); + assertEquals(classUnderTest + " string must must have same hashCode as the flat string", + flatString.hashCode(), unicode.hashCode()); + } } diff --git a/java/src/test/java/com/google/protobuf/RopeByteStringTest.java b/java/src/test/java/com/google/protobuf/RopeByteStringTest.java index 4775f03a..54eb9683 100644 --- a/java/src/test/java/com/google/protobuf/RopeByteStringTest.java +++ b/java/src/test/java/com/google/protobuf/RopeByteStringTest.java @@ -119,6 +119,32 @@ public class RopeByteStringTest extends LiteralByteStringTest { } @Override + public void testCharsetToString() throws UnsupportedEncodingException { + String sourceString = "I love unicode \u1234\u5678 characters"; + ByteString sourceByteString = ByteString.copyFromUtf8(sourceString); + int copies = 250; + + // By building the RopeByteString by concatenating, this is actually a fairly strenuous test. + StringBuilder builder = new StringBuilder(copies * sourceString.length()); + ByteString unicode = ByteString.EMPTY; + for (int i = 0; i < copies; ++i) { + builder.append(sourceString); + unicode = RopeByteString.concatenate(unicode, sourceByteString); + } + String testString = builder.toString(); + + assertEquals(classUnderTest + " from string must have the expected type", + classUnderTest, getActualClassName(unicode)); + String roundTripString = unicode.toString(ByteString.UTF_8); + assertEquals(classUnderTest + " unicode bytes must match", + testString, roundTripString); + ByteString flatString = ByteString.copyFromUtf8(testString); + assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode); + assertEquals(classUnderTest + " string must must have same hashCode as the flat string", + flatString.hashCode(), unicode.hashCode()); + } + + @Override public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException { RopeByteString ropeByteString = RopeByteString.newInstanceForTest(ByteString.EMPTY, ByteString.EMPTY); |