aboutsummaryrefslogtreecommitdiffhomepage
path: root/java/core/src/main/java/com/google/protobuf
diff options
context:
space:
mode:
author: Jisi Liu <jisi.liu@gmail.com> 2016-03-30 11:39:59 -0700
committer: Jisi Liu <jisi.liu@gmail.com> 2016-03-30 11:39:59 -0700
commit: 3b3c8abb9635eb3ea078a821a99c9ef29d66dff7 (patch)
tree: 7d2ec154f15c9f9153d890e76b6cf30e471ea488 /java/core/src/main/java/com/google/protobuf
parent: 78105897a8f01c7be9cf8502b6c58d47eb1ccdd7 (diff)
Integrate google internal changes.
Diffstat (limited to 'java/core/src/main/java/com/google/protobuf')
-rw-r--r--java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java145
-rw-r--r--java/core/src/main/java/com/google/protobuf/ByteOutput.java116
-rw-r--r--java/core/src/main/java/com/google/protobuf/ByteString.java120
-rw-r--r--java/core/src/main/java/com/google/protobuf/CodedInputStream.java42
-rw-r--r--java/core/src/main/java/com/google/protobuf/CodedOutputStream.java1206
-rw-r--r--java/core/src/main/java/com/google/protobuf/Descriptors.java4
-rw-r--r--java/core/src/main/java/com/google/protobuf/ExperimentalApi.java30
-rw-r--r--java/core/src/main/java/com/google/protobuf/GeneratedMessage.java4
-rw-r--r--java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java301
-rw-r--r--java/core/src/main/java/com/google/protobuf/Internal.java9
-rw-r--r--java/core/src/main/java/com/google/protobuf/LazyField.java4
-rw-r--r--java/core/src/main/java/com/google/protobuf/LazyFieldLite.java112
-rw-r--r--java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java4
-rw-r--r--java/core/src/main/java/com/google/protobuf/MessageLiteToString.java200
-rw-r--r--java/core/src/main/java/com/google/protobuf/NioByteString.java55
-rw-r--r--java/core/src/main/java/com/google/protobuf/Parser.java1
-rw-r--r--java/core/src/main/java/com/google/protobuf/RopeByteString.java26
-rw-r--r--java/core/src/main/java/com/google/protobuf/SmallSortedMap.java2
-rw-r--r--java/core/src/main/java/com/google/protobuf/TextFormat.java176
-rw-r--r--java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java137
-rw-r--r--java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java225
-rw-r--r--java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java104
-rw-r--r--java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java110
-rw-r--r--java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java25
-rw-r--r--java/core/src/main/java/com/google/protobuf/Utf8.java1664
25 files changed, 3651 insertions, 1171 deletions
diff --git a/java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java b/java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java
new file mode 100644
index 00000000..0cc38175
--- /dev/null
+++ b/java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java
@@ -0,0 +1,145 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.ref.SoftReference;
+import java.nio.ByteBuffer;
+
+/**
+ * Utility class to provide efficient writing of {@link ByteBuffer}s to {@link OutputStream}s.
+ */
+final class ByteBufferWriter {
+ private ByteBufferWriter() {}
+
+ /**
+ * Minimum size for a cached buffer. This prevents us from allocating buffers that are too
+ * small to be easily reused.
+ */
+ // TODO(nathanmittler): tune this property or allow configuration?
+ private static final int MIN_CACHED_BUFFER_SIZE = 1024;
+
+ /**
+ * Maximum size for a cached buffer. If a larger buffer is required, it will be allocated
+ * but not cached.
+ */
+ // TODO(nathanmittler): tune this property or allow configuration?
+ private static final int MAX_CACHED_BUFFER_SIZE = 16 * 1024;
+
+ /**
+ * The fraction of the requested buffer size under which the buffer will be reallocated.
+ */
+ // TODO(nathanmittler): tune this property or allow configuration?
+ private static final float BUFFER_REALLOCATION_THRESHOLD = 0.5f;
+
+ /**
+ * Keeping a soft reference to a thread-local buffer. This buffer is used for writing a
+ * {@link ByteBuffer} to an {@link OutputStream} when no zero-copy alternative was available.
+ * Using a "soft" reference since VMs may keep this reference around longer than "weak"
+ * (e.g. HotSpot will maintain soft references until memory pressure warrants collection).
+ */
+ private static final ThreadLocal<SoftReference<byte[]>> BUFFER =
+ new ThreadLocal<SoftReference<byte[]>>();
+
+ /**
+ * For testing purposes only. Clears the cached buffer to force a new allocation on the next
+ * invocation.
+ */
+ static void clearCachedBuffer() {
+ BUFFER.set(null);
+ }
+
+ /**
+ * Writes the remaining content of the buffer to the given stream. The buffer {@code position}
+ * will remain unchanged by this method.
+ */
+ static void write(ByteBuffer buffer, OutputStream output) throws IOException {
+ final int initialPos = buffer.position();
+ try {
+ if (buffer.hasArray()) {
+ // Optimized write for array-backed buffers.
+ // Note that we're taking the risk that a malicious OutputStream could modify the array.
+ output.write(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
+ } else if (output instanceof FileOutputStream) {
+ // Use a channel to write out the ByteBuffer. This will automatically empty the buffer.
+ ((FileOutputStream) output).getChannel().write(buffer);
+ } else {
+ // Read all of the data from the buffer to an array.
+ // TODO(nathanmittler): Consider performance improvements for other "known" stream types.
+ final byte[] array = getOrCreateBuffer(buffer.remaining());
+ while (buffer.hasRemaining()) {
+ int length = min(buffer.remaining(), array.length);
+ buffer.get(array, 0, length);
+ output.write(array, 0, length);
+ }
+ }
+ } finally {
+ // Restore the initial position.
+ buffer.position(initialPos);
+ }
+ }
+
+ private static byte[] getOrCreateBuffer(int requestedSize) {
+ requestedSize = max(requestedSize, MIN_CACHED_BUFFER_SIZE);
+
+ byte[] buffer = getBuffer();
+ // Only allocate if we need to.
+ if (buffer == null || needToReallocate(requestedSize, buffer.length)) {
+ buffer = new byte[requestedSize];
+
+ // Only cache the buffer if it's not too big.
+ if (requestedSize <= MAX_CACHED_BUFFER_SIZE) {
+ setBuffer(buffer);
+ }
+ }
+ return buffer;
+ }
+
+ private static boolean needToReallocate(int requestedSize, int bufferLength) {
+ // First check against just the requested length to avoid the multiply.
+ return bufferLength < requestedSize
+ && bufferLength < requestedSize * BUFFER_REALLOCATION_THRESHOLD;
+ }
+
+ private static byte[] getBuffer() {
+ SoftReference<byte[]> sr = BUFFER.get();
+ return sr == null ? null : sr.get();
+ }
+
+ private static void setBuffer(byte[] value) {
+ BUFFER.set(new SoftReference<byte[]>(value));
+ }
+}
diff --git a/java/core/src/main/java/com/google/protobuf/ByteOutput.java b/java/core/src/main/java/com/google/protobuf/ByteOutput.java
new file mode 100644
index 00000000..8b7b04c8
--- /dev/null
+++ b/java/core/src/main/java/com/google/protobuf/ByteOutput.java
@@ -0,0 +1,116 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * An output target for raw bytes. This interface provides semantics that support two types of
+ * writing:
+ *
+ * <p><b>Traditional write operations:</b>
+ * (as defined by {@link java.io.OutputStream}) where the target method is responsible for either
+ * copying the data or completing the write before returning from the method call.
+ *
+ * <p><b>Lazy write operations:</b> where the caller guarantees that it will never modify the
+ * provided buffer and it can therefore be considered immutable. The target method is free to
+ * maintain a reference to the buffer beyond the scope of the method call (e.g. until the write
+ * operation completes).
+ */
+@ExperimentalApi
+public abstract class ByteOutput {
+ /**
+ * Writes a single byte.
+ *
+ * @param value the byte to be written
+ * @throws IOException thrown if an error occurred while writing
+ */
+ public abstract void write(byte value) throws IOException;
+
+ /**
+ * Writes a sequence of bytes. The {@link ByteOutput} must copy {@code value} if it will
+ * not be processed prior to the return of this method call, since {@code value} may be
+ * reused/altered by the caller.
+ *
+ * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+ * programming error and will lead to data corruption which will be difficult to debug.
+ *
+ * @param value the bytes to be written
+ * @param offset the offset of the start of the writable range
+ * @param length the number of bytes to write starting from {@code offset}
+ * @throws IOException thrown if an error occurred while writing
+ */
+ public abstract void write(byte[] value, int offset, int length) throws IOException;
+
+ /**
+ * Writes a sequence of bytes. The {@link ByteOutput} is free to retain a reference to the value
+ * beyond the scope of this method call (e.g. write later) since it is considered immutable and is
+ * guaranteed not to change by the caller.
+ *
+ * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+ * programming error and will lead to data corruption which will be difficult to debug.
+ *
+ * @param value the bytes to be written
+ * @param offset the offset of the start of the writable range
+ * @param length the number of bytes to write starting from {@code offset}
+ * @throws IOException thrown if an error occurred while writing
+ */
+ public abstract void writeLazy(byte[] value, int offset, int length) throws IOException;
+
+ /**
+ * Writes a sequence of bytes. The {@link ByteOutput} must copy {@code value} if it will
+ * not be processed prior to the return of this method call, since {@code value} may be
+ * reused/altered by the caller.
+ *
+ * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+ * programming error and will lead to data corruption which will be difficult to debug.
+ *
+ * @param value the bytes to be written. Upon returning from this call, the {@code position} of
+ * this buffer will be set to the {@code limit}
+ * @throws IOException thrown if an error occurred while writing
+ */
+ public abstract void write(ByteBuffer value) throws IOException;
+
+ /**
+ * Writes a sequence of bytes. The {@link ByteOutput} is free to retain a reference to the value
+ * beyond the scope of this method call (e.g. write later) since it is considered immutable and is
+ * guaranteed not to change by the caller.
+ *
+ * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+ * programming error and will lead to data corruption which will be difficult to debug.
+ *
+ * @param value the bytes to be written. Upon returning from this call, the {@code position} of
+ * this buffer will be set to the {@code limit}
+ * @throws IOException thrown if an error occurred while writing
+ */
+ public abstract void writeLazy(ByteBuffer value) throws IOException;
+}
diff --git a/java/core/src/main/java/com/google/protobuf/ByteString.java b/java/core/src/main/java/com/google/protobuf/ByteString.java
index 305236f3..62c94508 100644
--- a/java/core/src/main/java/com/google/protobuf/ByteString.java
+++ b/java/core/src/main/java/com/google/protobuf/ByteString.java
@@ -1,4 +1,32 @@
-// Copyright 2007 Google Inc. All rights reserved.
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.protobuf;
@@ -15,6 +43,7 @@ import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
@@ -58,6 +87,54 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* Empty {@code ByteString}.
*/
public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY);
+
+ /**
+ * An interface to efficiently copy {@code byte[]}.
+ *
+ * <p>One of the noticeable costs of copying a byte[] into a new array using
+ * {@code System.arraycopy} is nullification of a new buffer before the copy. It has been shown
+ * the HotSpot VM is able to intrinsify the {@code Arrays.copyOfRange} operation to avoid this
+ * expensive nullification and provide a substantial performance gain. Unfortunately this does not
+ * hold on Android runtimes and could make the copy slightly slower due to additional code in
+ * {@code Arrays.copyOfRange}. Thus we provide two different array copier implementations,
+ * one for HotSpot and one for Android runtimes.
+ */
+ private interface ByteArrayCopier {
+ /**
+ * Copies the specified range of the specified array into a new array.
+ */
+ byte[] copyFrom(byte[] bytes, int offset, int size);
+ }
+
+ /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */
+ private static final class SystemByteArrayCopier implements ByteArrayCopier {
+ @Override
+ public byte[] copyFrom(byte[] bytes, int offset, int size) {
+ byte[] copy = new byte[size];
+ System.arraycopy(bytes, offset, copy, 0, size);
+ return copy;
+ }
+ }
+
+ /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */
+ private static final class ArraysByteArrayCopier implements ByteArrayCopier {
+ @Override
+ public byte[] copyFrom(byte[] bytes, int offset, int size) {
+ return Arrays.copyOfRange(bytes, offset, offset + size);
+ }
+ }
+
+ private static final ByteArrayCopier byteArrayCopier;
+ static {
+ boolean isAndroid = true;
+ try {
+ Class.forName("android.content.Context");
+ } catch (ClassNotFoundException e) {
+ isAndroid = false;
+ }
+
+ byteArrayCopier = isAndroid ? new SystemByteArrayCopier() : new ArraysByteArrayCopier();
+ }
/**
* Cached hash value. Intentionally accessed via a data race, which
@@ -77,7 +154,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
*
* @param index index of byte
* @return the value
- * @throws ArrayIndexOutOfBoundsException {@code index < 0 or index >= size}
+ * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
*/
public abstract byte byteAt(int index);
@@ -109,7 +186,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
public byte nextByte() {
try {
return byteAt(position++);
- } catch (ArrayIndexOutOfBoundsException e) {
+ } catch (IndexOutOfBoundsException e) {
throw new NoSuchElementException(e.getMessage());
}
}
@@ -220,9 +297,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new {@code ByteString}
*/
public static ByteString copyFrom(byte[] bytes, int offset, int size) {
- byte[] copy = new byte[size];
- System.arraycopy(bytes, offset, copy, 0, size);
- return new LiteralByteString(copy);
+ return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size));
}
/**
@@ -559,12 +634,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
}
/**
- * Writes the complete contents of this byte string to
- * the specified output stream argument.
- *
- * <p>It is assumed that the {@link OutputStream} will not modify the contents passed it
- * it. It may be possible for a malicious {@link OutputStream} to corrupt
- * the data underlying the {@link ByteString}.
+ * Writes a copy of the contents of this byte string to the specified output stream argument.
*
* @param out the output stream to which to write the data.
* @throws IOException if an I/O error occurs.
@@ -578,8 +648,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @param sourceOffset offset within these bytes
* @param numberToWrite number of bytes to write
* @throws IOException if an I/O error occurs.
- * @throws IndexOutOfBoundsException if an offset or size is negative or too
- * large
+ * @throws IndexOutOfBoundsException if an offset or size is negative or too large
*/
final void writeTo(OutputStream out, int sourceOffset, int numberToWrite)
throws IOException {
@@ -597,6 +666,20 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
throws IOException;
/**
+ * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling
+ * this method may result in multiple operations on the target {@link ByteOutput}.
+ *
+ * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link
+ * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious
+ * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution!
+ *
+ * @param byteOutput the output target to receive the bytes
+ * @throws IOException if an I/O error occurs
+ * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput)
+ */
+ abstract void writeTo(ByteOutput byteOutput) throws IOException;
+
+ /**
* Constructs a read-only {@code java.nio.ByteBuffer} whose content
* is equal to the contents of this byte string.
* The result uses the same backing array as the byte string, if possible.
@@ -1102,7 +1185,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
*
* @param index the index position to be tested
* @param size the length of the array
- * @throws ArrayIndexOutOfBoundsException if the index does not fall within the array.
+ * @throws IndexOutOfBoundsException if the index does not fall within the array.
*/
static void checkIndex(int index, int size) {
if ((index | (size - (index + 1))) < 0) {
@@ -1120,7 +1203,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @param endIndex the end index of the range (exclusive)
* @param size the size of the array.
* @return the length of the range.
- * @throws ArrayIndexOutOfBoundsException some or all of the range falls outside of the array.
+ * @throws IndexOutOfBoundsException some or all of the range falls outside of the array.
*/
static int checkRange(int startIndex, int endIndex, int size) {
final int length = endIndex - startIndex;
@@ -1236,6 +1319,11 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
}
@Override
+ final void writeTo(ByteOutput output) throws IOException {
+ output.writeLazy(bytes, getOffsetIntoBytes(), size());
+ }
+
+ @Override
protected final String toStringInternal(Charset charset) {
return new String(bytes, getOffsetIntoBytes(), size(), charset);
}
diff --git a/java/core/src/main/java/com/google/protobuf/CodedInputStream.java b/java/core/src/main/java/com/google/protobuf/CodedInputStream.java
index b3118ee0..e8860651 100644
--- a/java/core/src/main/java/com/google/protobuf/CodedInputStream.java
+++ b/java/core/src/main/java/com/google/protobuf/CodedInputStream.java
@@ -55,7 +55,14 @@ public final class CodedInputStream {
* Create a new CodedInputStream wrapping the given InputStream.
*/
public static CodedInputStream newInstance(final InputStream input) {
- return new CodedInputStream(input);
+ return new CodedInputStream(input, BUFFER_SIZE);
+ }
+
+ /**
+ * Create a new CodedInputStream wrapping the given InputStream, using the given buffer size.
+ */
+ static CodedInputStream newInstance(final InputStream input, int bufferSize) {
+ return new CodedInputStream(input, bufferSize);
}
/**
@@ -70,14 +77,14 @@ public final class CodedInputStream {
*/
public static CodedInputStream newInstance(final byte[] buf, final int off,
final int len) {
- return newInstance(buf, off, len, false);
+ return newInstance(buf, off, len, false /* bufferIsImmutable */);
}
-
+
/**
* Create a new CodedInputStream wrapping the given byte array slice.
*/
- public static CodedInputStream newInstance(final byte[] buf, final int off,
- final int len, boolean bufferIsImmutable) {
+ static CodedInputStream newInstance(
+ final byte[] buf, final int off, final int len, final boolean bufferIsImmutable) {
CodedInputStream result = new CodedInputStream(buf, off, len, bufferIsImmutable);
try {
// Some uses of CodedInputStream can be more efficient if they know
@@ -361,6 +368,11 @@ public final class CodedInputStream {
return result;
} else if (size == 0) {
return "";
+ } else if (size <= bufferSize) {
+ refillBuffer(size);
+ String result = new String(buffer, bufferPos, size, Internal.UTF_8);
+ bufferPos += size;
+ return result;
} else {
// Slow path: Build a byte array first then copy it.
return new String(readRawBytesSlowPath(size), Internal.UTF_8);
@@ -375,14 +387,21 @@ public final class CodedInputStream {
public String readStringRequireUtf8() throws IOException {
final int size = readRawVarint32();
final byte[] bytes;
- int pos = bufferPos;
- if (size <= (bufferSize - pos) && size > 0) {
+ final int oldPos = bufferPos;
+ final int pos;
+ if (size <= (bufferSize - oldPos) && size > 0) {
// Fast path: We already have the bytes in a contiguous buffer, so
// just copy directly from it.
bytes = buffer;
- bufferPos = pos + size;
+ bufferPos = oldPos + size;
+ pos = oldPos;
} else if (size == 0) {
return "";
+ } else if (size <= bufferSize) {
+ refillBuffer(size);
+ bytes = buffer;
+ pos = 0;
+ bufferPos = pos + size;
} else {
// Slow path: Build a byte array first then copy it.
bytes = readRawBytesSlowPath(size);
@@ -869,7 +888,8 @@ public final class CodedInputStream {
private static final int DEFAULT_SIZE_LIMIT = 64 << 20; // 64MB
private static final int BUFFER_SIZE = 4096;
- private CodedInputStream(final byte[] buffer, final int off, final int len, boolean bufferIsImmutable) {
+ private CodedInputStream(
+ final byte[] buffer, final int off, final int len, boolean bufferIsImmutable) {
this.buffer = buffer;
bufferSize = off + len;
bufferPos = off;
@@ -878,8 +898,8 @@ public final class CodedInputStream {
this.bufferIsImmutable = bufferIsImmutable;
}
- private CodedInputStream(final InputStream input) {
- buffer = new byte[BUFFER_SIZE];
+ private CodedInputStream(final InputStream input, int bufferSize) {
+ buffer = new byte[bufferSize];
bufferSize = 0;
bufferPos = 0;
totalBytesRetired = 0;
diff --git a/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java b/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java
index d8ebad21..b92394b8 100644
--- a/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java
+++ b/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java
@@ -49,13 +49,17 @@ import java.util.logging.Logger;
* you are writing some other format of your own design, use the latter.
*
* <p>This class is totally unsynchronized.
- *
- * @author kneton@google.com Kenton Varda
*/
public final class CodedOutputStream {
-
private static final Logger logger = Logger.getLogger(CodedOutputStream.class.getName());
+ private static final int LITTLE_ENDIAN_64_SIZE = 8;
+
+ /**
+ * @deprecated Use {@link #computeFixed32SizeNoTag(int)} instead.
+ */
+ @Deprecated public static final int LITTLE_ENDIAN_32_SIZE = 4;
+
// TODO(dweis): Consider migrating to a ByteBuffer.
private final byte[] buffer;
private final int limit;
@@ -77,12 +81,13 @@ public final class CodedOutputStream {
* CodedOutputStream.
*/
static int computePreferredBufferSize(int dataLength) {
- if (dataLength > DEFAULT_BUFFER_SIZE) return DEFAULT_BUFFER_SIZE;
+ if (dataLength > DEFAULT_BUFFER_SIZE) {
+ return DEFAULT_BUFFER_SIZE;
+ }
return dataLength;
}
- private CodedOutputStream(final byte[] buffer, final int offset,
- final int length) {
+ private CodedOutputStream(final byte[] buffer, final int offset, final int length) {
output = null;
this.buffer = buffer;
position = offset;
@@ -108,8 +113,7 @@ public final class CodedOutputStream {
* Create a new {@code CodedOutputStream} wrapping the given
* {@code OutputStream} with a given buffer size.
*/
- public static CodedOutputStream newInstance(final OutputStream output,
- final int bufferSize) {
+ public static CodedOutputStream newInstance(final OutputStream output, final int bufferSize) {
return new CodedOutputStream(output, new byte[bufferSize]);
}
@@ -131,9 +135,8 @@ public final class CodedOutputStream {
* array is faster than writing to an {@code OutputStream}. See also
* {@link ByteString#newCodedBuilder}.
*/
- public static CodedOutputStream newInstance(final byte[] flatArray,
- final int offset,
- final int length) {
+ public static CodedOutputStream newInstance(
+ final byte[] flatArray, final int offset, final int length) {
return new CodedOutputStream(flatArray, offset, length);
}
@@ -147,13 +150,13 @@ public final class CodedOutputStream {
/**
* Create a new {@code CodedOutputStream} that writes to the given ByteBuffer.
*/
- public static CodedOutputStream newInstance(ByteBuffer byteBuffer,
- int bufferSize) {
+ public static CodedOutputStream newInstance(ByteBuffer byteBuffer, int bufferSize) {
return newInstance(new ByteBufferOutputStream(byteBuffer), bufferSize);
}
private static class ByteBufferOutputStream extends OutputStream {
private final ByteBuffer byteBuffer;
+
public ByteBufferOutputStream(ByteBuffer byteBuffer) {
this.byteBuffer = byteBuffer;
}
@@ -171,106 +174,120 @@ public final class CodedOutputStream {
// -----------------------------------------------------------------
- /** Write a {@code double} field, including tag, to the stream. */
- public void writeDouble(final int fieldNumber, final double value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64);
- writeDoubleNoTag(value);
+ /** Encode and write a tag. */
+ public void writeTag(final int fieldNumber, final int wireType) throws IOException {
+ writeRawVarint32(WireFormat.makeTag(fieldNumber, wireType));
}
- /** Write a {@code float} field, including tag, to the stream. */
- public void writeFloat(final int fieldNumber, final float value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32);
- writeFloatNoTag(value);
+ /** Write an {@code int32} field, including tag, to the stream. */
+ public void writeInt32(final int fieldNumber, final int value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
+ writeInt32NoTag(value);
}
- /** Write a {@code uint64} field, including tag, to the stream. */
- public void writeUInt64(final int fieldNumber, final long value)
- throws IOException {
+ /** Write a {@code uint32} field, including tag, to the stream. */
+ public void writeUInt32(final int fieldNumber, final int value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
- writeUInt64NoTag(value);
+ writeUInt32NoTag(value);
+ }
+
+ /** Write a {@code sint32} field, including tag, to the stream. */
+ public void writeSInt32(final int fieldNumber, final int value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
+ writeSInt32NoTag(value);
+ }
+
+ /** Write a {@code fixed32} field, including tag, to the stream. */
+ public void writeFixed32(final int fieldNumber, final int value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32);
+ writeFixed32NoTag(value);
+ }
+
+ /** Write an {@code sfixed32} field, including tag, to the stream. */
+ public void writeSFixed32(final int fieldNumber, final int value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32);
+ writeSFixed32NoTag(value);
}
/** Write an {@code int64} field, including tag, to the stream. */
- public void writeInt64(final int fieldNumber, final long value)
- throws IOException {
+ public void writeInt64(final int fieldNumber, final long value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
writeInt64NoTag(value);
}
- /** Write an {@code int32} field, including tag, to the stream. */
- public void writeInt32(final int fieldNumber, final int value)
- throws IOException {
+ /** Write a {@code uint64} field, including tag, to the stream. */
+ public void writeUInt64(final int fieldNumber, final long value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
- writeInt32NoTag(value);
+ writeUInt64NoTag(value);
+ }
+
+ /** Write an {@code sint64} field, including tag, to the stream. */
+ public void writeSInt64(final int fieldNumber, final long value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
+ writeSInt64NoTag(value);
}
/** Write a {@code fixed64} field, including tag, to the stream. */
- public void writeFixed64(final int fieldNumber, final long value)
- throws IOException {
+ public void writeFixed64(final int fieldNumber, final long value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64);
writeFixed64NoTag(value);
}
- /** Write a {@code fixed32} field, including tag, to the stream. */
- public void writeFixed32(final int fieldNumber, final int value)
- throws IOException {
+ /** Write an {@code sfixed64} field, including tag, to the stream. */
+ public void writeSFixed64(final int fieldNumber, final long value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64);
+ writeSFixed64NoTag(value);
+ }
+
+ /** Write a {@code float} field, including tag, to the stream. */
+ public void writeFloat(final int fieldNumber, final float value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32);
- writeFixed32NoTag(value);
+ writeFloatNoTag(value);
+ }
+
+ /** Write a {@code double} field, including tag, to the stream. */
+ public void writeDouble(final int fieldNumber, final double value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64);
+ writeDoubleNoTag(value);
}
/** Write a {@code bool} field, including tag, to the stream. */
- public void writeBool(final int fieldNumber, final boolean value)
- throws IOException {
+ public void writeBool(final int fieldNumber, final boolean value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
writeBoolNoTag(value);
}
- /** Write a {@code string} field, including tag, to the stream. */
- public void writeString(final int fieldNumber, final String value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED);
- writeStringNoTag(value);
- }
-
- /** Write a {@code group} field, including tag, to the stream. */
- public void writeGroup(final int fieldNumber, final MessageLite value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_START_GROUP);
- writeGroupNoTag(value);
- writeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP);
+ /**
+ * Write an enum field, including tag, to the stream. The provided value is the numeric
+ * value used to represent the enum value on the wire (not the enum ordinal value).
+ */
+ public void writeEnum(final int fieldNumber, final int value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
+ writeEnumNoTag(value);
}
-
- /** Write an embedded message field, including tag, to the stream. */
- public void writeMessage(final int fieldNumber, final MessageLite value)
- throws IOException {
+ /** Write a {@code string} field, including tag, to the stream. */
+ public void writeString(final int fieldNumber, final String value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED);
- writeMessageNoTag(value);
+ writeStringNoTag(value);
}
-
/** Write a {@code bytes} field, including tag, to the stream. */
- public void writeBytes(final int fieldNumber, final ByteString value)
- throws IOException {
+ public void writeBytes(final int fieldNumber, final ByteString value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED);
writeBytesNoTag(value);
}
/** Write a {@code bytes} field, including tag, to the stream. */
- public void writeByteArray(final int fieldNumber, final byte[] value)
- throws IOException {
+ public void writeByteArray(final int fieldNumber, final byte[] value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED);
writeByteArrayNoTag(value);
}
/** Write a {@code bytes} field, including tag, to the stream. */
- public void writeByteArray(final int fieldNumber,
- final byte[] value,
- final int offset,
- final int length)
- throws IOException {
+ public void writeByteArray(
+ final int fieldNumber, final byte[] value, final int offset, final int length)
+ throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED);
writeByteArrayNoTag(value, offset, length);
}
@@ -285,64 +302,100 @@ public final class CodedOutputStream {
* of a ByteBuffer, you can call
* {@code writeByteBuffer(fieldNumber, byteBuffer.slice())}.
*/
- public void writeByteBuffer(final int fieldNumber, final ByteBuffer value)
- throws IOException {
+ public void writeByteBuffer(final int fieldNumber, final ByteBuffer value) throws IOException {
writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED);
writeByteBufferNoTag(value);
}
- /** Write a {@code uint32} field, including tag, to the stream. */
- public void writeUInt32(final int fieldNumber, final int value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
- writeUInt32NoTag(value);
+ /** Write a single byte. */
+ public void writeRawByte(final byte value) throws IOException {
+ if (position == limit) {
+ refreshBuffer();
+ }
+
+ buffer[position++] = value;
+ ++totalBytesWritten;
}
- /**
- * Write an enum field, including tag, to the stream. Caller is responsible
- * for converting the enum value to its numeric value.
- */
- public void writeEnum(final int fieldNumber, final int value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
- writeEnumNoTag(value);
+ /** Write a single byte, represented by an integer value. */
+ public void writeRawByte(final int value) throws IOException {
+ writeRawByte((byte) value);
}
- /** Write an {@code sfixed32} field, including tag, to the stream. */
- public void writeSFixed32(final int fieldNumber, final int value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32);
- writeSFixed32NoTag(value);
+ /** Write an array of bytes. */
+ public void writeRawBytes(final byte[] value) throws IOException {
+ writeRawBytes(value, 0, value.length);
}
- /** Write an {@code sfixed64} field, including tag, to the stream. */
- public void writeSFixed64(final int fieldNumber, final long value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64);
- writeSFixed64NoTag(value);
+ /** Write part of an array of bytes. */
+ public void writeRawBytes(final byte[] value, int offset, int length) throws IOException {
+ if (limit - position >= length) {
+ // We have room in the current buffer.
+ System.arraycopy(value, offset, buffer, position, length);
+ position += length;
+ totalBytesWritten += length;
+ } else {
+ // Write extends past current buffer. Fill the rest of this buffer and
+ // flush.
+ final int bytesWritten = limit - position;
+ System.arraycopy(value, offset, buffer, position, bytesWritten);
+ offset += bytesWritten;
+ length -= bytesWritten;
+ position = limit;
+ totalBytesWritten += bytesWritten;
+ refreshBuffer();
+
+ // Now deal with the rest.
+ // Since we have an output stream, this is our buffer
+ // and buffer offset == 0
+ if (length <= limit) {
+ // Fits in new buffer.
+ System.arraycopy(value, offset, buffer, 0, length);
+ position = length;
+ } else {
+ // Write is very big. Let's do it all at once.
+ output.write(value, offset, length);
+ }
+ totalBytesWritten += length;
+ }
}
- /** Write an {@code sint32} field, including tag, to the stream. */
- public void writeSInt32(final int fieldNumber, final int value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
- writeSInt32NoTag(value);
+ /** Write a byte string. */
+ public void writeRawBytes(final ByteString value) throws IOException {
+ writeRawBytes(value, 0, value.size());
}
- /** Write an {@code sint64} field, including tag, to the stream. */
- public void writeSInt64(final int fieldNumber, final long value)
- throws IOException {
- writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT);
- writeSInt64NoTag(value);
+ /**
+ * Write a ByteBuffer. This method will write all content of the ByteBuffer
+ * regardless of the current position and limit (i.e., the number of bytes
+ * to be written is value.capacity(), not value.remaining()). Furthermore,
+ * this method doesn't alter the state of the passed-in ByteBuffer. Its
+ * position, limit, mark, etc. will remain unchanged. If you only want to
+ * write the remaining bytes of a ByteBuffer, you can call
+ * {@code writeRawBytes(byteBuffer.slice())}.
+ */
+ public void writeRawBytes(final ByteBuffer value) throws IOException {
+ if (value.hasArray()) {
+ writeRawBytes(value.array(), value.arrayOffset(), value.capacity());
+ } else {
+ ByteBuffer duplicated = value.duplicate();
+ duplicated.clear();
+ writeRawBytesInternal(duplicated);
+ }
+ }
+
+ /** Write an embedded message field, including tag, to the stream. */
+ public void writeMessage(final int fieldNumber, final MessageLite value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED);
+ writeMessageNoTag(value);
}
/**
* Write a MessageSet extension field to the stream. For historical reasons,
* the wire format differs from normal fields.
*/
- public void writeMessageSetExtension(final int fieldNumber,
- final MessageLite value)
- throws IOException {
+ public void writeMessageSetExtension(final int fieldNumber, final MessageLite value)
+ throws IOException {
writeTag(WireFormat.MESSAGE_SET_ITEM, WireFormat.WIRETYPE_START_GROUP);
writeUInt32(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber);
writeMessage(WireFormat.MESSAGE_SET_MESSAGE, value);
@@ -353,9 +406,8 @@ public final class CodedOutputStream {
* Write an unparsed MessageSet extension field to the stream. For
* historical reasons, the wire format differs from normal fields.
*/
- public void writeRawMessageSetExtension(final int fieldNumber,
- final ByteString value)
- throws IOException {
+ public void writeRawMessageSetExtension(final int fieldNumber, final ByteString value)
+ throws IOException {
writeTag(WireFormat.MESSAGE_SET_ITEM, WireFormat.WIRETYPE_START_GROUP);
writeUInt32(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber);
writeBytes(WireFormat.MESSAGE_SET_MESSAGE, value);
@@ -364,19 +416,34 @@ public final class CodedOutputStream {
// -----------------------------------------------------------------
- /** Write a {@code double} field to the stream. */
- public void writeDoubleNoTag(final double value) throws IOException {
- writeRawLittleEndian64(Double.doubleToRawLongBits(value));
+ /** Write an {@code int32} field to the stream. */
+ public void writeInt32NoTag(final int value) throws IOException {
+ if (value >= 0) {
+ writeRawVarint32(value);
+ } else {
+ // Must sign-extend.
+ writeRawVarint64(value);
+ }
}
- /** Write a {@code float} field to the stream. */
- public void writeFloatNoTag(final float value) throws IOException {
- writeRawLittleEndian32(Float.floatToRawIntBits(value));
+ /** Write a {@code uint32} field to the stream. */
+ public void writeUInt32NoTag(final int value) throws IOException {
+ writeRawVarint32(value);
}
- /** Write a {@code uint64} field to the stream. */
- public void writeUInt64NoTag(final long value) throws IOException {
- writeRawVarint64(value);
+ /** Write a {@code sint32} field to the stream. */
+ public void writeSInt32NoTag(final int value) throws IOException {
+ writeRawVarint32(encodeZigZag32(value));
+ }
+
+ /** Write a {@code fixed32} field to the stream. */
+ public void writeFixed32NoTag(final int value) throws IOException {
+ writeRawLittleEndian32(value);
+ }
+
+ /** Write a {@code sfixed32} field to the stream. */
+ public void writeSFixed32NoTag(final int value) throws IOException {
+ writeRawLittleEndian32(value);
}
/** Write an {@code int64} field to the stream. */
@@ -384,14 +451,14 @@ public final class CodedOutputStream {
writeRawVarint64(value);
}
- /** Write an {@code int32} field to the stream. */
- public void writeInt32NoTag(final int value) throws IOException {
- if (value >= 0) {
- writeRawVarint32(value);
- } else {
- // Must sign-extend.
- writeRawVarint64(value);
- }
+ /** Write a {@code uint64} field to the stream. */
+ public void writeUInt64NoTag(final long value) throws IOException {
+ writeRawVarint64(value);
+ }
+
+ /** Write a {@code sint64} field to the stream. */
+ public void writeSInt64NoTag(final long value) throws IOException {
+ writeRawVarint64(encodeZigZag64(value));
}
/** Write a {@code fixed64} field to the stream. */
@@ -399,9 +466,19 @@ public final class CodedOutputStream {
writeRawLittleEndian64(value);
}
- /** Write a {@code fixed32} field to the stream. */
- public void writeFixed32NoTag(final int value) throws IOException {
- writeRawLittleEndian32(value);
+ /** Write a {@code sfixed64} field to the stream. */
+ public void writeSFixed64NoTag(final long value) throws IOException {
+ writeRawLittleEndian64(value);
+ }
+
+ /** Write a {@code float} field to the stream. */
+ public void writeFloatNoTag(final float value) throws IOException {
+ writeRawLittleEndian32(Float.floatToRawIntBits(value));
+ }
+
+ /** Write a {@code double} field to the stream. */
+ public void writeDoubleNoTag(final double value) throws IOException {
+ writeRawLittleEndian64(Double.doubleToRawLongBits(value));
}
/** Write a {@code bool} field to the stream. */
@@ -409,6 +486,14 @@ public final class CodedOutputStream {
writeRawByte(value ? 1 : 0);
}
+ /**
+ * Write an enum field to the stream. The provided value is the numeric
+ * value used to represent the enum value on the wire (not the enum ordinal value).
+ */
+ public void writeEnumNoTag(final int value) throws IOException {
+ writeInt32NoTag(value);
+ }
+
/** Write a {@code string} field to the stream. */
// TODO(dweis): Document behavior on ill-formed UTF-16 input.
public void writeStringNoTag(final String value) throws IOException {
@@ -421,89 +506,6 @@ public final class CodedOutputStream {
}
}
- /** Write a {@code string} field to the stream. */
- private void inefficientWriteStringNoTag(final String value) throws IOException {
- // Unfortunately there does not appear to be any way to tell Java to encode
- // UTF-8 directly into our buffer, so we have to let it create its own byte
- // array and then copy.
- // TODO(dweis): Consider using nio Charset methods instead.
- final byte[] bytes = value.getBytes(Internal.UTF_8);
- writeRawVarint32(bytes.length);
- writeRawBytes(bytes);
- }
-
- /**
- * Write a {@code string} field to the stream efficiently. If the {@code string} is malformed,
- * this method rolls back its changes and throws an {@link UnpairedSurrogateException} with the
- * intent that the caller will catch and retry with {@link #inefficientWriteStringNoTag(String)}.
- *
- * @param value the string to write to the stream
- *
- * @throws UnpairedSurrogateException when {@code value} is ill-formed UTF-16.
- */
- private void efficientWriteStringNoTag(final String value) throws IOException {
- // UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()),
- // and at most 3 times of it. We take advantage of this in both branches below.
- final int maxLength = value.length() * Utf8.MAX_BYTES_PER_CHAR;
- final int maxLengthVarIntSize = computeRawVarint32Size(maxLength);
-
- // If we are streaming and the potential length is too big to fit in our buffer, we take the
- // slower path. Otherwise, we're good to try the fast path.
- if (output != null && maxLengthVarIntSize + maxLength > limit - position) {
- // Allocate a byte[] that we know can fit the string and encode into it. String.getBytes()
- // does the same internally and then does *another copy* to return a byte[] of exactly the
- // right size. We can skip that copy and just writeRawBytes up to the actualLength of the
- // UTF-8 encoded bytes.
- final byte[] encodedBytes = new byte[maxLength];
- int actualLength = Utf8.encode(value, encodedBytes, 0, maxLength);
- writeRawVarint32(actualLength);
- writeRawBytes(encodedBytes, 0, actualLength);
- } else {
- // Optimize for the case where we know this length results in a constant varint length as this
- // saves a pass for measuring the length of the string.
- final int minLengthVarIntSize = computeRawVarint32Size(value.length());
- int oldPosition = position;
- final int length;
- try {
- if (minLengthVarIntSize == maxLengthVarIntSize) {
- position = oldPosition + minLengthVarIntSize;
- int newPosition = Utf8.encode(value, buffer, position, limit - position);
- // Since this class is stateful and tracks the position, we rewind and store the state,
- // prepend the length, then reset it back to the end of the string.
- position = oldPosition;
- length = newPosition - oldPosition - minLengthVarIntSize;
- writeRawVarint32(length);
- position = newPosition;
- } else {
- length = Utf8.encodedLength(value);
- writeRawVarint32(length);
- position = Utf8.encode(value, buffer, position, limit - position);
- }
- } catch (UnpairedSurrogateException e) {
- // Be extra careful and restore the original position for retrying the write with the less
- // efficient path.
- position = oldPosition;
- throw e;
- } catch (ArrayIndexOutOfBoundsException e) {
- throw new OutOfSpaceException(e);
- }
- totalBytesWritten += length;
- }
- }
-
- /** Write a {@code group} field to the stream. */
- public void writeGroupNoTag(final MessageLite value) throws IOException {
- value.writeTo(this);
- }
-
-
- /** Write an embedded message field to the stream. */
- public void writeMessageNoTag(final MessageLite value) throws IOException {
- writeRawVarint32(value.getSerializedSize());
- value.writeTo(this);
- }
-
-
/** Write a {@code bytes} field to the stream. */
public void writeBytesNoTag(final ByteString value) throws IOException {
writeRawVarint32(value.size());
@@ -516,86 +518,53 @@ public final class CodedOutputStream {
writeRawBytes(value);
}
- /** Write a {@code bytes} field to the stream. */
- public void writeByteArrayNoTag(final byte[] value,
- final int offset,
- final int length) throws IOException {
- writeRawVarint32(length);
- writeRawBytes(value, offset, length);
+ /** Write an embedded message field to the stream. */
+ public void writeMessageNoTag(final MessageLite value) throws IOException {
+ writeRawVarint32(value.getSerializedSize());
+ value.writeTo(this);
}
+ // =================================================================
+ // =================================================================
+
/**
- * Write a {@code bytes} field to the stream. This method will write all
- * content of the ByteBuffer regardless of the current position and limit
- * (i.e., the number of bytes to be written is value.capacity(), not
- * value.remaining()). Furthermore, this method doesn't alter the state of
- * the passed-in ByteBuffer. Its position, limit, mark, etc. will remain
- * unchanged. If you only want to write the remaining bytes of a ByteBuffer,
- * you can call {@code writeByteBufferNoTag(byteBuffer.slice())}.
+ * Compute the number of bytes that would be needed to encode an
+ * {@code int32} field, including tag.
*/
- public void writeByteBufferNoTag(final ByteBuffer value) throws IOException {
- writeRawVarint32(value.capacity());
- writeRawBytes(value);
- }
-
- /** Write a {@code uint32} field to the stream. */
- public void writeUInt32NoTag(final int value) throws IOException {
- writeRawVarint32(value);
+ public static int computeInt32Size(final int fieldNumber, final int value) {
+ return computeTagSize(fieldNumber) + computeInt32SizeNoTag(value);
}
/**
- * Write an enum field to the stream. Caller is responsible
- * for converting the enum value to its numeric value.
+ * Compute the number of bytes that would be needed to encode a
+ * {@code uint32} field, including tag.
*/
- public void writeEnumNoTag(final int value) throws IOException {
- writeInt32NoTag(value);
- }
-
- /** Write an {@code sfixed32} field to the stream. */
- public void writeSFixed32NoTag(final int value) throws IOException {
- writeRawLittleEndian32(value);
- }
-
- /** Write an {@code sfixed64} field to the stream. */
- public void writeSFixed64NoTag(final long value) throws IOException {
- writeRawLittleEndian64(value);
- }
-
- /** Write an {@code sint32} field to the stream. */
- public void writeSInt32NoTag(final int value) throws IOException {
- writeRawVarint32(encodeZigZag32(value));
- }
-
- /** Write an {@code sint64} field to the stream. */
- public void writeSInt64NoTag(final long value) throws IOException {
- writeRawVarint64(encodeZigZag64(value));
+ public static int computeUInt32Size(final int fieldNumber, final int value) {
+ return computeTagSize(fieldNumber) + computeUInt32SizeNoTag(value);
}
- // =================================================================
-
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code double} field, including tag.
+ * Compute the number of bytes that would be needed to encode an
+ * {@code sint32} field, including tag.
*/
- public static int computeDoubleSize(final int fieldNumber,
- final double value) {
- return computeTagSize(fieldNumber) + computeDoubleSizeNoTag(value);
+ public static int computeSInt32Size(final int fieldNumber, final int value) {
+ return computeTagSize(fieldNumber) + computeSInt32SizeNoTag(value);
}
/**
* Compute the number of bytes that would be needed to encode a
- * {@code float} field, including tag.
+ * {@code fixed32} field, including tag.
*/
- public static int computeFloatSize(final int fieldNumber, final float value) {
- return computeTagSize(fieldNumber) + computeFloatSizeNoTag(value);
+ public static int computeFixed32Size(final int fieldNumber, final int value) {
+ return computeTagSize(fieldNumber) + computeFixed32SizeNoTag(value);
}
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code uint64} field, including tag.
+ * Compute the number of bytes that would be needed to encode an
+ * {@code sfixed32} field, including tag.
*/
- public static int computeUInt64Size(final int fieldNumber, final long value) {
- return computeTagSize(fieldNumber) + computeUInt64SizeNoTag(value);
+ public static int computeSFixed32Size(final int fieldNumber, final int value) {
+ return computeTagSize(fieldNumber) + computeSFixed32SizeNoTag(value);
}
/**
@@ -607,73 +576,83 @@ public final class CodedOutputStream {
}
/**
+ * Compute the number of bytes that would be needed to encode a
+ * {@code uint64} field, including tag.
+ */
+ public static int computeUInt64Size(final int fieldNumber, final long value) {
+ return computeTagSize(fieldNumber) + computeUInt64SizeNoTag(value);
+ }
+
+ /**
* Compute the number of bytes that would be needed to encode an
- * {@code int32} field, including tag.
+ * {@code sint64} field, including tag.
*/
- public static int computeInt32Size(final int fieldNumber, final int value) {
- return computeTagSize(fieldNumber) + computeInt32SizeNoTag(value);
+ public static int computeSInt64Size(final int fieldNumber, final long value) {
+ return computeTagSize(fieldNumber) + computeSInt64SizeNoTag(value);
}
/**
* Compute the number of bytes that would be needed to encode a
* {@code fixed64} field, including tag.
*/
- public static int computeFixed64Size(final int fieldNumber,
- final long value) {
+ public static int computeFixed64Size(final int fieldNumber, final long value) {
return computeTagSize(fieldNumber) + computeFixed64SizeNoTag(value);
}
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code fixed32} field, including tag.
+ * Compute the number of bytes that would be needed to encode an
+ * {@code sfixed64} field, including tag.
*/
- public static int computeFixed32Size(final int fieldNumber,
- final int value) {
- return computeTagSize(fieldNumber) + computeFixed32SizeNoTag(value);
+ public static int computeSFixed64Size(final int fieldNumber, final long value) {
+ return computeTagSize(fieldNumber) + computeSFixed64SizeNoTag(value);
}
/**
* Compute the number of bytes that would be needed to encode a
- * {@code bool} field, including tag.
+ * {@code float} field, including tag.
*/
- public static int computeBoolSize(final int fieldNumber,
- final boolean value) {
- return computeTagSize(fieldNumber) + computeBoolSizeNoTag(value);
+ public static int computeFloatSize(final int fieldNumber, final float value) {
+ return computeTagSize(fieldNumber) + computeFloatSizeNoTag(value);
}
/**
* Compute the number of bytes that would be needed to encode a
- * {@code string} field, including tag.
+ * {@code double} field, including tag.
*/
- public static int computeStringSize(final int fieldNumber,
- final String value) {
- return computeTagSize(fieldNumber) + computeStringSizeNoTag(value);
+ public static int computeDoubleSize(final int fieldNumber, final double value) {
+ return computeTagSize(fieldNumber) + computeDoubleSizeNoTag(value);
}
/**
* Compute the number of bytes that would be needed to encode a
- * {@code group} field, including tag.
+ * {@code bool} field, including tag.
*/
- public static int computeGroupSize(final int fieldNumber,
- final MessageLite value) {
- return computeTagSize(fieldNumber) * 2 + computeGroupSizeNoTag(value);
+ public static int computeBoolSize(final int fieldNumber, final boolean value) {
+ return computeTagSize(fieldNumber) + computeBoolSizeNoTag(value);
}
/**
* Compute the number of bytes that would be needed to encode an
- * embedded message field, including tag.
+ * enum field, including tag. The provided value is the numeric
+ * value used to represent the enum value on the wire (not the enum ordinal value).
*/
- public static int computeMessageSize(final int fieldNumber,
- final MessageLite value) {
- return computeTagSize(fieldNumber) + computeMessageSizeNoTag(value);
+ public static int computeEnumSize(final int fieldNumber, final int value) {
+ return computeTagSize(fieldNumber) + computeEnumSizeNoTag(value);
+ }
+
+ /**
+ * Compute the number of bytes that would be needed to encode a
+ * {@code string} field, including tag.
+ */
+ public static int computeStringSize(final int fieldNumber, final String value) {
+ return computeTagSize(fieldNumber) + computeStringSizeNoTag(value);
}
/**
* Compute the number of bytes that would be needed to encode a
* {@code bytes} field, including tag.
*/
- public static int computeBytesSize(final int fieldNumber,
- final ByteString value) {
+ public static int computeBytesSize(final int fieldNumber, final ByteString value) {
return computeTagSize(fieldNumber) + computeBytesSizeNoTag(value);
}
@@ -681,8 +660,7 @@ public final class CodedOutputStream {
* Compute the number of bytes that would be needed to encode a
* {@code bytes} field, including tag.
*/
- public static int computeByteArraySize(final int fieldNumber,
- final byte[] value) {
+ public static int computeByteArraySize(final int fieldNumber, final byte[] value) {
return computeTagSize(fieldNumber) + computeByteArraySizeNoTag(value);
}
@@ -690,8 +668,7 @@ public final class CodedOutputStream {
* Compute the number of bytes that would be needed to encode a
* {@code bytes} field, including tag.
*/
- public static int computeByteBufferSize(final int fieldNumber,
- final ByteBuffer value) {
+ public static int computeByteBufferSize(final int fieldNumber, final ByteBuffer value) {
return computeTagSize(fieldNumber) + computeByteBufferSizeNoTag(value);
}
@@ -699,114 +676,111 @@ public final class CodedOutputStream {
* Compute the number of bytes that would be needed to encode an
* embedded message in lazy field, including tag.
*/
- public static int computeLazyFieldSize(final int fieldNumber,
- final LazyFieldLite value) {
+ public static int computeLazyFieldSize(final int fieldNumber, final LazyFieldLite value) {
return computeTagSize(fieldNumber) + computeLazyFieldSizeNoTag(value);
}
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code uint32} field, including tag.
+ * Compute the number of bytes that would be needed to encode an
+ * embedded message field, including tag.
*/
- public static int computeUInt32Size(final int fieldNumber, final int value) {
- return computeTagSize(fieldNumber) + computeUInt32SizeNoTag(value);
+ public static int computeMessageSize(final int fieldNumber, final MessageLite value) {
+ return computeTagSize(fieldNumber) + computeMessageSizeNoTag(value);
}
/**
- * Compute the number of bytes that would be needed to encode an
- * enum field, including tag. Caller is responsible for converting the
- * enum value to its numeric value.
+ * Compute the number of bytes that would be needed to encode a
+ * MessageSet extension to the stream. For historical reasons,
+ * the wire format differs from normal fields.
*/
- public static int computeEnumSize(final int fieldNumber, final int value) {
- return computeTagSize(fieldNumber) + computeEnumSizeNoTag(value);
+ public static int computeMessageSetExtensionSize(final int fieldNumber, final MessageLite value) {
+ return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2
+ + computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber)
+ + computeMessageSize(WireFormat.MESSAGE_SET_MESSAGE, value);
}
/**
* Compute the number of bytes that would be needed to encode an
- * {@code sfixed32} field, including tag.
+ * unparsed MessageSet extension field to the stream. For
+ * historical reasons, the wire format differs from normal fields.
*/
- public static int computeSFixed32Size(final int fieldNumber,
- final int value) {
- return computeTagSize(fieldNumber) + computeSFixed32SizeNoTag(value);
+ public static int computeRawMessageSetExtensionSize(
+ final int fieldNumber, final ByteString value) {
+ return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2
+ + computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber)
+ + computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value);
}
/**
* Compute the number of bytes that would be needed to encode an
- * {@code sfixed64} field, including tag.
+ * lazily parsed MessageSet extension field to the stream. For
+ * historical reasons, the wire format differs from normal fields.
*/
- public static int computeSFixed64Size(final int fieldNumber,
- final long value) {
- return computeTagSize(fieldNumber) + computeSFixed64SizeNoTag(value);
+ public static int computeLazyFieldMessageSetExtensionSize(
+ final int fieldNumber, final LazyFieldLite value) {
+ return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2
+ + computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber)
+ + computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value);
}
- /**
- * Compute the number of bytes that would be needed to encode an
- * {@code sint32} field, including tag.
- */
- public static int computeSInt32Size(final int fieldNumber, final int value) {
- return computeTagSize(fieldNumber) + computeSInt32SizeNoTag(value);
+ // -----------------------------------------------------------------
+
+ /** Compute the number of bytes that would be needed to encode a tag. */
+ public static int computeTagSize(final int fieldNumber) {
+ return computeRawVarint32Size(WireFormat.makeTag(fieldNumber, 0));
}
/**
* Compute the number of bytes that would be needed to encode an
- * {@code sint64} field, including tag.
+ * {@code int32} field.
*/
- public static int computeSInt64Size(final int fieldNumber, final long value) {
- return computeTagSize(fieldNumber) + computeSInt64SizeNoTag(value);
+ public static int computeInt32SizeNoTag(final int value) {
+ if (value >= 0) {
+ return computeRawVarint32Size(value);
+ } else {
+ // Must sign-extend.
+ return 10;
+ }
}
/**
* Compute the number of bytes that would be needed to encode a
- * MessageSet extension to the stream. For historical reasons,
- * the wire format differs from normal fields.
+ * {@code uint32} field.
*/
- public static int computeMessageSetExtensionSize(
- final int fieldNumber, final MessageLite value) {
- return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 +
- computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) +
- computeMessageSize(WireFormat.MESSAGE_SET_MESSAGE, value);
+ public static int computeUInt32SizeNoTag(final int value) {
+ return computeRawVarint32Size(value);
}
/**
* Compute the number of bytes that would be needed to encode an
- * unparsed MessageSet extension field to the stream. For
- * historical reasons, the wire format differs from normal fields.
+ * {@code sint32} field.
*/
- public static int computeRawMessageSetExtensionSize(
- final int fieldNumber, final ByteString value) {
- return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 +
- computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) +
- computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value);
+ public static int computeSInt32SizeNoTag(final int value) {
+ return computeRawVarint32Size(encodeZigZag32(value));
}
/**
- * Compute the number of bytes that would be needed to encode an
- * lazily parsed MessageSet extension field to the stream. For
- * historical reasons, the wire format differs from normal fields.
+ * Compute the number of bytes that would be needed to encode a
+ * {@code fixed32} field.
*/
- public static int computeLazyFieldMessageSetExtensionSize(
- final int fieldNumber, final LazyFieldLite value) {
- return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 +
- computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) +
- computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value);
+ public static int computeFixed32SizeNoTag(@SuppressWarnings("unused") final int unused) {
+ return LITTLE_ENDIAN_32_SIZE;
}
- // -----------------------------------------------------------------
-
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code double} field, including tag.
+ * Compute the number of bytes that would be needed to encode an
+ * {@code sfixed32} field.
*/
- public static int computeDoubleSizeNoTag(final double value) {
- return LITTLE_ENDIAN_64_SIZE;
+ public static int computeSFixed32SizeNoTag(@SuppressWarnings("unused") final int unused) {
+ return LITTLE_ENDIAN_32_SIZE;
}
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code float} field, including tag.
+ * Compute the number of bytes that would be needed to encode an
+ * {@code int64} field.
*/
- public static int computeFloatSizeNoTag(final float value) {
- return LITTLE_ENDIAN_32_SIZE;
+ public static int computeInt64SizeNoTag(final long value) {
+ return computeRawVarint64Size(value);
}
/**
@@ -819,50 +793,62 @@ public final class CodedOutputStream {
/**
* Compute the number of bytes that would be needed to encode an
- * {@code int64} field, including tag.
+ * {@code sint64} field.
*/
- public static int computeInt64SizeNoTag(final long value) {
- return computeRawVarint64Size(value);
+ public static int computeSInt64SizeNoTag(final long value) {
+ return computeRawVarint64Size(encodeZigZag64(value));
}
/**
- * Compute the number of bytes that would be needed to encode an
- * {@code int32} field, including tag.
+ * Compute the number of bytes that would be needed to encode a
+ * {@code fixed64} field.
*/
- public static int computeInt32SizeNoTag(final int value) {
- if (value >= 0) {
- return computeRawVarint32Size(value);
- } else {
- // Must sign-extend.
- return 10;
- }
+ public static int computeFixed64SizeNoTag(@SuppressWarnings("unused") final long unused) {
+ return LITTLE_ENDIAN_64_SIZE;
}
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code fixed64} field.
+ * Compute the number of bytes that would be needed to encode an
+ * {@code sfixed64} field.
*/
- public static int computeFixed64SizeNoTag(final long value) {
+ public static int computeSFixed64SizeNoTag(@SuppressWarnings("unused") final long unused) {
return LITTLE_ENDIAN_64_SIZE;
}
/**
* Compute the number of bytes that would be needed to encode a
- * {@code fixed32} field.
+ * {@code float} field.
*/
- public static int computeFixed32SizeNoTag(final int value) {
+ public static int computeFloatSizeNoTag(@SuppressWarnings("unused") final float unused) {
return LITTLE_ENDIAN_32_SIZE;
}
/**
* Compute the number of bytes that would be needed to encode a
+ * {@code double} field.
+ */
+ public static int computeDoubleSizeNoTag(@SuppressWarnings("unused") final double unused) {
+ return LITTLE_ENDIAN_64_SIZE;
+ }
+
+ /**
+ * Compute the number of bytes that would be needed to encode a
* {@code bool} field.
*/
- public static int computeBoolSizeNoTag(final boolean value) {
+ public static int computeBoolSizeNoTag(@SuppressWarnings("unused") final boolean unused) {
return 1;
}
/**
+ * Compute the number of bytes that would be needed to encode an enum field.
+ * The provided value is the numeric value used to represent the enum value on the wire
+ * (not the enum ordinal value).
+ */
+ public static int computeEnumSizeNoTag(final int value) {
+ return computeInt32SizeNoTag(value);
+ }
+
+ /**
* Compute the number of bytes that would be needed to encode a
* {@code string} field.
*/
@@ -880,23 +866,6 @@ public final class CodedOutputStream {
}
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code group} field.
- */
- public static int computeGroupSizeNoTag(final MessageLite value) {
- return value.getSerializedSize();
- }
-
- /**
- * Compute the number of bytes that would be needed to encode an embedded
- * message field.
- */
- public static int computeMessageSizeNoTag(final MessageLite value) {
- final int size = value.getSerializedSize();
- return computeRawVarint32Size(size) + size;
- }
-
- /**
* Compute the number of bytes that would be needed to encode an embedded
* message stored in lazy field.
*/
@@ -910,8 +879,7 @@ public final class CodedOutputStream {
* {@code bytes} field.
*/
public static int computeBytesSizeNoTag(final ByteString value) {
- return computeRawVarint32Size(value.size()) +
- value.size();
+ return computeRawVarint32Size(value.size()) + value.size();
}
/**
@@ -931,72 +899,47 @@ public final class CodedOutputStream {
}
/**
- * Compute the number of bytes that would be needed to encode a
- * {@code uint32} field.
- */
- public static int computeUInt32SizeNoTag(final int value) {
- return computeRawVarint32Size(value);
- }
-
- /**
- * Compute the number of bytes that would be needed to encode an enum field.
- * Caller is responsible for converting the enum value to its numeric value.
- */
- public static int computeEnumSizeNoTag(final int value) {
- return computeInt32SizeNoTag(value);
- }
-
- /**
- * Compute the number of bytes that would be needed to encode an
- * {@code sfixed32} field.
- */
- public static int computeSFixed32SizeNoTag(final int value) {
- return LITTLE_ENDIAN_32_SIZE;
- }
-
- /**
- * Compute the number of bytes that would be needed to encode an
- * {@code sfixed64} field.
+ * Compute the number of bytes that would be needed to encode an embedded
+ * message field.
*/
- public static int computeSFixed64SizeNoTag(final long value) {
- return LITTLE_ENDIAN_64_SIZE;
+ public static int computeMessageSizeNoTag(final MessageLite value) {
+ final int size = value.getSerializedSize();
+ return computeRawVarint32Size(size) + size;
}
/**
- * Compute the number of bytes that would be needed to encode an
- * {@code sint32} field.
+ * Encode a 32-bit value with ZigZag encoding. ZigZag encodes signed integers
+ * into values that can be efficiently encoded with varint. (Otherwise,
+ * negative values must be sign-extended to 64 bits to be varint encoded,
+ * thus always taking 10 bytes on the wire.)
+ *
+ * @param n A signed 32-bit integer.
+ * @return An unsigned 32-bit integer, stored in a signed int because
+ * Java has no explicit unsigned support.
*/
- public static int computeSInt32SizeNoTag(final int value) {
- return computeRawVarint32Size(encodeZigZag32(value));
+ public static int encodeZigZag32(final int n) {
+ // Note: the right-shift must be arithmetic
+ return (n << 1) ^ (n >> 31);
}
/**
- * Compute the number of bytes that would be needed to encode an
- * {@code sint64} field.
+ * Encode a 64-bit value with ZigZag encoding. ZigZag encodes signed integers
+ * into values that can be efficiently encoded with varint. (Otherwise,
+ * negative values must be sign-extended to 64 bits to be varint encoded,
+ * thus always taking 10 bytes on the wire.)
+ *
+ * @param n A signed 64-bit integer.
+ * @return An unsigned 64-bit integer, stored in a signed long because
+ * Java has no explicit unsigned support.
*/
- public static int computeSInt64SizeNoTag(final long value) {
- return computeRawVarint64Size(encodeZigZag64(value));
+ public static long encodeZigZag64(final long n) {
+ // Note: the right-shift must be arithmetic
+ return (n << 1) ^ (n >> 63);
}
// =================================================================
/**
- * Internal helper that writes the current buffer to the output. The
- * buffer position is reset to its initial value when this returns.
- */
- private void refreshBuffer() throws IOException {
- if (output == null) {
- // We're writing to a single buffer.
- throw new OutOfSpaceException();
- }
-
- // Since we have an output stream, this is our buffer
- // and buffer offset == 0
- output.write(buffer, 0, position);
- position = 0;
- }
-
- /**
* Flushes the stream and forces any buffered bytes to be written. This
* does not flush the underlying OutputStream.
*/
@@ -1015,8 +958,8 @@ public final class CodedOutputStream {
return limit - position;
} else {
throw new UnsupportedOperationException(
- "spaceLeft() can only be called on CodedOutputStreams that are " +
- "writing to a flat array.");
+ "spaceLeft() can only be called on CodedOutputStreams that are "
+ + "writing to a flat array.");
}
}
@@ -1029,8 +972,7 @@ public final class CodedOutputStream {
*/
public void checkNoSpaceLeft() {
if (spaceLeft() != 0) {
- throw new IllegalStateException(
- "Did not write as much data as expected.");
+ throw new IllegalStateException("Did not write as much data as expected.");
}
}
@@ -1063,53 +1005,96 @@ public final class CodedOutputStream {
return totalBytesWritten;
}
- /** Write a single byte. */
- public void writeRawByte(final byte value) throws IOException {
- if (position == limit) {
- refreshBuffer();
- }
-
- buffer[position++] = value;
- ++totalBytesWritten;
- }
+ // =================================================================
- /** Write a single byte, represented by an integer value. */
- public void writeRawByte(final int value) throws IOException {
- writeRawByte((byte) value);
- }
+ /**
+ * Internal helper that writes the current buffer to the output. The
+ * buffer position is reset to its initial value when this returns.
+ */
+ private void refreshBuffer() throws IOException {
+ if (output == null) {
+ // We're writing to a single buffer.
+ throw new OutOfSpaceException();
+ }
- /** Write a byte string. */
- public void writeRawBytes(final ByteString value) throws IOException {
- writeRawBytes(value, 0, value.size());
+ // Since we have an output stream, this is our buffer
+ // and buffer offset == 0
+ output.write(buffer, 0, position);
+ position = 0;
}
- /** Write an array of bytes. */
- public void writeRawBytes(final byte[] value) throws IOException {
- writeRawBytes(value, 0, value.length);
+ /** Write a {@code string} field to the stream. */
+ private void inefficientWriteStringNoTag(final String value) throws IOException {
+ // Unfortunately there does not appear to be any way to tell Java to encode
+ // UTF-8 directly into our buffer, so we have to let it create its own byte
+ // array and then copy.
+ // TODO(dweis): Consider using nio Charset methods instead.
+ final byte[] bytes = value.getBytes(Internal.UTF_8);
+ writeRawVarint32(bytes.length);
+ writeRawBytes(bytes);
}
/**
- * Write a ByteBuffer. This method will write all content of the ByteBuffer
- * regardless of the current position and limit (i.e., the number of bytes
- * to be written is value.capacity(), not value.remaining()). Furthermore,
- * this method doesn't alter the state of the passed-in ByteBuffer. Its
- * position, limit, mark, etc. will remain unchanged. If you only want to
- * write the remaining bytes of a ByteBuffer, you can call
- * {@code writeRawBytes(byteBuffer.slice())}.
+ * Write a {@code string} field to the stream efficiently. If the {@code string} is malformed,
+ * this method rolls back its changes and throws an {@link UnpairedSurrogateException} with the
+ * intent that the caller will catch and retry with {@link #inefficientWriteStringNoTag(String)}.
+ *
+ * @param value the string to write to the stream
+ *
+ * @throws UnpairedSurrogateException when {@code value} is ill-formed UTF-16.
*/
- public void writeRawBytes(final ByteBuffer value) throws IOException {
- if (value.hasArray()) {
- writeRawBytes(value.array(), value.arrayOffset(), value.capacity());
+ private void efficientWriteStringNoTag(final String value) throws IOException {
+ // UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()),
+ // and at most 3 times of it. We take advantage of this in both branches below.
+ final int maxLength = value.length() * Utf8.MAX_BYTES_PER_CHAR;
+ final int maxLengthVarIntSize = computeRawVarint32Size(maxLength);
+
+ // If we are streaming and the potential length is too big to fit in our buffer, we take the
+ // slower path. Otherwise, we're good to try the fast path.
+ if (output != null && maxLengthVarIntSize + maxLength > limit - position) {
+ // Allocate a byte[] that we know can fit the string and encode into it. String.getBytes()
+ // does the same internally and then does *another copy* to return a byte[] of exactly the
+ // right size. We can skip that copy and just writeRawBytes up to the actualLength of the
+ // UTF-8 encoded bytes.
+ final byte[] encodedBytes = new byte[maxLength];
+ int actualLength = Utf8.encode(value, encodedBytes, 0, maxLength);
+ writeRawVarint32(actualLength);
+ writeRawBytes(encodedBytes, 0, actualLength);
} else {
- ByteBuffer duplicated = value.duplicate();
- duplicated.clear();
- writeRawBytesInternal(duplicated);
+ // Optimize for the case where we know this length results in a constant varint length as this
+ // saves a pass for measuring the length of the string.
+ final int minLengthVarIntSize = computeRawVarint32Size(value.length());
+ int oldPosition = position;
+ final int length;
+ try {
+ if (minLengthVarIntSize == maxLengthVarIntSize) {
+ position = oldPosition + minLengthVarIntSize;
+ int newPosition = Utf8.encode(value, buffer, position, limit - position);
+ // Since this class is stateful and tracks the position, we rewind and store the state,
+ // prepend the length, then reset it back to the end of the string.
+ position = oldPosition;
+ length = newPosition - oldPosition - minLengthVarIntSize;
+ writeRawVarint32(length);
+ position = newPosition;
+ } else {
+ length = Utf8.encodedLength(value);
+ writeRawVarint32(length);
+ position = Utf8.encode(value, buffer, position, limit - position);
+ }
+ } catch (UnpairedSurrogateException e) {
+ // Be extra careful and restore the original position for retrying the write with the less
+ // efficient path.
+ position = oldPosition;
+ throw e;
+ } catch (ArrayIndexOutOfBoundsException e) {
+ throw new OutOfSpaceException(e);
+ }
+ totalBytesWritten += length;
}
}
/** Write a ByteBuffer that isn't backed by an array. */
- private void writeRawBytesInternal(final ByteBuffer value)
- throws IOException {
+ private void writeRawBytesInternal(final ByteBuffer value) throws IOException {
int length = value.remaining();
if (limit - position >= length) {
// We have room in the current buffer.
@@ -1143,43 +1128,29 @@ public final class CodedOutputStream {
}
}
- /** Write part of an array of bytes. */
- public void writeRawBytes(final byte[] value, int offset, int length)
- throws IOException {
- if (limit - position >= length) {
- // We have room in the current buffer.
- System.arraycopy(value, offset, buffer, position, length);
- position += length;
- totalBytesWritten += length;
- } else {
- // Write extends past current buffer. Fill the rest of this buffer and
- // flush.
- final int bytesWritten = limit - position;
- System.arraycopy(value, offset, buffer, position, bytesWritten);
- offset += bytesWritten;
- length -= bytesWritten;
- position = limit;
- totalBytesWritten += bytesWritten;
- refreshBuffer();
+ /** Write a {@code bytes} field to the stream. Visible for testing. */
+ void writeByteArrayNoTag(final byte[] value, final int offset, final int length)
+ throws IOException {
+ writeRawVarint32(length);
+ writeRawBytes(value, offset, length);
+ }
- // Now deal with the rest.
- // Since we have an output stream, this is our buffer
- // and buffer offset == 0
- if (length <= limit) {
- // Fits in new buffer.
- System.arraycopy(value, offset, buffer, 0, length);
- position = length;
- } else {
- // Write is very big. Let's do it all at once.
- output.write(value, offset, length);
- }
- totalBytesWritten += length;
- }
+ /**
+ * Write a {@code bytes} field to the stream. This method will write all
+ * content of the ByteBuffer regardless of the current position and limit
+ * (i.e., the number of bytes to be written is value.capacity(), not
+ * value.remaining()). Furthermore, this method doesn't alter the state of
+ * the passed-in ByteBuffer. Its position, limit, mark, etc. will remain
+ * unchanged. If you only want to write the remaining bytes of a ByteBuffer,
+ * you can call {@code writeByteBufferNoTag(byteBuffer.slice())}.
+ */
+ private void writeByteBufferNoTag(final ByteBuffer value) throws IOException {
+ writeRawVarint32(value.capacity());
+ writeRawBytes(value);
}
/** Write part of a byte string. */
- public void writeRawBytes(final ByteString value, int offset, int length)
- throws IOException {
+ private void writeRawBytes(final ByteString value, int offset, int length) throws IOException {
if (limit - position >= length) {
// We have room in the current buffer.
value.copyTo(buffer, offset, position, length);
@@ -1210,21 +1181,57 @@ public final class CodedOutputStream {
}
}
- /** Encode and write a tag. */
- public void writeTag(final int fieldNumber, final int wireType)
- throws IOException {
- writeRawVarint32(WireFormat.makeTag(fieldNumber, wireType));
+ // =================================================================
+
+ /**
+ * Write a {@code group} field, including tag, to the stream.
+ *
+ * @deprecated groups are deprecated.
+ */
+ @Deprecated
+ public void writeGroup(final int fieldNumber, final MessageLite value) throws IOException {
+ writeTag(fieldNumber, WireFormat.WIRETYPE_START_GROUP);
+ writeGroupNoTag(value);
+ writeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP);
}
- /** Compute the number of bytes that would be needed to encode a tag. */
- public static int computeTagSize(final int fieldNumber) {
- return computeRawVarint32Size(WireFormat.makeTag(fieldNumber, 0));
+ /**
+ * Write a {@code group} field to the stream.
+ *
+ * @deprecated groups are deprecated.
+ */
+ @Deprecated
+ public void writeGroupNoTag(final MessageLite value) throws IOException {
+ value.writeTo(this);
+ }
+
+ /**
+ * Compute the number of bytes that would be needed to encode a
+ * {@code group} field, including tag.
+ *
+ * @deprecated groups are deprecated.
+ */
+ @Deprecated
+ public static int computeGroupSize(final int fieldNumber, final MessageLite value) {
+ return computeTagSize(fieldNumber) * 2 + computeGroupSizeNoTag(value);
+ }
+
+ /**
+ * Compute the number of bytes that would be needed to encode a
+ * {@code group} field.
+ */
+ @Deprecated
+ public static int computeGroupSizeNoTag(final MessageLite value) {
+ return value.getSerializedSize();
}
/**
* Encode and write a varint. {@code value} is treated as
* unsigned, so it won't be sign-extended if negative.
+ *
+ * @deprecated use {@link #writeUInt32NoTag} instead.
*/
+ @Deprecated
public void writeRawVarint32(int value) throws IOException {
while (true) {
if ((value & ~0x7F) == 0) {
@@ -1238,95 +1245,104 @@ public final class CodedOutputStream {
}
/**
- * Compute the number of bytes that would be needed to encode a varint.
- * {@code value} is treated as unsigned, so it won't be sign-extended if
- * negative.
+ * Encode and write a varint.
+ *
+ * @deprecated use {@link #writeUInt64NoTag} instead.
*/
- public static int computeRawVarint32Size(final int value) {
- if ((value & (~0 << 7)) == 0) return 1;
- if ((value & (~0 << 14)) == 0) return 2;
- if ((value & (~0 << 21)) == 0) return 3;
- if ((value & (~0 << 28)) == 0) return 4;
- return 5;
- }
-
- /** Encode and write a varint. */
+ @Deprecated
public void writeRawVarint64(long value) throws IOException {
while (true) {
if ((value & ~0x7FL) == 0) {
- writeRawByte((int)value);
+ writeRawByte((int) value);
return;
} else {
- writeRawByte(((int)value & 0x7F) | 0x80);
+ writeRawByte(((int) value & 0x7F) | 0x80);
value >>>= 7;
}
}
}
- /** Compute the number of bytes that would be needed to encode a varint. */
+ /**
+ * Compute the number of bytes that would be needed to encode a varint.
+ * {@code value} is treated as unsigned, so it won't be sign-extended if
+ * negative.
+ *
+ * @deprecated use {@link #computeUInt32SizeNoTag(int)} instead.
+ */
+ @Deprecated
+ public static int computeRawVarint32Size(final int value) {
+ if ((value & (~0 << 7)) == 0) {
+ return 1;
+ }
+ if ((value & (~0 << 14)) == 0) {
+ return 2;
+ }
+ if ((value & (~0 << 21)) == 0) {
+ return 3;
+ }
+ if ((value & (~0 << 28)) == 0) {
+ return 4;
+ }
+ return 5;
+ }
+
+ /**
+ * Compute the number of bytes that would be needed to encode a varint.
+ *
+ * @deprecated use {@link #computeUInt64SizeNoTag(long)} instead.
+ */
+ @Deprecated
public static int computeRawVarint64Size(long value) {
// handle two popular special cases up front ...
- if ((value & (~0L << 7)) == 0L) return 1;
- if (value < 0L) return 10;
+ if ((value & (~0L << 7)) == 0L) {
+ return 1;
+ }
+ if (value < 0L) {
+ return 10;
+ }
// ... leaving us with 8 remaining, which we can divide and conquer
int n = 2;
- if ((value & (~0L << 35)) != 0L) { n += 4; value >>>= 28; }
- if ((value & (~0L << 21)) != 0L) { n += 2; value >>>= 14; }
- if ((value & (~0L << 14)) != 0L) { n += 1; }
+ if ((value & (~0L << 35)) != 0L) {
+ n += 4;
+ value >>>= 28;
+ }
+ if ((value & (~0L << 21)) != 0L) {
+ n += 2;
+ value >>>= 14;
+ }
+ if ((value & (~0L << 14)) != 0L) {
+ n += 1;
+ }
return n;
}
- /** Write a little-endian 32-bit integer. */
- public void writeRawLittleEndian32(final int value) throws IOException {
- writeRawByte((value ) & 0xFF);
- writeRawByte((value >> 8) & 0xFF);
- writeRawByte((value >> 16) & 0xFF);
- writeRawByte((value >> 24) & 0xFF);
- }
-
- public static final int LITTLE_ENDIAN_32_SIZE = 4;
-
- /** Write a little-endian 64-bit integer. */
- public void writeRawLittleEndian64(final long value) throws IOException {
- writeRawByte((int)(value ) & 0xFF);
- writeRawByte((int)(value >> 8) & 0xFF);
- writeRawByte((int)(value >> 16) & 0xFF);
- writeRawByte((int)(value >> 24) & 0xFF);
- writeRawByte((int)(value >> 32) & 0xFF);
- writeRawByte((int)(value >> 40) & 0xFF);
- writeRawByte((int)(value >> 48) & 0xFF);
- writeRawByte((int)(value >> 56) & 0xFF);
- }
-
- public static final int LITTLE_ENDIAN_64_SIZE = 8;
-
/**
- * Encode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers
- * into values that can be efficiently encoded with varint. (Otherwise,
- * negative values must be sign-extended to 64 bits to be varint encoded,
- * thus always taking 10 bytes on the wire.)
+ * Write a little-endian 32-bit integer.
*
- * @param n A signed 32-bit integer.
- * @return An unsigned 32-bit integer, stored in a signed int because
- * Java has no explicit unsigned support.
+ * @deprecated Use {@link #writeFixed32NoTag} instead.
*/
- public static int encodeZigZag32(final int n) {
- // Note: the right-shift must be arithmetic
- return (n << 1) ^ (n >> 31);
+ @Deprecated
+ public void writeRawLittleEndian32(final int value) throws IOException {
+ writeRawByte((value) & 0xFF);
+ writeRawByte((value >> 8) & 0xFF);
+ writeRawByte((value >> 16) & 0xFF);
+ writeRawByte((value >> 24) & 0xFF);
}
/**
- * Encode a ZigZag-encoded 64-bit value. ZigZag encodes signed integers
- * into values that can be efficiently encoded with varint. (Otherwise,
- * negative values must be sign-extended to 64 bits to be varint encoded,
- * thus always taking 10 bytes on the wire.)
+ * Write a little-endian 64-bit integer.
*
- * @param n A signed 64-bit integer.
- * @return An unsigned 64-bit integer, stored in a signed int because
- * Java has no explicit unsigned support.
+ * @deprecated Use {@link #writeFixed64NoTag} instead.
*/
- public static long encodeZigZag64(final long n) {
- // Note: the right-shift must be arithmetic
- return (n << 1) ^ (n >> 63);
+ @Deprecated
+ public void writeRawLittleEndian64(final long value) throws IOException {
+ writeRawByte((int) (value) & 0xFF);
+ writeRawByte((int) (value >> 8) & 0xFF);
+ writeRawByte((int) (value >> 16) & 0xFF);
+ writeRawByte((int) (value >> 24) & 0xFF);
+ writeRawByte((int) (value >> 32) & 0xFF);
+ writeRawByte((int) (value >> 40) & 0xFF);
+ writeRawByte((int) (value >> 48) & 0xFF);
+ writeRawByte((int) (value >> 56) & 0xFF);
}
}
diff --git a/java/core/src/main/java/com/google/protobuf/Descriptors.java b/java/core/src/main/java/com/google/protobuf/Descriptors.java
index 5e15cfbe..e303e138 100644
--- a/java/core/src/main/java/com/google/protobuf/Descriptors.java
+++ b/java/core/src/main/java/com/google/protobuf/Descriptors.java
@@ -272,7 +272,7 @@ public final class Descriptors {
* because a field has an undefined type or because two messages
* were defined with the same name.
*/
- private static FileDescriptor buildFrom(
+ public static FileDescriptor buildFrom(
final FileDescriptorProto proto, final FileDescriptor[] dependencies,
final boolean allowUnknownDependencies)
throws DescriptorValidationException {
@@ -1123,7 +1123,7 @@ public final class Descriptors {
private JavaType javaType;
public FieldDescriptorProto.Type toProto() {
- return FieldDescriptorProto.Type.valueOf(ordinal() + 1);
+ return FieldDescriptorProto.Type.forNumber(ordinal() + 1);
}
public JavaType getJavaType() { return javaType; }
diff --git a/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java b/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java
index 6f41fb81..3cd4c884 100644
--- a/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java
+++ b/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java
@@ -1,3 +1,33 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
package com.google.protobuf;
import java.lang.annotation.Documented;
diff --git a/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java b/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java
index ceb97a4e..a50afe55 100644
--- a/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java
+++ b/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java
@@ -1019,7 +1019,9 @@ public abstract class GeneratedMessage extends AbstractMessage
verifyContainingType(field);
final Object value = extensions.getField(field);
if (value == null) {
- if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ if (field.isRepeated()) {
+ return Collections.emptyList();
+ } else if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
// Lacking an ExtensionRegistry, we have no way to determine the
// extension's real type, so we return a DynamicMessage.
return DynamicMessage.getDefaultInstance(field.getMessageType());
diff --git a/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java b/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java
index 81e1862c..12a1472d 100644
--- a/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java
+++ b/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java
@@ -30,6 +30,7 @@
package com.google.protobuf;
+import com.google.protobuf.AbstractMessageLite.Builder.LimitedInputStream;
import com.google.protobuf.Internal.BooleanList;
import com.google.protobuf.Internal.DoubleList;
import com.google.protobuf.Internal.FloatList;
@@ -39,6 +40,7 @@ import com.google.protobuf.Internal.ProtobufList;
import com.google.protobuf.WireFormat.FieldType;
import java.io.IOException;
+import java.io.InputStream;
import java.io.ObjectStreamException;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
@@ -57,10 +59,7 @@ import java.util.Map;
public abstract class GeneratedMessageLite<
MessageType extends GeneratedMessageLite<MessageType, BuilderType>,
BuilderType extends GeneratedMessageLite.Builder<MessageType, BuilderType>>
- extends AbstractMessageLite
- implements Serializable {
-
- private static final long serialVersionUID = 1L;
+ extends AbstractMessageLite {
/** For use by generated code only. Lazily initialized to reduce allocations. */
protected UnknownFieldSetLite unknownFields = null;
@@ -83,6 +82,24 @@ public abstract class GeneratedMessageLite<
return (BuilderType) dynamicMethod(MethodToInvoke.NEW_BUILDER);
}
+ /**
+ * A reflective toString function. This is primarily intended as a developer aid, while keeping
+ * binary size down. The first line of the {@code toString()} representation includes a commented
+ * version of {@code super.toString()} to act as an indicator that this should not be relied on
+ * for comparisons.
+ * <p>
+ * NOTE: This method relies on the field getter methods not being stripped or renamed by proguard.
+ * If they are, the fields will not be included in the returned string representation.
+ * <p>
+ * NOTE: This implementation is liable to change in the future, and should not be relied on in
+ * code.
+ */
+ @Override
+ public String toString() {
+ return MessageLiteToString.toString(this, super.toString());
+ }
+
+
// The general strategy for unknown fields is to use an UnknownFieldSetLite that is treated as
// mutable during the parsing constructor and immutable after. This allows us to avoid
// any unnecessary intermediary allocations while reducing the generated code size.
@@ -303,10 +320,9 @@ public abstract class GeneratedMessageLite<
throws java.io.IOException {
MessageType parsedMessage = null;
try {
- parsedMessage =
- (MessageType) getDefaultInstanceForType().getParserForType().parsePartialFrom(
- input, extensionRegistry);
- } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+ parsedMessage = parsePartialFrom(
+ (MessageType) getDefaultInstanceForType(), input, extensionRegistry);
+ } catch (InvalidProtocolBufferException e) {
parsedMessage = (MessageType) e.getUnfinishedMessage();
throw e;
} finally {
@@ -562,7 +578,6 @@ public abstract class GeneratedMessageLite<
return extensions.isInitialized();
}
-
@Override
protected final void doneParsing() {
super.doneParsing();
@@ -1049,7 +1064,12 @@ public abstract class GeneratedMessageLite<
* A serialized (serializable) form of the generated message. Stores the
* message as a class name and a byte array.
*/
- static final class SerializedForm implements Serializable {
+ protected static final class SerializedForm implements Serializable {
+
+ public static SerializedForm of(MessageLite message) {
+ return new SerializedForm(message);
+ }
+
private static final long serialVersionUID = 0L;
private final String messageClassName;
@@ -1093,16 +1113,6 @@ public abstract class GeneratedMessageLite<
}
}
}
-
- /**
- * Replaces this object in the output stream with a serialized form.
- * Part of Java's serialization magic. Generated sub-classes must override
- * this method by calling {@code return super.writeReplace();}
- * @return a SerializedForm of this message
- */
- protected Object writeReplace() throws ObjectStreamException {
- return new SerializedForm(this);
- }
/**
* Checks that the {@link Extension} is Lite and returns it as a
@@ -1135,45 +1145,6 @@ public abstract class GeneratedMessageLite<
message.dynamicMethod(MethodToInvoke.MAKE_IMMUTABLE);
}
- /**
- * A static helper method for parsing a partial from input using the extension registry and the
- * instance.
- */
- static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
- T instance, CodedInputStream input, ExtensionRegistryLite extensionRegistry)
- throws InvalidProtocolBufferException {
- try {
- return (T) instance.dynamicMethod(
- MethodToInvoke.PARSE_PARTIAL_FROM, input, extensionRegistry);
- } catch (RuntimeException e) {
- if (e.getCause() instanceof InvalidProtocolBufferException) {
- throw (InvalidProtocolBufferException) e.getCause();
- }
- throw e;
- }
- }
-
- /**
- * A {@link Parser} implementation that delegates to the default instance.
- * <p>
- * For use by generated code only.
- */
- protected static class DefaultInstanceBasedParser<T extends GeneratedMessageLite<T, ?>>
- extends AbstractParser<T> {
-
- private T defaultInstance;
-
- public DefaultInstanceBasedParser(T defaultInstance) {
- this.defaultInstance = defaultInstance;
- }
-
- @Override
- public T parsePartialFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
- throws InvalidProtocolBufferException {
- return GeneratedMessageLite.parsePartialFrom(defaultInstance, input, extensionRegistry);
- }
- }
-
protected static IntList newIntList() {
return new IntArrayList();
}
@@ -1269,8 +1240,218 @@ public abstract class GeneratedMessageLite<
protected static <E> ProtobufList<E> emptyProtobufList() {
return ProtobufArrayList.emptyList();
}
-
+
protected static LazyStringArrayList emptyLazyStringArrayList() {
return LazyStringArrayList.emptyList();
}
+
+ /**
+ * A {@link Parser} implementation that delegates to the default instance.
+ * <p>
+ * For use by generated code only.
+ */
+ protected static class DefaultInstanceBasedParser<T extends GeneratedMessageLite<T, ?>>
+ extends AbstractParser<T> {
+
+ private T defaultInstance;
+
+ public DefaultInstanceBasedParser(T defaultInstance) {
+ this.defaultInstance = defaultInstance;
+ }
+
+ @Override
+ public T parsePartialFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return GeneratedMessageLite.parsePartialFrom(defaultInstance, input, extensionRegistry);
+ }
+ }
+
+ /**
+ * A static helper method for parsing a partial from input using the extension registry and the
+ * instance.
+ */
+ // TODO(dweis): Should this verify that the last tag was 0?
+ static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+ T instance, CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ T result;
+ try {
+ result = (T) instance.dynamicMethod(
+ MethodToInvoke.PARSE_PARTIAL_FROM, input, extensionRegistry);
+ } catch (RuntimeException e) {
+ if (e.getCause() instanceof InvalidProtocolBufferException) {
+ throw (InvalidProtocolBufferException) e.getCause();
+ }
+ throw e;
+ }
+ return result;
+ }
+
+ protected static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+ T defaultInstance,
+ CodedInputStream input)
+ throws InvalidProtocolBufferException {
+ return parsePartialFrom(defaultInstance, input, ExtensionRegistryLite.getEmptyRegistry());
+ }
+
+ /**
+ * Helper method to check if message is initialized.
+ *
+ * @throws InvalidProtocolBufferException if it is not initialized.
+ * @return The message to check.
+ */
+ private static <T extends GeneratedMessageLite<T, ?>> T checkMessageInitialized(T message)
+ throws InvalidProtocolBufferException {
+ if (message != null && !message.isInitialized()) {
+ throw message.newUninitializedMessageException()
+ .asInvalidProtocolBufferException()
+ .setUnfinishedMessage(message);
+ }
+ return message;
+ }
+
+ // Validates last tag and that the parsed message is initialized.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+     T defaultInstance, ByteString data)
+     throws InvalidProtocolBufferException {
+   // Call the partial parser directly, like the byte[] overload does. The registry-taking
+   // parseFrom overload already runs checkMessageInitialized, so wrapping it here would perform
+   // the initialization check twice.
+   return checkMessageInitialized(
+       parsePartialFrom(defaultInstance, data, ExtensionRegistryLite.getEmptyRegistry()));
+ }
+
+ // Validates last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+ T defaultInstance, ByteString data, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(parsePartialFrom(defaultInstance, data, extensionRegistry));
+ }
+
+ // Special case: after parsing we verify that the last tag read is 0. We assume the parse
+ // exhausts the ByteString.
+ private static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+     T defaultInstance, ByteString data, ExtensionRegistryLite extensionRegistry)
+     throws InvalidProtocolBufferException {
+   CodedInputStream codedInput = data.newCodedInput();
+   T parsed = parsePartialFrom(defaultInstance, codedInput, extensionRegistry);
+   try {
+     codedInput.checkLastTagWas(0);
+   } catch (InvalidProtocolBufferException invalidLastTag) {
+     // Attach what was parsed so far before propagating the failure.
+     throw invalidLastTag.setUnfinishedMessage(parsed);
+   }
+   return parsed;
+ }
+
+ // Special case: after parsing we verify that the last tag read is 0. We assume the parse
+ // exhausts the byte array.
+ private static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+     T defaultInstance, byte[] data, ExtensionRegistryLite extensionRegistry)
+     throws InvalidProtocolBufferException {
+   CodedInputStream codedInput = CodedInputStream.newInstance(data);
+   T parsed = parsePartialFrom(defaultInstance, codedInput, extensionRegistry);
+   try {
+     codedInput.checkLastTagWas(0);
+   } catch (InvalidProtocolBufferException invalidLastTag) {
+     // Attach what was parsed so far before propagating the failure.
+     throw invalidLastTag.setUnfinishedMessage(parsed);
+   }
+   return parsed;
+ }
+
+ // Validates last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+ T defaultInstance, byte[] data)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialFrom(defaultInstance, data, ExtensionRegistryLite.getEmptyRegistry()));
+ }
+
+ // Validates last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+ T defaultInstance, byte[] data, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(parsePartialFrom(defaultInstance, data, extensionRegistry));
+ }
+
+ // Does not validate last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+ T defaultInstance, InputStream input)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialFrom(defaultInstance, CodedInputStream.newInstance(input),
+ ExtensionRegistryLite.getEmptyRegistry()));
+ }
+
+ // Does not validate last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+ T defaultInstance, InputStream input, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialFrom(defaultInstance, CodedInputStream.newInstance(input), extensionRegistry));
+ }
+
+ // Does not validate last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+ T defaultInstance, CodedInputStream input)
+ throws InvalidProtocolBufferException {
+ return parseFrom(defaultInstance, input, ExtensionRegistryLite.getEmptyRegistry());
+ }
+
+ // Does not validate last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+ T defaultInstance, CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialFrom(defaultInstance, input, extensionRegistry));
+ }
+
+ // Validates last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseDelimitedFrom(
+ T defaultInstance, InputStream input)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialDelimitedFrom(defaultInstance, input,
+ ExtensionRegistryLite.getEmptyRegistry()));
+ }
+
+ // Validates last tag.
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseDelimitedFrom(
+ T defaultInstance, InputStream input, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialDelimitedFrom(defaultInstance, input, extensionRegistry));
+ }
+
+ /**
+  * Reads a varint size prefix from {@code input}, then parses a message from a stream limited to
+  * that size and verifies that the last tag read was 0. The result is not checked for
+  * initialization.
+  *
+  * @return the parsed message, or {@code null} if {@code input} is already at end-of-stream when
+  *     the first byte of the size prefix is read
+  * @throws InvalidProtocolBufferException if the size prefix cannot be read, parsing fails, or the
+  *     last tag is not 0
+  */
+ private static <T extends GeneratedMessageLite<T, ?>> T parsePartialDelimitedFrom(
+     T defaultInstance,
+     InputStream input,
+     ExtensionRegistryLite extensionRegistry)
+     throws InvalidProtocolBufferException {
+   int size;
+   try {
+     int firstByte = input.read();
+     if (firstByte == -1) {
+       return null;
+     }
+     size = CodedInputStream.readRawVarint32(firstByte, input);
+   } catch (IOException e) {
+     // Same exception type and message as before, but keep the underlying I/O failure as the
+     // cause so its stack trace is not lost.
+     InvalidProtocolBufferException wrapped =
+         new InvalidProtocolBufferException(e.getMessage());
+     wrapped.initCause(e);
+     throw wrapped;
+   }
+   InputStream limitedInput = new LimitedInputStream(input, size);
+   CodedInputStream codedInput = CodedInputStream.newInstance(limitedInput);
+   T message = parsePartialFrom(defaultInstance, codedInput, extensionRegistry);
+   try {
+     codedInput.checkLastTagWas(0);
+   } catch (InvalidProtocolBufferException e) {
+     throw e.setUnfinishedMessage(message);
+   }
+   return message;
+ }
}
diff --git a/java/core/src/main/java/com/google/protobuf/Internal.java b/java/core/src/main/java/com/google/protobuf/Internal.java
index e19b6dca..abf7ddd6 100644
--- a/java/core/src/main/java/com/google/protobuf/Internal.java
+++ b/java/core/src/main/java/com/google/protobuf/Internal.java
@@ -51,10 +51,12 @@ import java.util.Set;
*
* @author kenton@google.com (Kenton Varda)
*/
-public class Internal {
+public final class Internal {
- protected static final Charset UTF_8 = Charset.forName("UTF-8");
- protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
+ private Internal() {}
+
+ static final Charset UTF_8 = Charset.forName("UTF-8");
+ static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
/**
* Helper called by generated code to construct default values for string
@@ -406,6 +408,7 @@ public class Internal {
public static final CodedInputStream EMPTY_CODED_INPUT_STREAM =
CodedInputStream.newInstance(EMPTY_BYTE_ARRAY);
+
/**
* Provides an immutable view of {@code List<T>} around a {@code List<F>}.
*
diff --git a/java/core/src/main/java/com/google/protobuf/LazyField.java b/java/core/src/main/java/com/google/protobuf/LazyField.java
index 5e0a485c..3da8b900 100644
--- a/java/core/src/main/java/com/google/protobuf/LazyField.java
+++ b/java/core/src/main/java/com/google/protobuf/LazyField.java
@@ -39,14 +39,14 @@ import java.util.Map.Entry;
*
* Most of key methods are implemented in {@link LazyFieldLite} but this class
* can contain default instance of the message to provide {@code hashCode()},
- * {@code equals()} and {@code toString()}.
+ * {@code equals()} and {@code toString()}.
*
* @author xiangl@google.com (Xiang Li)
*/
public class LazyField extends LazyFieldLite {
/**
- * Carry a message's default instance which is used by {@code hashCode()}, {@code equals()} and
+ * Carry a message's default instance which is used by {@code hashCode()}, {@code equals()} and
* {@code toString()}.
*/
private final MessageLite defaultInstance;
diff --git a/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java b/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java
index eea1fe3c..016ec20d 100644
--- a/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java
+++ b/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java
@@ -30,14 +30,26 @@
package com.google.protobuf;
+import java.io.IOException;
+
/**
* LazyFieldLite encapsulates the logic of lazily parsing message fields. It stores
- * the message in a ByteString initially and then parse it on-demand.
+ * the message in a ByteString initially and then parses it on-demand.
+ *
+ * LazyFieldLite is thread-compatible: concurrent reads are safe once the proto that this
+ * LazyFieldLite is a part of is no longer being mutated by its Builder. However, explicit
+ * synchronization is needed under read/write situations.
*
- * LazyField is thread-compatible e.g. concurrent read are safe, however,
- * synchronizations are needed under read/write situations.
+ * When a LazyFieldLite is used in the context of a MessageLite object, its behavior is considered
+ * to be immutable and none of the setter methods in its API are expected to be invoked. All of the
+ * getters are expected to be thread-safe. When used in the context of a MessageLite.Builder,
+ * setters can be invoked, but there is no guarantee of thread safety.
+ *
+ * TODO(yatin,dweis): Consider splitting this class's functionality and put the mutable methods
+ * into a separate builder class to allow us to give stronger compile-time guarantees.
*
- * This class is internal implementation detail, so you don't need to use it directly.
+ * This class is an internal implementation detail of the protobuf library, so you don't need to
+ * use it directly.
*
* @author xiangl@google.com (Xiang Li)
*/
@@ -46,8 +58,34 @@ public class LazyFieldLite {
ExtensionRegistryLite.getEmptyRegistry();
/**
- * A delayed-parsed version of the bytes. When this is non-null then {@code extensionRegistry } is
- * also non-null and {@code value} and {@code memoizedBytes} are null.
+ * The value associated with the LazyFieldLite object is stored in one or more of the following
+ * three fields (delayedBytes, value, memoizedBytes). They should together be interpreted as
+ * follows.
+ * 1) delayedBytes can be non-null, while value and memoizedBytes are null. The object will be in
+ * this state while the value for the object has not yet been parsed.
+ * 2) Both delayedBytes and value are non-null. The object transitions to this state as soon as
+ * some caller needs to access the value (by invoking getValue()).
+ * 3) memoizedBytes is merely an optimization for calls to LazyFieldLite.toByteString() to avoid
+ * recomputing the ByteString representation on each call. Instead, when the value is parsed
+ * from delayedBytes, we will also assign the contents of delayedBytes to memoizedBytes (since
+ * that is the ByteString representation of value).
+ * 4) Finally, if the LazyFieldLite was created directly with a parsed MessageLite value, then
+ * delayedBytes will be null, and memoizedBytes will be initialized only upon the first call to
+ * LazyFieldLite.toByteString().
+ *
+ * Given the above conditions, any caller that needs a serialized representation of this object
+ * must first check if the memoizedBytes or delayedBytes ByteString is non-null and use it
+ * directly; if both of those are null, it can look at the parsed value field. Similarly, any
+ * caller that needs a parsed value must first check if the value field is already non-null, if
+ * not it must parse the value from delayedBytes.
+ */
+
+ /**
+ * A delayed-parsed version of the contents of this field. When this field is non-null, then the
+ * "value" field is allowed to be null until the time that the value needs to be read.
+ *
+ * When delayedBytes is non-null then {@code extensionRegistry} is required to also be non-null.
+ * {@code value} and {@code memoizedBytes} will be initialized lazily.
*/
private ByteString delayedBytes;
@@ -60,12 +98,15 @@ public class LazyFieldLite {
private ExtensionRegistryLite extensionRegistry;
/**
- * The parsed value. When this is non-null then {@code delayedBytes} will be null.
+ * The parsed value. When this is null and a caller needs access to the MessageLite value, then
+ * {@code delayedBytes} will be parsed lazily at that time.
*/
protected volatile MessageLite value;
/**
- * The memoized bytes for {@code value}. Will be null when {@code value} is null.
+ * The memoized bytes for {@code value}. This is an optimization for the toByteString() method to
+ * not have to recompute its return-value on each invocation.
+ * TODO(yatin): Figure out whether this optimization is actually necessary.
*/
private volatile ByteString memoizedBytes;
@@ -230,6 +271,46 @@ public class LazyFieldLite {
return;
}
}
+
+ /**
+ * Merges another instance's contents from a stream.
+ *
+ * <p>LazyField is not thread-safe for write access. Synchronizations are needed
+ * under read/write situations.
+ */
+ public void mergeFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+ throws IOException {
+ if (this.containsDefaultInstance()) {
+ setByteString(input.readBytes(), extensionRegistry);
+ return;
+ }
+
+ // If this field has no extension registry of its own yet, adopt the one that was passed in
+ // alongside the input.
+ if (this.extensionRegistry == null) {
+ this.extensionRegistry = extensionRegistry;
+ }
+
+ // In the case that both of them are not parsed we simply concatenate the bytes to save time. In
+ // the (probably rare) case that they have different extension registries there is a chance that
+ // some of the extensions may be dropped, but the tradeoff of making this operation fast seems
+ // to outweigh the benefits of combining the extension registries, which is not normally done for
+ // lite protos anyways.
+ if (this.delayedBytes != null) {
+ setByteString(this.delayedBytes.concat(input.readBytes()), this.extensionRegistry);
+ return;
+ }
+
+ // We are parsed and both contain data. We won't drop any extensions here directly, but in the
+ // case that the extension registries are not the same then we might in the future if we
+ // need to serialize and parse a message again.
+ try {
+ setValue(value.toBuilder().mergeFrom(input, extensionRegistry).build());
+ } catch (InvalidProtocolBufferException e) {
+ // Nothing is logged and no exceptions are thrown. Clients will be unaware that a proto
+ // was invalid.
+ }
+ }
private static MessageLite mergeValueAndBytes(
MessageLite value, ByteString otherBytes, ExtensionRegistryLite extensionRegistry) {
@@ -259,10 +340,10 @@ public class LazyFieldLite {
* parsed. Be careful when using this method.
*/
public int getSerializedSize() {
- if (delayedBytes != null) {
- return delayedBytes.size();
- } else if (memoizedBytes != null) {
+ if (memoizedBytes != null) {
return memoizedBytes.size();
+ } else if (delayedBytes != null) {
+ return delayedBytes.size();
} else if (value != null) {
return value.getSerializedSize();
} else {
@@ -274,12 +355,12 @@ public class LazyFieldLite {
* Returns a BytesString for this field in a thread-safe way.
*/
public ByteString toByteString() {
- if (delayedBytes != null) {
- return delayedBytes;
- }
if (memoizedBytes != null) {
return memoizedBytes;
}
+ if (delayedBytes != null) {
+ return delayedBytes;
+ }
synchronized (this) {
if (memoizedBytes != null) {
return memoizedBytes;
@@ -311,18 +392,15 @@ public class LazyFieldLite {
.parseFrom(delayedBytes, extensionRegistry);
this.value = parsedValue;
this.memoizedBytes = delayedBytes;
- this.delayedBytes = null;
} else {
this.value = defaultInstance;
this.memoizedBytes = ByteString.EMPTY;
- this.delayedBytes = null;
}
} catch (InvalidProtocolBufferException e) {
// Nothing is logged and no exceptions are thrown. Clients will be unaware that this proto
// was invalid.
this.value = defaultInstance;
this.memoizedBytes = ByteString.EMPTY;
- this.delayedBytes = null;
}
}
}
diff --git a/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java b/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java
index c3be3cca..68c430cf 100644
--- a/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java
+++ b/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java
@@ -30,12 +30,12 @@
package com.google.protobuf;
-import java.util.Arrays;
-import java.util.List;
import java.util.AbstractList;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.List;
import java.util.RandomAccess;
/**
diff --git a/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java b/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java
index e69de29b..2a6e0e30 100644
--- a/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java
+++ b/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java
@@ -0,0 +1,200 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Helps generate {@link String} representations of {@link MessageLite} protos.
+ */
+final class MessageLiteToString {
+ /**
+ * Suffix for *_FIELD_NUMBER fields. This is used to reflectively detect proto fields that should
+ * be toString()ed.
+ */
+ private static final String FIELD_NUMBER_NAME_SUFFIX = "_FIELD_NUMBER";
+
+ /**
+ * Returns a {@link String} representation of the {@link MessageLite} object. The first line of
+ * the {@code String} representation includes a comment string to uniquely identify
+ * the object instance. This acts as an indicator that this should not be relied on for
+ * comparisons.
+ *
+ * <p>For use by generated code only.
+ */
+ static String toString(MessageLite messageLite, String commentString) {
+ StringBuilder buffer = new StringBuilder();
+ buffer.append("# ").append(commentString);
+ reflectivePrintWithIndent(messageLite, buffer, 0);
+ return buffer.toString();
+ }
+
+ /**
+  * Reflectively prints the {@link MessageLite} to the buffer at given {@code indent} level.
+  *
+  * <p>Fields are discovered through the declared {@code *_FIELD_NUMBER} constants and read through
+  * the matching no-arg {@code getFoo()}/{@code hasFoo()} pair or {@code getFooList()} method.
+  * Fields for which no such accessor is found are omitted. Extensions (for extendable messages)
+  * and unknown fields are printed after the regular fields.
+  *
+  * @param messageLite the message to print; it is cast to {@link GeneratedMessageLite} in order
+  *     to read its unknown fields
+  * @param buffer the buffer to write to
+  * @param indent the number of spaces to indent the proto by
+  */
+ private static void reflectivePrintWithIndent(
+     MessageLite messageLite, StringBuilder buffer, int indent) {
+   // Build a map of method name to method. We're looking for methods like getFoo(), hasFoo(), and
+   // getFooList() which might be useful for building an object's string representation.
+   Map<String, Method> nameToNoArgMethod = new HashMap<String, Method>();
+   for (Method method : messageLite.getClass().getDeclaredMethods()) {
+     if (method.getParameterTypes().length == 0) {
+       nameToNoArgMethod.put(method.getName(), method);
+     }
+   }
+
+   for (Field field : messageLite.getClass().getDeclaredFields()) {
+     String fieldName = field.getName();
+     // Skip all fields that aren't in a format like "FOO_BAR_FIELD_NUMBER"
+     if (!fieldName.endsWith(FIELD_NUMBER_NAME_SUFFIX)) {
+       continue;
+     }
+
+     // For "FOO_BAR_FIELD_NUMBER" this would be "FOO_BAR"
+     String upperUnderscore =
+         fieldName.substring(0, fieldName.length() - FIELD_NUMBER_NAME_SUFFIX.length());
+
+     // For "FOO_BAR_FIELD_NUMBER" this would be "foo_bar". Locale.ROOT keeps the printed name
+     // independent of the JVM's default locale (under a Turkish locale, for example, "I" would
+     // otherwise lower-case to a dotless i).
+     String lowerUnderscore = upperUnderscore.toLowerCase(Locale.ROOT);
+
+     // For "FOO_BAR_FIELD_NUMBER" this would be "FooBar"
+     String upperCamelCaseName = upperUnderscoreToUpperCamel(upperUnderscore);
+
+     // Try to reflectively get the value and toString() the field as if it were optional. This
+     // only works if the method names have not been proguarded out or renamed.
+     Method getMethod = nameToNoArgMethod.get("get" + upperCamelCaseName);
+     Method hasMethod = nameToNoArgMethod.get("has" + upperCamelCaseName);
+     if (getMethod != null && hasMethod != null) {
+       if ((Boolean) GeneratedMessageLite.invokeOrDie(hasMethod, messageLite)) {
+         printField(
+             buffer,
+             indent,
+             lowerUnderscore,
+             GeneratedMessageLite.invokeOrDie(getMethod, messageLite));
+       }
+       continue;
+     }
+
+     // Try to reflectively get the value and toString() the field as if it were repeated. This
+     // only works if the method names have not been proguarded out or renamed.
+     Method listMethod = nameToNoArgMethod.get("get" + upperCamelCaseName + "List");
+     if (listMethod != null) {
+       printField(
+           buffer,
+           indent,
+           lowerUnderscore,
+           GeneratedMessageLite.invokeOrDie(listMethod, messageLite));
+       continue;
+     }
+   }
+
+   // Extensions are printed by field number, e.g. "[100]".
+   if (messageLite instanceof GeneratedMessageLite.ExtendableMessage) {
+     Iterator<Map.Entry<GeneratedMessageLite.ExtensionDescriptor, Object>> iter =
+         ((GeneratedMessageLite.ExtendableMessage<?, ?>) messageLite).extensions.iterator();
+     while (iter.hasNext()) {
+       Map.Entry<GeneratedMessageLite.ExtensionDescriptor, Object> entry = iter.next();
+       printField(buffer, indent, "[" + entry.getKey().getNumber() + "]", entry.getValue());
+     }
+   }
+
+   if (((GeneratedMessageLite) messageLite).unknownFields != null) {
+     ((GeneratedMessageLite) messageLite).unknownFields.printWithIndent(buffer, indent);
+   }
+ }
+
+ /**
+ * Formats a text proto field.
+ *
+ * <p>A {@link List} value is printed as one entry per element, each under the same {@code name}.
+ * Strings and {@link ByteString}s are printed escaped and quoted, {@link GeneratedMessageLite}
+ * values are printed as a nested brace-delimited block indented by two more spaces, and any other
+ * value is printed via its {@code toString()}.
+ *
+ * <p>For use by generated code only.
+ *
+ * @param buffer the buffer to write to
+ * @param indent the number of spaces the proto should be indented by
+ * @param name the field name (in lower underscore case)
+ * @param object the object value of the field
+ */
+ static final void printField(StringBuilder buffer, int indent, String name, Object object) {
+ // Repeated field: emit every element as its own entry and stop.
+ if (object instanceof List<?>) {
+ List<?> list = (List<?>) object;
+ for (Object entry : list) {
+ printField(buffer, indent, name, entry);
+ }
+ return;
+ }
+
+ // Each entry starts on a new line, indented by the requested number of spaces.
+ buffer.append('\n');
+ for (int i = 0; i < indent; i++) {
+ buffer.append(' ');
+ }
+ buffer.append(name);
+
+ if (object instanceof String) {
+ buffer.append(": \"").append(TextFormatEscaper.escapeText((String) object)).append('"');
+ } else if (object instanceof ByteString) {
+ buffer.append(": \"").append(TextFormatEscaper.escapeBytes((ByteString) object)).append('"');
+ } else if (object instanceof GeneratedMessageLite) {
+ // Nested message: print its fields two spaces deeper, then close the brace at this level.
+ buffer.append(" {");
+ reflectivePrintWithIndent((GeneratedMessageLite) object, buffer, indent + 2);
+ buffer.append("\n");
+ for (int i = 0; i < indent; i++) {
+ buffer.append(' ');
+ }
+ buffer.append("}");
+ } else {
+ // Any other value is printed via toString(); a null object would throw here.
+ buffer.append(": ").append(object.toString());
+ }
+ }
+
+ /**
+  * A Guava-less implementation of:
+  * {@code CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, upperUnderscore)}
+  *
+  * <p>Underscores are dropped; the first character and every character that follows an
+  * underscore are upper-cased, and all other characters are lower-cased.
+  *
+  * @param upperUnderscore a name such as {@code "FOO_BAR"}
+  * @return the camel-case form, e.g. {@code "FooBar"}
+  */
+ private static String upperUnderscoreToUpperCamel(String upperUnderscore) {
+   // Accumulate in a StringBuilder rather than reallocating a String for every character.
+   StringBuilder upperCamelCaseName = new StringBuilder(upperUnderscore.length());
+   boolean nextCharacterShouldBeUpper = true;
+   for (int i = 0; i < upperUnderscore.length(); i++) {
+     char ch = upperUnderscore.charAt(i);
+     if (ch == '_') {
+       nextCharacterShouldBeUpper = true;
+     } else if (nextCharacterShouldBeUpper) {
+       upperCamelCaseName.append(Character.toUpperCase(ch));
+       nextCharacterShouldBeUpper = false;
+     } else {
+       upperCamelCaseName.append(Character.toLowerCase(ch));
+     }
+   }
+   return upperCamelCaseName.toString();
+ }
+}
diff --git a/java/core/src/main/java/com/google/protobuf/NioByteString.java b/java/core/src/main/java/com/google/protobuf/NioByteString.java
index f71e41b2..6163c7b1 100644
--- a/java/core/src/main/java/com/google/protobuf/NioByteString.java
+++ b/java/core/src/main/java/com/google/protobuf/NioByteString.java
@@ -30,15 +30,14 @@
package com.google.protobuf;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InvalidObjectException;
import java.io.ObjectInputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.nio.InvalidMarkException;
-import java.nio.channels.Channels;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.List;
@@ -54,7 +53,7 @@ final class NioByteString extends ByteString.LeafByteString {
throw new NullPointerException("buffer");
}
- this.buffer = buffer.slice();
+ this.buffer = buffer.slice().order(ByteOrder.nativeOrder());
}
// =================================================================
@@ -119,7 +118,7 @@ final class NioByteString extends ByteString.LeafByteString {
@Override
public void writeTo(OutputStream out) throws IOException {
- writeToInternal(out, buffer.position(), buffer.remaining());
+ out.write(toByteArray());
}
@Override
@@ -137,14 +136,12 @@ final class NioByteString extends ByteString.LeafByteString {
return;
}
- // Slow path
- if (out instanceof FileOutputStream || numberToWrite >= 8192) {
- // Use a channel to write out the ByteBuffer.
- Channels.newChannel(out).write(slice(sourceOffset, sourceOffset + numberToWrite));
- } else {
- // Just copy the data to an array and write it.
- out.write(toByteArray());
- }
+ ByteBufferWriter.write(slice(sourceOffset, sourceOffset + numberToWrite), out);
+ }
+
+ @Override
+ void writeTo(ByteOutput output) throws IOException {
+ output.writeLazy(buffer.slice());
}
@Override
@@ -159,46 +156,30 @@ final class NioByteString extends ByteString.LeafByteString {
@Override
protected String toStringInternal(Charset charset) {
- byte[] bytes;
- int offset;
+ final byte[] bytes;
+ final int offset;
+ final int length;
if (buffer.hasArray()) {
bytes = buffer.array();
offset = buffer.arrayOffset() + buffer.position();
+ length = buffer.remaining();
} else {
+ // TODO(nathanmittler): Can we optimize this?
bytes = toByteArray();
offset = 0;
+ length = bytes.length;
}
- return new String(bytes, offset, size(), charset);
+ return new String(bytes, offset, length, charset);
}
@Override
public boolean isValidUtf8() {
- // TODO(nathanmittler): add a ByteBuffer fork for Utf8.isValidUtf8 to avoid the copy
- byte[] bytes;
- int startIndex;
- if (buffer.hasArray()) {
- bytes = buffer.array();
- startIndex = buffer.arrayOffset() + buffer.position();
- } else {
- bytes = toByteArray();
- startIndex = 0;
- }
- return Utf8.isValidUtf8(bytes, startIndex, startIndex + size());
+ return Utf8.isValidUtf8(buffer);
}
@Override
protected int partialIsValidUtf8(int state, int offset, int length) {
- // TODO(nathanmittler): TODO add a ByteBuffer fork for Utf8.partialIsValidUtf8 to avoid the copy
- byte[] bytes;
- int startIndex;
- if (buffer.hasArray()) {
- bytes = buffer.array();
- startIndex = buffer.arrayOffset() + buffer.position();
- } else {
- bytes = toByteArray();
- startIndex = 0;
- }
- return Utf8.partialIsValidUtf8(state, bytes, startIndex, startIndex + size());
+ return Utf8.partialIsValidUtf8(state, buffer, offset, offset + length);
}
@Override
diff --git a/java/core/src/main/java/com/google/protobuf/Parser.java b/java/core/src/main/java/com/google/protobuf/Parser.java
index 3fa11c3b..6db69247 100644
--- a/java/core/src/main/java/com/google/protobuf/Parser.java
+++ b/java/core/src/main/java/com/google/protobuf/Parser.java
@@ -30,7 +30,6 @@
package com.google.protobuf;
-import java.io.IOException;
import java.io.InputStream;
/**
diff --git a/java/core/src/main/java/com/google/protobuf/RopeByteString.java b/java/core/src/main/java/com/google/protobuf/RopeByteString.java
index 8badfabd..3f3e9bd1 100644
--- a/java/core/src/main/java/com/google/protobuf/RopeByteString.java
+++ b/java/core/src/main/java/com/google/protobuf/RopeByteString.java
@@ -48,10 +48,11 @@ import java.util.Stack;
/**
* Class to represent {@code ByteStrings} formed by concatenation of other
* ByteStrings, without copying the data in the pieces. The concatenation is
- * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
+ * represented as a tree whose leaf nodes are each a
+ * {@link com.google.protobuf.ByteString.LeafByteString}.
*
* <p>Most of the operation here is inspired by the now-famous paper <a
- * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+ * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
* BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and
* michael plass
*
@@ -139,8 +140,9 @@ final class RopeByteString extends ByteString {
/**
* Concatenate the given strings while performing various optimizations to
* slow the growth rate of tree depth and tree node count. The result is
- * either a {@link LiteralByteString} or a {@link RopeByteString}
- * depending on which optimizations, if any, were applied.
+ * either a {@link com.google.protobuf.ByteString.LeafByteString} or a
+ * {@link RopeByteString} depending on which optimizations, if any, were
+ * applied.
*
* <p>Small pieces of length less than {@link
* ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
@@ -294,8 +296,7 @@ final class RopeByteString extends ByteString {
*
* <p>Substrings of {@code length < 2} should result in at most a single
* recursive call chain, terminating at a leaf node. Thus the result will be a
- * {@link LiteralByteString}. {@link #RopeByteString(ByteString,
- * ByteString)}.
+ * {@link com.google.protobuf.ByteString.LeafByteString}.
*
* @param beginIndex start at this index
* @param endIndex the last character is the one before this index
@@ -368,7 +369,7 @@ final class RopeByteString extends ByteString {
@Override
public List<ByteBuffer> asReadOnlyByteBufferList() {
- // Walk through the list of LiteralByteString's that make up this
+ // Walk through the list of LeafByteString's that make up this
// rope, and add each one as a read-only ByteBuffer.
List<ByteBuffer> result = new ArrayList<ByteBuffer>();
PieceIterator pieces = new PieceIterator(this);
@@ -400,6 +401,12 @@ final class RopeByteString extends ByteString {
}
@Override
+ void writeTo(ByteOutput output) throws IOException {
+ left.writeTo(output);
+ right.writeTo(output);
+ }
+
+ @Override
protected String toStringInternal(Charset charset) {
return new String(toByteArray(), charset);
}
@@ -709,9 +716,10 @@ final class RopeByteString extends ByteString {
}
/**
- * Returns the next item and advances one {@code LiteralByteString}.
+ * Returns the next item and advances one
+ * {@link com.google.protobuf.ByteString.LeafByteString}.
*
- * @return next non-empty LiteralByteString or {@code null}
+ * @return next non-empty LeafByteString or {@code null}
*/
@Override
public LeafByteString next() {
diff --git a/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java b/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java
index 0674d2e2..dff19328 100644
--- a/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java
+++ b/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java
@@ -35,12 +35,12 @@ import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
-import java.util.TreeMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.SortedMap;
+import java.util.TreeMap;
/**
* A custom map implementation from FieldDescriptor to Object optimized to
diff --git a/java/core/src/main/java/com/google/protobuf/TextFormat.java b/java/core/src/main/java/com/google/protobuf/TextFormat.java
index c99b5285..edf114fa 100644
--- a/java/core/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/core/src/main/java/com/google/protobuf/TextFormat.java
@@ -425,7 +425,7 @@ public final class TextFormat {
case STRING:
generator.print("\"");
generator.print(escapeNonAscii
- ? escapeText((String) value)
+ ? TextFormatEscaper.escapeText((String) value)
: escapeDoubleQuotesAndBackslashes((String) value)
.replace("\n", "\\n"));
generator.print("\"");
@@ -661,6 +661,14 @@ public final class TextFormat {
nextToken();
}
+ int getLine() {
+ return line;
+ }
+
+ int getColumn() {
+ return column;
+ }
+
/** Are we at the end of the input? */
public boolean atEnd() {
return currentToken.length() == 0;
@@ -1074,7 +1082,7 @@ public final class TextFormat {
private ParseException floatParseException(final NumberFormatException e) {
return parseException("Couldn't parse number: " + e.getMessage());
}
-
+
/**
* Returns a {@link UnknownFieldParseException} with the line and column
* numbers of the previous token in the description, and the unknown field
@@ -1133,7 +1141,7 @@ public final class TextFormat {
return column;
}
}
-
+
/**
* Thrown when encountering an unknown field while parsing
* a text format message.
@@ -1257,11 +1265,14 @@ public final class TextFormat {
private final boolean allowUnknownFields;
private final SingularOverwritePolicy singularOverwritePolicy;
+ private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
- private Parser(boolean allowUnknownFields,
- SingularOverwritePolicy singularOverwritePolicy) {
+ private Parser(
+ boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy,
+ TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
this.allowUnknownFields = allowUnknownFields;
this.singularOverwritePolicy = singularOverwritePolicy;
+ this.parseInfoTreeBuilder = parseInfoTreeBuilder;
}
/**
@@ -1278,6 +1289,7 @@ public final class TextFormat {
private boolean allowUnknownFields = false;
private SingularOverwritePolicy singularOverwritePolicy =
SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
+ private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
/**
@@ -1288,8 +1300,15 @@ public final class TextFormat {
return this;
}
+ public Builder setParseInfoTreeBuilder(
+ TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
+ this.parseInfoTreeBuilder = parseInfoTreeBuilder;
+ return this;
+ }
+
public Parser build() {
- return new Parser(allowUnknownFields, singularOverwritePolicy);
+ return new Parser(
+ allowUnknownFields, singularOverwritePolicy, parseInfoTreeBuilder);
}
}
@@ -1380,7 +1399,21 @@ public final class TextFormat {
final ExtensionRegistry extensionRegistry,
final MessageReflection.MergeTarget target)
throws ParseException {
+ mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder);
+ }
+
+ /**
+ * Parse a single field from {@code tokenizer} and merge it into
+ * {@code builder}.
+ */
+ private void mergeField(final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target,
+ TextFormatParseInfoTree.Builder parseTreeBuilder)
+ throws ParseException {
FieldDescriptor field = null;
+ int startLine = tokenizer.getLine();
+ int startColumn = tokenizer.getColumn();
final Descriptor type = target.getDescriptorForType();
ExtensionRegistry.ExtensionInfo extension = null;
@@ -1472,14 +1505,51 @@ public final class TextFormat {
// Handle potential ':'.
if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
tokenizer.tryConsume(":"); // optional
+ if (parseTreeBuilder != null) {
+ TextFormatParseInfoTree.Builder childParseTreeBuilder =
+ parseTreeBuilder.getBuilderForSubMessageField(field);
+ consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
+ childParseTreeBuilder);
+ } else {
+ consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
+ parseTreeBuilder);
+ }
} else {
tokenizer.consume(":"); // required
+ consumeFieldValues(
+ tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder);
}
+
+ if (parseTreeBuilder != null) {
+ parseTreeBuilder.setLocation(
+ field, TextFormatParseLocation.create(startLine, startColumn));
+ }
+
+ // For historical reasons, fields may optionally be separated by commas or
+ // semicolons.
+ if (!tokenizer.tryConsume(";")) {
+ tokenizer.tryConsume(",");
+ }
+ }
+
+ /**
+ * Parse one or more field values from {@code tokenizer} and merge them into
+ * {@code target}.
+ */
+ private void consumeFieldValues(
+ final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target,
+ final FieldDescriptor field,
+ final ExtensionRegistry.ExtensionInfo extension,
+ final TextFormatParseInfoTree.Builder parseTreeBuilder)
+ throws ParseException {
// Support specifying repeated field values as a comma-separated list.
// Ex."foo: [1, 2, 3]"
if (field.isRepeated() && tokenizer.tryConsume("[")) {
while (true) {
- consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+ consumeFieldValue(tokenizer, extensionRegistry, target, field, extension,
+ parseTreeBuilder);
if (tokenizer.tryConsume("]")) {
// End of list.
break;
@@ -1487,13 +1557,8 @@ public final class TextFormat {
tokenizer.consume(",");
}
} else {
- consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
- }
-
- // For historical reasons, fields may optionally be separated by commas or
- // semicolons.
- if (!tokenizer.tryConsume(";")) {
- tokenizer.tryConsume(",");
+ consumeFieldValue(
+ tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder);
}
}
@@ -1506,7 +1571,8 @@ public final class TextFormat {
final ExtensionRegistry extensionRegistry,
final MessageReflection.MergeTarget target,
final FieldDescriptor field,
- final ExtensionRegistry.ExtensionInfo extension)
+ final ExtensionRegistry.ExtensionInfo extension,
+ final TextFormatParseInfoTree.Builder parseTreeBuilder)
throws ParseException {
Object value = null;
@@ -1528,7 +1594,7 @@ public final class TextFormat {
throw tokenizer.parseException(
"Expected \"" + endToken + "\".");
}
- mergeField(tokenizer, extensionRegistry, subField);
+ mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder);
}
value = subField.finish();
@@ -1704,52 +1770,6 @@ public final class TextFormat {
// Some of these methods are package-private because Descriptors.java uses
// them.
- private interface ByteSequence {
- int size();
- byte byteAt(int offset);
- }
-
- /**
- * Escapes bytes in the format used in protocol buffer text format, which
- * is the same as the format used for C string literals. All bytes
- * that are not printable 7-bit ASCII characters are escaped, as well as
- * backslash, single-quote, and double-quote characters. Characters for
- * which no defined short-hand escape sequence is defined will be escaped
- * using 3-digit octal sequences.
- */
- public static String escapeBytes(final ByteSequence input) {
- final StringBuilder builder = new StringBuilder(input.size());
- for (int i = 0; i < input.size(); i++) {
- final byte b = input.byteAt(i);
- switch (b) {
- // Java does not recognize \a or \v, apparently.
- case 0x07: builder.append("\\a"); break;
- case '\b': builder.append("\\b"); break;
- case '\f': builder.append("\\f"); break;
- case '\n': builder.append("\\n"); break;
- case '\r': builder.append("\\r"); break;
- case '\t': builder.append("\\t"); break;
- case 0x0b: builder.append("\\v"); break;
- case '\\': builder.append("\\\\"); break;
- case '\'': builder.append("\\\'"); break;
- case '"' : builder.append("\\\""); break;
- default:
- // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
- // printable. Other byte values must be escaped.
- if (b >= 0x20 && b <= 0x7e) {
- builder.append((char) b);
- } else {
- builder.append('\\');
- builder.append((char) ('0' + ((b >>> 6) & 3)));
- builder.append((char) ('0' + ((b >>> 3) & 7)));
- builder.append((char) ('0' + (b & 7)));
- }
- break;
- }
- }
- return builder.toString();
- }
-
/**
* Escapes bytes in the format used in protocol buffer text format, which
* is the same as the format used for C string literals. All bytes
@@ -1758,33 +1778,15 @@ public final class TextFormat {
* which no defined short-hand escape sequence is defined will be escaped
* using 3-digit octal sequences.
*/
- public static String escapeBytes(final ByteString input) {
- return escapeBytes(new ByteSequence() {
- @Override
- public int size() {
- return input.size();
- }
- @Override
- public byte byteAt(int offset) {
- return input.byteAt(offset);
- }
- });
+ public static String escapeBytes(ByteString input) {
+ return TextFormatEscaper.escapeBytes(input);
}
/**
* Like {@link #escapeBytes(ByteString)}, but used for byte array.
*/
- public static String escapeBytes(final byte[] input) {
- return escapeBytes(new ByteSequence() {
- @Override
- public int size() {
- return input.length;
- }
- @Override
- public byte byteAt(int offset) {
- return input[offset];
- }
- });
+ public static String escapeBytes(byte[] input) {
+ return TextFormatEscaper.escapeBytes(input);
}
/**
@@ -1868,7 +1870,9 @@ public final class TextFormat {
}
}
- return ByteString.copyFrom(result, 0, pos);
+ return result.length == pos
+ ? ByteString.wrap(result) // This reference has not been out of our control.
+ : ByteString.copyFrom(result, 0, pos);
}
/**
@@ -1896,7 +1900,7 @@ public final class TextFormat {
* Escape double quotes and backslashes in a String for unicode output of a message.
*/
public static String escapeDoubleQuotesAndBackslashes(final String input) {
- return input.replace("\\", "\\\\").replace("\"", "\\\"");
+ return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
}
/**
diff --git a/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java b/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java
index e69de29b..da9ceadd 100644
--- a/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java
+++ b/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java
@@ -0,0 +1,137 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * Provide text format escaping support for proto2 instances.
+ */
+final class TextFormatEscaper {
+ private TextFormatEscaper() {}
+
+ private interface ByteSequence {
+ int size();
+ byte byteAt(int offset);
+ }
+
+ /**
+ * Escapes bytes in the format used in protocol buffer text format, which
+ * is the same as the format used for C string literals. All bytes
+ * that are not printable 7-bit ASCII characters are escaped, as well as
+ * backslash, single-quote, and double-quote characters. Characters for
+ * which no defined short-hand escape sequence is defined will be escaped
+ * using 3-digit octal sequences.
+ */
+ static String escapeBytes(final ByteSequence input) {
+ final StringBuilder builder = new StringBuilder(input.size());
+ for (int i = 0; i < input.size(); i++) {
+ final byte b = input.byteAt(i);
+ switch (b) {
+ // Java does not recognize \a or \v, apparently.
+ case 0x07: builder.append("\\a"); break;
+ case '\b': builder.append("\\b"); break;
+ case '\f': builder.append("\\f"); break;
+ case '\n': builder.append("\\n"); break;
+ case '\r': builder.append("\\r"); break;
+ case '\t': builder.append("\\t"); break;
+ case 0x0b: builder.append("\\v"); break;
+ case '\\': builder.append("\\\\"); break;
+ case '\'': builder.append("\\\'"); break;
+ case '"' : builder.append("\\\""); break;
+ default:
+ // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
+ // printable. Other byte values must be escaped.
+ if (b >= 0x20 && b <= 0x7e) {
+ builder.append((char) b);
+ } else {
+ builder.append('\\');
+ builder.append((char) ('0' + ((b >>> 6) & 3)));
+ builder.append((char) ('0' + ((b >>> 3) & 7)));
+ builder.append((char) ('0' + (b & 7)));
+ }
+ break;
+ }
+ }
+ return builder.toString();
+ }
+
+ /**
+ * Escapes bytes in the format used in protocol buffer text format, which
+ * is the same as the format used for C string literals. All bytes
+ * that are not printable 7-bit ASCII characters are escaped, as well as
+ * backslash, single-quote, and double-quote characters. Characters for
+ * which no defined short-hand escape sequence is defined will be escaped
+ * using 3-digit octal sequences.
+ */
+ static String escapeBytes(final ByteString input) {
+ return escapeBytes(new ByteSequence() {
+ @Override
+ public int size() {
+ return input.size();
+ }
+ @Override
+ public byte byteAt(int offset) {
+ return input.byteAt(offset);
+ }
+ });
+ }
+
+ /**
+ * Like {@link #escapeBytes(ByteString)}, but used for byte array.
+ */
+ static String escapeBytes(final byte[] input) {
+ return escapeBytes(new ByteSequence() {
+ @Override
+ public int size() {
+ return input.length;
+ }
+ @Override
+ public byte byteAt(int offset) {
+ return input[offset];
+ }
+ });
+ }
+
+ /**
+ * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
+ * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
+ * individually as a 3-digit octal escape. Yes, it's weird.
+ */
+ static String escapeText(final String input) {
+ return escapeBytes(ByteString.copyFromUtf8(input));
+ }
+
+ /**
+ * Escape double quotes and backslashes in a String for unicode output of a message.
+ */
+ static String escapeDoubleQuotesAndBackslashes(final String input) {
+ return input.replace("\\", "\\\\").replace("\"", "\\\"");
+ }
+}
diff --git a/java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java b/java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java
new file mode 100644
index 00000000..2ecf912e
--- /dev/null
+++ b/java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java
@@ -0,0 +1,225 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.Descriptors.FieldDescriptor;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+
+/**
+ * Data structure which is populated with the locations of each field value parsed from the text.
+ *
+ * <p>The locations of primary fields values are retrieved by {@code getLocation} or
+ * {@code getLocations}. The locations of sub message values are within nested
+ * {@code TextFormatParseInfoTree}s, retrieved by {@code getNestedTree} or {@code getNestedTrees}.
+ *
+ * <p>The {@code TextFormatParseInfoTree} is created by a Builder.
+ */
+public class TextFormatParseInfoTree {
+
+ // Defines a mapping between each field's descriptor to the list of locations where
+ // its value(s) were encountered.
+ private Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField;
+
+ // Defines a mapping between a field's descriptor to a list of TextFormatParseInfoTrees for
+ // sub message location information.
+ Map<FieldDescriptor, List<TextFormatParseInfoTree>> subtreesFromField;
+
+ /**
+ * Construct a {@code TextFormatParseInfoTree}.
+ *
+ * @param locationsFromField a map of fields to location in the source code
+ * @param subtreeBuildersFromField a map of fields to parse tree location information builders
+ */
+ private TextFormatParseInfoTree(
+ Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField,
+ Map<FieldDescriptor, List<TextFormatParseInfoTree.Builder>> subtreeBuildersFromField) {
+
+ // The maps are unmodifiable. The values in the maps are unmodifiable.
+ Map<FieldDescriptor, List<TextFormatParseLocation>> locs =
+ new HashMap<FieldDescriptor, List<TextFormatParseLocation>>();
+ for (Entry<FieldDescriptor, List<TextFormatParseLocation>> kv : locationsFromField.entrySet()) {
+ locs.put(kv.getKey(), Collections.unmodifiableList(kv.getValue()));
+ }
+ this.locationsFromField = Collections.unmodifiableMap(locs);
+
+ Map<FieldDescriptor, List<TextFormatParseInfoTree>> subs =
+ new HashMap<FieldDescriptor, List<TextFormatParseInfoTree>>();
+ for (Entry<FieldDescriptor, List<Builder>> kv : subtreeBuildersFromField.entrySet()) {
+ List<TextFormatParseInfoTree> submessagesOfField = new ArrayList<TextFormatParseInfoTree>();
+ for (Builder subBuilder : kv.getValue()) {
+ submessagesOfField.add(subBuilder.build());
+ }
+ subs.put(kv.getKey(), Collections.unmodifiableList(submessagesOfField));
+ }
+ this.subtreesFromField = Collections.unmodifiableMap(subs);
+ }
+
+ /**
+ * Retrieve all the locations of a field.
+ *
+ * @param fieldDescriptor the {@link FieldDescriptor} of the desired field
+ * @return a list of the locations of values of the field. If there are no values
+ * or the field doesn't exist, an empty list is returned.
+ */
+ public List<TextFormatParseLocation> getLocations(final FieldDescriptor fieldDescriptor) {
+ List<TextFormatParseLocation> result = locationsFromField.get(fieldDescriptor);
+ return (result == null) ? Collections.<TextFormatParseLocation>emptyList() : result;
+ }
+
+ /**
+ * Get the location in the source of a field's value.
+ *
+ * <p>Returns the {@link TextFormatParseLocation} for index-th value of the field in the parsed
+ * text.
+ *
+ * @param fieldDescriptor the {@link FieldDescriptor} of the desired field
+ * @param index the index of the value.
+ * @return the {@link TextFormatParseLocation} of the value
+ * @throws IllegalArgumentException if index is out of range
+ */
+ public TextFormatParseLocation getLocation(final FieldDescriptor fieldDescriptor, int index) {
+ return getFromList(getLocations(fieldDescriptor), index, fieldDescriptor);
+ }
+
+ /**
+ * Retrieve a list of all the location information trees for a sub message field.
+ *
+ * @param fieldDescriptor the {@link FieldDescriptor} of the desired field
+ * @return A list of {@link TextFormatParseInfoTree}
+ */
+ public List<TextFormatParseInfoTree> getNestedTrees(final FieldDescriptor fieldDescriptor) {
+ List<TextFormatParseInfoTree> result = subtreesFromField.get(fieldDescriptor);
+ return result == null ? Collections.<TextFormatParseInfoTree>emptyList() : result;
+ }
+
+ /**
+ * Returns the parse info tree for the given field, which must be a message type.
+ *
+ * @param fieldDescriptor the {@link FieldDescriptor} of the desired sub message
+ * @param index the index of the message value.
+ * @return the {@code ParseInfoTree} of the message value
+ * @throws IllegalArgumentException if index is out of range, which includes every index when
+ * the field has no nested trees
+ */
+ public TextFormatParseInfoTree getNestedTree(final FieldDescriptor fieldDescriptor, int index) {
+ return getFromList(getNestedTrees(fieldDescriptor), index, fieldDescriptor);
+ }
+
+ /**
+ * Create a builder for a {@code ParseInfoTree}.
+ *
+ * @return the builder
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ private static <T> T getFromList(List<T> list, int index, FieldDescriptor fieldDescriptor) {
+ if (index >= list.size() || index < 0) {
+ throw new IllegalArgumentException(String.format("Illegal index field: %s, index %d",
+ fieldDescriptor == null ? "<null>" : fieldDescriptor.getName(), index));
+ }
+ return list.get(index);
+ }
+
+ /**
+ * Builder for a {@link TextFormatParseInfoTree}.
+ */
+ public static class Builder {
+
+ private Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField;
+
+ // Defines a mapping between a field's descriptor to a list of ParseInfoTrees builders for
+ // sub message location information.
+ private Map<FieldDescriptor, List<Builder>> subtreeBuildersFromField;
+
+ /**
+ * Create a root level {@code ParseInfoTree} builder.
+ */
+ private Builder() {
+ locationsFromField = new HashMap<FieldDescriptor, List<TextFormatParseLocation>>();
+ subtreeBuildersFromField = new HashMap<FieldDescriptor, List<Builder>>();
+ }
+
+ /**
+ * Record the starting location of a single value for a field.
+ *
+ * @param fieldDescriptor the field
+ * @param location source code location information
+ */
+ public Builder setLocation(
+ final FieldDescriptor fieldDescriptor, TextFormatParseLocation location) {
+ List<TextFormatParseLocation> fieldLocations = locationsFromField.get(fieldDescriptor);
+ if (fieldLocations == null) {
+ fieldLocations = new ArrayList<TextFormatParseLocation>();
+ locationsFromField.put(fieldDescriptor, fieldLocations);
+ }
+ fieldLocations.add(location);
+ return this;
+ }
+
+ /**
+ * Set for a sub message.
+ *
+ * <p>A new builder is created for a sub message. The builder that is returned is a new builder.
+ * The return is <em>not</em> the invoked {@code builder.getBuilderForSubMessageField}.
+ *
+ * @param fieldDescriptor the field whose value is the submessage
+ * @return a new Builder for the sub message
+ */
+ public Builder getBuilderForSubMessageField(final FieldDescriptor fieldDescriptor) {
+ List<Builder> submessageBuilders = subtreeBuildersFromField.get(fieldDescriptor);
+ if (submessageBuilders == null) {
+ submessageBuilders = new ArrayList<Builder>();
+ subtreeBuildersFromField.put(fieldDescriptor, submessageBuilders);
+ }
+ Builder subtreeBuilder = new Builder();
+ submessageBuilders.add(subtreeBuilder);
+ return subtreeBuilder;
+ }
+
+ /**
+ * Build the {@code TextFormatParseInfoTree}.
+ *
+ * @return the {@code TextFormatParseInfoTree}
+ */
+ public TextFormatParseInfoTree build() {
+ return new TextFormatParseInfoTree(locationsFromField, subtreeBuildersFromField);
+ }
+ }
+}
diff --git a/java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java b/java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java
new file mode 100644
index 00000000..cce286e1
--- /dev/null
+++ b/java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java
@@ -0,0 +1,104 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.Arrays;
+
+/**
+ * A location in the source code.
+ *
+ * <p>A location is the starting line number and starting column number. Instances are immutable
+ * and compare equal when both their line and column match.
+ */
+public final class TextFormatParseLocation {
+
+ /**
+ * The empty location, represented with a line and a column that are both {@code -1}.
+ */
+ public static final TextFormatParseLocation EMPTY = new TextFormatParseLocation(-1, -1);
+
+ /**
+ * Create a location.
+ *
+ * <p>Passing {@code -1} for both {@code line} and {@code column} yields the shared
+ * {@link #EMPTY} instance; any other negative value is rejected.
+ *
+ * @param line the starting line number
+ * @param column the starting column number
+ * @return a {@code TextFormatParseLocation}
+ * @throws IllegalArgumentException if {@code line} or {@code column} is negative, other than
+ * the {@code (-1, -1)} pair that denotes {@link #EMPTY}
+ */
+ static TextFormatParseLocation create(int line, int column) {
+ if (line == -1 && column == -1) {
+ return EMPTY;
+ }
+ if (line < 0 || column < 0) {
+ throw new IllegalArgumentException(
+ String.format("line and column values must be >= 0: line %d, column: %d", line, column));
+ }
+ return new TextFormatParseLocation(line, column);
+ }
+
+ private final int line;
+ private final int column;
+
+ // Private so that all external construction goes through create(), which validates its input.
+ private TextFormatParseLocation(int line, int column) {
+ this.line = line;
+ this.column = column;
+ }
+
+ /** Returns the starting line number, or {@code -1} for {@link #EMPTY}. */
+ public int getLine() {
+ return line;
+ }
+
+ /** Returns the starting column number, or {@code -1} for {@link #EMPTY}. */
+ public int getColumn() {
+ return column;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("ParseLocation{line=%d, column=%d}", line, column);
+ }
+
+ /** Two locations are equal when they have the same line and the same column. */
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ }
+ if (!(o instanceof TextFormatParseLocation)) {
+ return false;
+ }
+ TextFormatParseLocation that = (TextFormatParseLocation) o;
+ return (this.line == that.getLine())
+ && (this.column == that.getColumn());
+ }
+
+ /** Hashes the line and column together, consistent with {@link #equals(Object)}. */
+ @Override
+ public int hashCode() {
+ int[] values = {line, column};
+ return Arrays.hashCode(values);
+ }
+}
diff --git a/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java b/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java
index 435ad4d4..9500f905 100644
--- a/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java
+++ b/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java
@@ -61,15 +61,6 @@ public final class UnknownFieldSetLite {
public static UnknownFieldSetLite getDefaultInstance() {
return DEFAULT_INSTANCE;
}
-
- /**
- * Returns an empty {@code UnknownFieldSetLite.Builder}.
- *
- * <p>For use by generated code only.
- */
- public static Builder newBuilder() {
- return new Builder();
- }
/**
* Returns a new mutable instance.
@@ -262,6 +253,21 @@ public final class UnknownFieldSetLite {
return hashCode;
}
+ /**
+ * Prints a String representation of the unknown field set.
+ *
+ * <p>One entry is printed per stored field, in storage order, by delegating to
+ * {@code MessageLiteToString.printField}.
+ *
+ * <p>For use by generated code only.
+ *
+ * @param buffer the buffer to write to
+ * @param indent the number of spaces the fields should be indented by
+ */
+ final void printWithIndent(StringBuilder buffer, int indent) {
+ for (int i = 0; i < count; i++) {
+ // The field number is extracted from the stored tag and handed to printField in string
+ // form, together with the stored value for that entry.
+ int fieldNumber = WireFormat.getTagFieldNumber(tags[i]);
+ MessageLiteToString.printField(buffer, indent, String.valueOf(fieldNumber), objects[i]);
+ }
+ }
+
private void storeField(int tag, Object value) {
ensureCapacity();
@@ -369,90 +375,4 @@ public final class UnknownFieldSetLite {
}
return this;
}
-
- /**
- * Builder for {@link UnknownFieldSetLite}s.
- *
- * <p>Use {@link UnknownFieldSet#newBuilder()} to construct a {@code Builder}.
- *
- * <p>For use by generated code only.
- */
- // TODO(dweis): Update the mutable API to no longer need this builder and delete.
- public static final class Builder {
-
- private UnknownFieldSetLite set;
-
- private Builder() {
- this.set = null;
- }
-
- /**
- * Ensures internal state is initialized for use.
- */
- private void ensureNotBuilt() {
- if (set == null) {
- set = new UnknownFieldSetLite();
- }
-
- set.checkMutable();
- }
-
- /**
- * Parse a single field from {@code input} and merge it into this set.
- *
- * <p>For use by generated code only.
- *
- * @param tag The field's tag number, which was already parsed.
- * @return {@code false} if the tag is an end group tag.
- */
- boolean mergeFieldFrom(final int tag, final CodedInputStream input) throws IOException {
- ensureNotBuilt();
- return set.mergeFieldFrom(tag, input);
- }
-
- /**
- * Convenience method for merging a new field containing a single varint
- * value. This is used in particular when an unknown enum value is
- * encountered.
- *
- * <p>For use by generated code only.
- */
- Builder mergeVarintField(int fieldNumber, int value) {
- ensureNotBuilt();
- set.mergeVarintField(fieldNumber, value);
- return this;
- }
-
- /**
- * Convenience method for merging a length-delimited field.
- *
- * <p>For use by generated code only.
- */
- public Builder mergeLengthDelimitedField(final int fieldNumber, final ByteString value) {
- ensureNotBuilt();
- set.mergeLengthDelimitedField(fieldNumber, value);
- return this;
- }
-
- /**
- * Build the {@link UnknownFieldSetLite} and return it.
- *
- * <p>Once {@code build()} has been called, the {@code Builder} will no
- * longer be usable. Calling any method after {@code build()} will result
- * in undefined behavior and can cause an
- * {@code UnsupportedOperationException} to be thrown.
- *
- * <p>For use by generated code only.
- */
- public UnknownFieldSetLite build() {
- if (set == null) {
- return DEFAULT_INSTANCE;
- }
-
- set.checkMutable();
- set.makeImmutable();
-
- return set;
- }
- }
}
diff --git a/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java b/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java
index f443ee39..0fbf4d40 100644
--- a/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java
+++ b/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java
@@ -30,6 +30,7 @@
package com.google.protobuf;
+import java.io.IOException;
import java.nio.ByteBuffer;
/**
@@ -49,8 +50,8 @@ public final class UnsafeByteOperations {
/**
* An unsafe operation that returns a {@link ByteString} that is backed by the provided buffer.
*
- * @param buffer the Java NIO buffer to be wrapped.
- * @return a {@link ByteString} backed by the provided buffer.
+ * @param buffer the Java NIO buffer to be wrapped
+ * @return a {@link ByteString} backed by the provided buffer
*/
public static ByteString unsafeWrap(ByteBuffer buffer) {
if (buffer.hasArray()) {
@@ -60,4 +61,24 @@ public final class UnsafeByteOperations {
return new NioByteString(buffer);
}
}
+
+ /**
+ * Writes the given {@link ByteString} to the provided {@link ByteOutput}. Calling this method may
+ * result in multiple operations on the target {@link ByteOutput}
+ * (i.e. for roped {@link ByteString}s).
+ *
+ * <p>This method exposes the internal backing buffer(s) of the {@link ByteString} to the {@link
+ * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious
+ * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution!
+ *
+ * <p>NOTE: The {@link ByteOutput} <strong>MUST NOT</strong> modify the provided buffers. Doing
+ * so may result in corrupted data, which would be difficult to debug.
+ *
+ * @param bytes the {@link ByteString} to be written
+ * @param output the output to receive the bytes
+ * @throws IOException if an I/O error occurs
+ */
+ public static void unsafeWriteTo(ByteString bytes, ByteOutput output) throws IOException {
+ // Pure delegation: the ByteString itself decides how its contents reach the output.
+ bytes.writeTo(output);
+ }
}
diff --git a/java/core/src/main/java/com/google/protobuf/Utf8.java b/java/core/src/main/java/com/google/protobuf/Utf8.java
index 48c7e9e6..308c69e9 100644
--- a/java/core/src/main/java/com/google/protobuf/Utf8.java
+++ b/java/core/src/main/java/com/google/protobuf/Utf8.java
@@ -30,6 +30,19 @@
package com.google.protobuf;
+import static java.lang.Character.MAX_SURROGATE;
+import static java.lang.Character.MIN_SURROGATE;
+import static java.lang.Character.isSurrogatePair;
+import static java.lang.Character.toCodePoint;
+
+import java.lang.reflect.Field;
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+import java.security.AccessController;
+import java.security.PrivilegedExceptionAction;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
/**
* A set of low-level, high-performance static utility methods related
* to the UTF-8 character encoding. This class has no dependencies
@@ -64,9 +77,24 @@ package com.google.protobuf;
*
* @author martinrb@google.com (Martin Buchholz)
*/
+// TODO(nathanmittler): Copy changes in this class back to Guava
final class Utf8 {
- private Utf8() {}
-
+ private static final Logger logger = Logger.getLogger(Utf8.class.getName());
+
+ /**
+ * UTF-8 is a runtime hot spot so we attempt to provide heavily optimized implementations
+ * depending on what is available on the platform. The processor is the platform-optimized
+ * delegate for which all methods are delegated directly to.
+ */
+ private static final Processor processor =
+ UnsafeProcessor.isAvailable() ? new UnsafeProcessor() : new SafeProcessor();
+
+ /**
+ * A mask used when performing unsafe reads to determine if a long value contains any non-ASCII
+ * characters (i.e. any byte >= 0x80).
+ */
+ private static final long ASCII_MASK_LONG = 0x8080808080808080L;
+
/**
* Maximum number of bytes per Java UTF-16 char in UTF-8.
* @see java.nio.charset.CharsetEncoder#maxBytesPerChar()
@@ -85,6 +113,18 @@ final class Utf8 {
*/
public static final int MALFORMED = -1;
+ /**
+ * Used by {@code Unsafe} UTF-8 string validation logic to determine the minimum string length
+ * above which to employ an optimized algorithm for counting ASCII characters. The reason for this
+ * threshold is that for small strings, the optimization may not be beneficial or may even
+ * negatively impact performance since it requires additional logic to avoid unaligned reads
+ * (when calling {@code Unsafe.getLong}). This threshold guarantees that even if the initial
+ * offset is unaligned, we're guaranteed to make at least one call to {@code Unsafe.getLong()}
+ * which provides a performance improvement that entirely subsumes the cost of the additional
+ * logic.
+ */
+ private static final int UNSAFE_COUNT_ASCII_THRESHOLD = 16;
+
// Other state values include the partial bytes of the incomplete
// character to be decoded in the simplest way: we pack the bytes
// into the state int in little-endian order. For example:
@@ -112,7 +152,7 @@ final class Utf8 {
* isValidUtf8(bytes, 0, bytes.length)}.
*/
public static boolean isValidUtf8(byte[] bytes) {
- return isValidUtf8(bytes, 0, bytes.length);
+ return processor.isValidUtf8(bytes, 0, bytes.length);
}
/**
@@ -125,7 +165,7 @@ final class Utf8 {
* partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}.
*/
public static boolean isValidUtf8(byte[] bytes, int index, int limit) {
- return partialIsValidUtf8(bytes, index, limit) == COMPLETE;
+ return processor.isValidUtf8(bytes, index, limit);
}
/**
@@ -146,183 +186,8 @@ final class Utf8 {
* decode the character when passed to a subsequent invocation of a
* partial decoding method.
*/
- public static int partialIsValidUtf8(
- int state, byte[] bytes, int index, int limit) {
- if (state != COMPLETE) {
- // The previous decoding operation was incomplete (or malformed).
- // We look for a well-formed sequence consisting of bytes from
- // the previous decoding operation (stored in state) together
- // with bytes from the array slice.
- //
- // We expect such "straddler characters" to be rare.
-
- if (index >= limit) { // No bytes? No progress.
- return state;
- }
- int byte1 = (byte) state;
- // byte1 is never ASCII.
- if (byte1 < (byte) 0xE0) {
- // two-byte form
-
- // Simultaneously checks for illegal trailing-byte in
- // leading position and overlong 2-byte form.
- if (byte1 < (byte) 0xC2 ||
- // byte2 trailing-byte test
- bytes[index++] > (byte) 0xBF) {
- return MALFORMED;
- }
- } else if (byte1 < (byte) 0xF0) {
- // three-byte form
-
- // Get byte2 from saved state or array
- int byte2 = (byte) ~(state >> 8);
- if (byte2 == 0) {
- byte2 = bytes[index++];
- if (index >= limit) {
- return incompleteStateFor(byte1, byte2);
- }
- }
- if (byte2 > (byte) 0xBF ||
- // overlong? 5 most significant bits must not all be zero
- (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
- // illegal surrogate codepoint?
- (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
- // byte3 trailing-byte test
- bytes[index++] > (byte) 0xBF) {
- return MALFORMED;
- }
- } else {
- // four-byte form
-
- // Get byte2 and byte3 from saved state or array
- int byte2 = (byte) ~(state >> 8);
- int byte3 = 0;
- if (byte2 == 0) {
- byte2 = bytes[index++];
- if (index >= limit) {
- return incompleteStateFor(byte1, byte2);
- }
- } else {
- byte3 = (byte) (state >> 16);
- }
- if (byte3 == 0) {
- byte3 = bytes[index++];
- if (index >= limit) {
- return incompleteStateFor(byte1, byte2, byte3);
- }
- }
-
- // If we were called with state == MALFORMED, then byte1 is 0xFF,
- // which never occurs in well-formed UTF-8, and so we will return
- // MALFORMED again below.
-
- if (byte2 > (byte) 0xBF ||
- // Check that 1 <= plane <= 16. Tricky optimized form of:
- // if (byte1 > (byte) 0xF4 ||
- // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
- // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
- (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
- // byte3 trailing-byte test
- byte3 > (byte) 0xBF ||
- // byte4 trailing-byte test
- bytes[index++] > (byte) 0xBF) {
- return MALFORMED;
- }
- }
- }
-
- return partialIsValidUtf8(bytes, index, limit);
- }
-
- /**
- * Tells whether the given byte array slice is a well-formed,
- * malformed, or incomplete UTF-8 byte sequence. The range of bytes
- * to be checked extends from index {@code index}, inclusive, to
- * {@code limit}, exclusive.
- *
- * <p>This is a convenience method, equivalent to a call to {@code
- * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit)}.
- *
- * @return {@link #MALFORMED} if the partial byte sequence is
- * definitely not well-formed, {@link #COMPLETE} if it is well-formed
- * (no additional input needed), or if the byte sequence is
- * "incomplete", i.e. apparently terminated in the middle of a character,
- * an opaque integer "state" value containing enough information to
- * decode the character when passed to a subsequent invocation of a
- * partial decoding method.
- */
- public static int partialIsValidUtf8(
- byte[] bytes, int index, int limit) {
- // Optimize for 100% ASCII.
- // Hotspot loves small simple top-level loops like this.
- while (index < limit && bytes[index] >= 0) {
- index++;
- }
-
- return (index >= limit) ? COMPLETE :
- partialIsValidUtf8NonAscii(bytes, index, limit);
- }
-
- private static int partialIsValidUtf8NonAscii(
- byte[] bytes, int index, int limit) {
- for (;;) {
- int byte1, byte2;
-
- // Optimize for interior runs of ASCII bytes.
- do {
- if (index >= limit) {
- return COMPLETE;
- }
- } while ((byte1 = bytes[index++]) >= 0);
-
- if (byte1 < (byte) 0xE0) {
- // two-byte form
-
- if (index >= limit) {
- return byte1;
- }
-
- // Simultaneously checks for illegal trailing-byte in
- // leading position and overlong 2-byte form.
- if (byte1 < (byte) 0xC2 ||
- bytes[index++] > (byte) 0xBF) {
- return MALFORMED;
- }
- } else if (byte1 < (byte) 0xF0) {
- // three-byte form
-
- if (index >= limit - 1) { // incomplete sequence
- return incompleteStateFor(bytes, index, limit);
- }
- if ((byte2 = bytes[index++]) > (byte) 0xBF ||
- // overlong? 5 most significant bits must not all be zero
- (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
- // check for illegal surrogate codepoints
- (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
- // byte3 trailing-byte test
- bytes[index++] > (byte) 0xBF) {
- return MALFORMED;
- }
- } else {
- // four-byte form
-
- if (index >= limit - 2) { // incomplete sequence
- return incompleteStateFor(bytes, index, limit);
- }
- if ((byte2 = bytes[index++]) > (byte) 0xBF ||
- // Check that 1 <= plane <= 16. Tricky optimized form of:
- // if (byte1 > (byte) 0xF4 ||
- // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
- // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
- (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
- // byte3 trailing-byte test
- bytes[index++] > (byte) 0xBF ||
- // byte4 trailing-byte test
- bytes[index++] > (byte) 0xBF) {
- return MALFORMED;
- }
- }
- }
+ public static int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) {
+ return processor.partialIsValidUtf8(state, bytes, index, limit);
}
private static int incompleteStateFor(int byte1) {
@@ -352,19 +217,31 @@ final class Utf8 {
default: throw new AssertionError();
}
}
-
+
+ /**
+ * Computes the "incomplete" state for a multi-byte character that is cut off at the end of the
+ * checked range, reading the trailing bytes that are available from {@code buffer}.
+ *
+ * @param buffer the buffer holding any trailing bytes already present
+ * @param byte1 the lead byte of the incomplete character
+ * @param index the absolute buffer index of the first available trailing byte
+ * @param remaining the number of trailing bytes available at {@code index}; must be 0, 1 or 2
+ * @return the state produced by the matching one-, two- or three-byte
+ * {@code incompleteStateFor} overload
+ * @throws AssertionError if {@code remaining} is not 0, 1 or 2
+ */
+ private static int incompleteStateFor(
+ final ByteBuffer buffer, final int byte1, final int index, final int remaining) {
+ switch (remaining) {
+ case 0:
+ return incompleteStateFor(byte1);
+ case 1:
+ return incompleteStateFor(byte1, buffer.get(index));
+ case 2:
+ return incompleteStateFor(byte1, buffer.get(index), buffer.get(index + 1));
+ default:
+ throw new AssertionError();
+ }
+ }
// These UTF-8 handling methods are copied from Guava's Utf8 class with a modification to throw
// a protocol buffer local exception. This exception is then caught in CodedOutputStream so it can
// fallback to more lenient behavior.
static class UnpairedSurrogateException extends IllegalArgumentException {
-
private UnpairedSurrogateException(int index, int length) {
super("Unpaired surrogate at index " + index + " of " + length);
}
}
-
+
/**
* Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string,
* this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in
@@ -426,56 +303,1381 @@ final class Utf8 {
return utf8Length;
}
- static int encode(CharSequence sequence, byte[] bytes, int offset, int length) {
- int utf16Length = sequence.length();
- int j = offset;
- int i = 0;
- int limit = offset + length;
- // Designed to take advantage of
- // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
- for (char c; i < utf16Length && i + j < limit && (c = sequence.charAt(i)) < 0x80; i++) {
- bytes[j + i] = (byte) c;
- }
- if (i == utf16Length) {
- return j + utf16Length;
- }
- j += i;
- for (char c; i < utf16Length; i++) {
- c = sequence.charAt(i);
- if (c < 0x80 && j < limit) {
- bytes[j++] = (byte) c;
- } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes
- bytes[j++] = (byte) ((0xF << 6) | (c >>> 6));
- bytes[j++] = (byte) (0x80 | (0x3F & c));
- } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) {
- // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
- bytes[j++] = (byte) ((0xF << 5) | (c >>> 12));
- bytes[j++] = (byte) (0x80 | (0x3F & (c >>> 6)));
- bytes[j++] = (byte) (0x80 | (0x3F & c));
- } else if (j <= limit - 4) {
- // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 bytes
- final char low;
- if (i + 1 == sequence.length()
- || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) {
- throw new UnpairedSurrogateException((i - 1), utf16Length);
- }
- int codePoint = Character.toCodePoint(c, low);
- bytes[j++] = (byte) ((0xF << 4) | (codePoint >>> 18));
- bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12)));
- bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6)));
- bytes[j++] = (byte) (0x80 | (0x3F & codePoint));
+ static int encode(CharSequence in, byte[] out, int offset, int length) {
+ return processor.encodeUtf8(in, out, offset, length);
+ }
+ // End Guava UTF-8 methods.
+
+ /**
+ * Determines if the given {@link ByteBuffer} is a valid UTF-8 string.
+ *
+ * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct)
+ * and the capabilities of the platform.
+ *
+ * <p>The bytes checked are those from the buffer's current position (inclusive) to its limit
+ * (exclusive).
+ *
+ * @param buffer the buffer to check
+ * @return {@code true} if the checked bytes form a well-formed UTF-8 byte sequence
+ * @see Utf8#isValidUtf8(byte[], int, int)
+ */
+ static boolean isValidUtf8(ByteBuffer buffer) {
+ // The processor takes an absolute [index, limit) range, so the end bound must be limit().
+ // remaining() equals limit() only when position() == 0; for any other position it would
+ // cut the checked range short and could report invalid data as valid.
+ return processor.isValidUtf8(buffer, buffer.position(), buffer.limit());
+ }
+
+ /**
+ * Determines if the given {@link ByteBuffer} is a partially valid UTF-8 string.
+ *
+ * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct)
+ * and the capabilities of the platform.
+ *
+ * @param state either {@link Utf8#COMPLETE} (for the initial decoding operation) or the value
+ * returned from a previous partial decoding call
+ * @param buffer the buffer to check
+ * @param index the index of the first byte to check, inclusive
+ * @param limit the index at which checking stops, exclusive
+ * @return the state reported by the platform processor for the checked range
+ * @see Utf8#partialIsValidUtf8(int, byte[], int, int)
+ */
+ static int partialIsValidUtf8(int state, ByteBuffer buffer, int index, int limit) {
+ return processor.partialIsValidUtf8(state, buffer, index, limit);
+ }
+
+ /**
+ * Encodes the given characters to the target {@link ByteBuffer} using UTF-8 encoding.
+ *
+ * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct)
+ * and the capabilities of the platform.
+ *
+ * <p>The work is delegated to the platform processor, whose contract is that upon return the
+ * position of {@code out} points just after the last encoded byte.
+ *
+ * @param in the source string to be encoded
+ * @param out the target buffer to receive the encoded string
+ * @see Utf8#encode(CharSequence, byte[], int, int)
+ */
+ static void encodeUtf8(CharSequence in, ByteBuffer out) {
+ processor.encodeUtf8(in, out);
+ }
+
+ /**
+ * Counts (approximately) the number of consecutive ASCII characters in the given buffer.
+ * The byte order of the {@link ByteBuffer} does not matter, so performance can be improved if
+ * native byte order is used (i.e. no byte-swapping in {@link ByteBuffer#getLong(int)}).
+ *
+ * <p>Bytes are examined eight at a time, so the returned count is always a multiple of eight.
+ * The scan stops at the first 8-byte group that contains a non-ASCII byte, or when fewer than
+ * eight bytes remain before {@code limit}; ASCII bytes inside that last group are not counted.
+ *
+ * @param buffer the buffer to be scanned for ASCII chars
+ * @param index the starting index of the scan
+ * @param limit the limit within buffer for the scan
+ * @return the number of ASCII characters found. The stopping position will be at or
+ * before the first non-ASCII byte.
+ */
+ private static int estimateConsecutiveAscii(ByteBuffer buffer, int index, int limit) {
+ int i = index;
+ // A full 8-byte read at i needs i + 7 < limit, hence the adjusted bound.
+ final int lim = limit - 7;
+ // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
+ // To speed things up further, we're reading longs instead of bytes so we use a mask to
+ // determine if any byte in the current long is non-ASCII.
+ for (; i < lim && (buffer.getLong(i) & ASCII_MASK_LONG) == 0; i += 8) {}
+ return i - index;
+ }
+
+ /**
+ * A processor of UTF-8 strings, providing methods for checking validity and encoding.
+ */
+ // TODO(nathanmittler): Add support for Memory/MemoryBlock on Android.
+ abstract static class Processor {
+ /**
+ * Returns {@code true} if the given byte array slice is a
+ * well-formed UTF-8 byte sequence. The range of bytes to be
+ * checked extends from index {@code index}, inclusive, to {@code
+ * limit}, exclusive.
+ *
+ * <p>This is a convenience method, equivalent to {@code
+ * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit) == Utf8.COMPLETE}.
+ */
+ final boolean isValidUtf8(byte[] bytes, int index, int limit) {
+ return partialIsValidUtf8(COMPLETE, bytes, index, limit) == COMPLETE;
+ }
+
+ /**
+ * Tells whether the given byte array slice is a well-formed,
+ * malformed, or incomplete UTF-8 byte sequence. The range of bytes
+ * to be checked extends from index {@code index}, inclusive, to
+ * {@code limit}, exclusive.
+ *
+ * @param state either {@link Utf8#COMPLETE} (if this is the initial decoding
+ * operation) or the value returned from a call to a partial decoding method
+ * for the previous bytes
+ * @param bytes the array containing the bytes to check
+ * @param index the index of the first byte to check, inclusive
+ * @param limit the index at which checking stops, exclusive
+ *
+ * @return {@link #MALFORMED} if the partial byte sequence is
+ * definitely not well-formed, {@link #COMPLETE} if it is well-formed
+ * (no additional input needed), or if the byte sequence is
+ * "incomplete", i.e. apparently terminated in the middle of a character,
+ * an opaque integer "state" value containing enough information to
+ * decode the character when passed to a subsequent invocation of a
+ * partial decoding method.
+ */
+ abstract int partialIsValidUtf8(int state, byte[] bytes, int index, int limit);
+
+ /**
+ * Returns {@code true} if the given portion of the {@link ByteBuffer} is a
+ * well-formed UTF-8 byte sequence. The range of bytes to be
+ * checked extends from index {@code index}, inclusive, to {@code
+ * limit}, exclusive.
+ *
+ * <p>This is a convenience method, equivalent to {@code
+ * partialIsValidUtf8(Utf8.COMPLETE, buffer, index, limit) == Utf8.COMPLETE}.
+ */
+ final boolean isValidUtf8(ByteBuffer buffer, int index, int limit) {
+ return partialIsValidUtf8(COMPLETE, buffer, index, limit) == COMPLETE;
+ }
+
+ /**
+ * Tells whether the given portion of the {@link ByteBuffer} is a well-formed, malformed, or
+ * incomplete UTF-8 byte sequence, choosing the validation routine by the kind of buffer:
+ * array-backed buffers are checked through their backing array (with {@code index} and
+ * {@code limit} shifted by the array offset), direct buffers through
+ * {@link #partialIsValidUtf8Direct}, and all other buffers through
+ * {@link #partialIsValidUtf8Default}.
+ *
+ * @param state either {@link Utf8#COMPLETE} or the value returned from a previous partial
+ * decoding call
+ * @param buffer the buffer to check
+ * @param index the index of the first byte to check, inclusive
+ * @param limit the index at which checking stops, exclusive
+ * @return the state value produced by the selected validation routine
+ */
+ final int partialIsValidUtf8(
+ final int state, final ByteBuffer buffer, int index, final int limit) {
+ if (buffer.hasArray()) {
+ final int offset = buffer.arrayOffset();
+ return partialIsValidUtf8(state, buffer.array(), offset + index, offset + limit);
+ } else if (buffer.isDirect()){
+ return partialIsValidUtf8Direct(state, buffer, index, limit);
+ }
+ return partialIsValidUtf8Default(state, buffer, index, limit);
+ }
+
+ /**
+ * Performs validation for direct {@link ByteBuffer} instances. Invoked by
+ * {@link #partialIsValidUtf8(int, ByteBuffer, int, int)} for buffers that report
+ * {@code isDirect()} and expose no backing array; the arguments are passed through unchanged.
+ */
+ abstract int partialIsValidUtf8Direct(
+ final int state, final ByteBuffer buffer, int index, final int limit);
+
+ /**
+ * Performs validation for {@link ByteBuffer} instances using the {@link ByteBuffer} API rather
+ * than potentially faster approaches. This first completes validation for the current
+ * character (provided by {@code state}) and then finishes validation for the sequence.
+ *
+ * <p>All buffer reads use absolute indices.
+ *
+ * @param state either {@link Utf8#COMPLETE} or the value returned from a previous partial
+ * decoding call
+ * @param buffer the buffer to check
+ * @param index the index of the first byte to check, inclusive
+ * @param limit the index at which checking stops, exclusive
+ * @return {@link Utf8#MALFORMED}, {@link Utf8#COMPLETE}, or an "incomplete" state value
+ */
+ final int partialIsValidUtf8Default(
+ final int state, final ByteBuffer buffer, int index, final int limit) {
+ if (state != COMPLETE) {
+ // The previous decoding operation was incomplete (or malformed).
+ // We look for a well-formed sequence consisting of bytes from
+ // the previous decoding operation (stored in state) together
+ // with bytes from the buffer slice.
+ //
+ // We expect such "straddler characters" to be rare.
+
+ if (index >= limit) { // No bytes? No progress.
+ return state;
+ }
+
+ // The lead byte is kept in the low 8 bits of the state.
+ byte byte1 = (byte) state;
+ // byte1 is never ASCII.
+ if (byte1 < (byte) 0xE0) {
+ // two-byte form
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2
+ // byte2 trailing-byte test
+ || buffer.get(index++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // three-byte form
+
+ // Get byte2 from saved state or buffer
+ byte byte2 = (byte) ~(state >> 8);
+ if (byte2 == 0) {
+ byte2 = buffer.get(index++);
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ }
+ if (byte2 > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // illegal surrogate codepoint?
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || buffer.get(index++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // four-byte form
+
+ // Get byte2 and byte3 from saved state or buffer
+ byte byte2 = (byte) ~(state >> 8);
+ byte byte3 = 0;
+ if (byte2 == 0) {
+ byte2 = buffer.get(index++);
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ } else {
+ byte3 = (byte) (state >> 16);
+ }
+ if (byte3 == 0) {
+ byte3 = buffer.get(index++);
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2, byte3);
+ }
+ }
+
+ // If we were called with state == MALFORMED, then byte1 is 0xFF,
+ // which never occurs in well-formed UTF-8, and so we will return
+ // MALFORMED again below.
+
+ if (byte2 > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || byte3 > (byte) 0xBF
+ // byte4 trailing-byte test
+ || buffer.get(index++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+
+ // Finish validation for the sequence.
+ return partialIsValidUtf8(buffer, index, limit);
+ }
+
+ /**
+ * Performs validation for {@link ByteBuffer} instances using the {@link ByteBuffer} API rather
+ * than potentially faster approaches.
+ *
+ * <p>Validation starts from a clean state: leading runs of ASCII are first skipped eight bytes
+ * at a time, then the remaining bytes are checked one character at a time.
+ *
+ * @param buffer the buffer to check
+ * @param index the index of the first byte to check, inclusive
+ * @param limit the index at which checking stops, exclusive
+ * @return {@code COMPLETE} if the range is well-formed, {@code MALFORMED} if it is definitely
+ * not, or an "incomplete" state value if the range ends in the middle of a character
+ */
+ private static int partialIsValidUtf8(final ByteBuffer buffer, int index, final int limit) {
+ index += estimateConsecutiveAscii(buffer, index, limit);
+
+ for (;;) {
+ // Optimize for interior runs of ASCII bytes.
+ // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
+ // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
+ int byte1;
+ do {
+ if (index >= limit) {
+ return COMPLETE;
+ }
+ } while ((byte1 = buffer.get(index++)) >= 0);
+
+ // If we're here byte1 is not ASCII. Only need to handle 2-4 byte forms.
+ if (byte1 < (byte) 0xE0) {
+ // Two-byte form (110xxxxx 10xxxxxx)
+ if (index >= limit) {
+ // Incomplete sequence: the state is just the lead byte.
+ return byte1;
+ }
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2 || buffer.get(index) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ index++;
+ } else if (byte1 < (byte) 0xF0) {
+ // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx)
+ if (index >= limit - 1) {
+ // Incomplete sequence
+ return incompleteStateFor(buffer, byte1, index, limit - index);
+ }
+
+ final byte byte2 = buffer.get(index++);
+ if (byte2 > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // check for illegal surrogate codepoints
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || buffer.get(index) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ index++;
+ } else {
+ // Four-byte form (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+ if (index >= limit - 2) {
+ // Incomplete sequence
+ return incompleteStateFor(buffer, byte1, index, limit - index);
+ }
+
+ // TODO(nathanmittler): Consider using getInt() to improve performance.
+ final int byte2 = buffer.get(index++);
+ if (byte2 > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || buffer.get(index++) > (byte) 0xBF
+ // byte4 trailing-byte test
+ || buffer.get(index++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+ }
+
+ /**
+ * Encodes an input character sequence ({@code in}) to UTF-8 in the target array ({@code out}).
+ * For a string, this method is similar to
+ * <pre>{@code
+ * byte[] a = string.getBytes(UTF_8);
+ * System.arraycopy(a, 0, out, offset, a.length);
+ * return offset + a.length;
+ * }</pre>
+ *
+ * but is more efficient in both time and space. One key difference is that this method
+ * requires paired surrogates, and therefore does not support chunking.
+ * While {@code String.getBytes(UTF_8)} replaces unpaired surrogates with the default
+ * replacement character, this method throws {@link UnpairedSurrogateException}.
+ *
+ * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to
+ * compute the exact amount needed, or leave room for
+ * {@code Utf8.MAX_BYTES_PER_CHAR * in.length()}, which is the largest possible number
+ * of bytes that any input can be encoded to.
+ *
+ * @param in the input character sequence to be encoded
+ * @param out the target array
+ * @param offset the starting offset in {@code out} to start writing at
+ * @param length the length of the writable region of {@code out}, starting from {@code offset}
+ * @throws UnpairedSurrogateException if {@code in} contains ill-formed UTF-16 (unpaired
+ * surrogates)
+ * @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than
+ * {@code out.length - offset}
+ * @return the new offset, equivalent to {@code offset + Utf8.encodedLength(in)}
+ */
+ abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length);
+
+ /**
+ * Encodes an input character sequence ({@code in}) to UTF-8 in the target buffer ({@code out}).
+ * Upon returning from this method, the {@code out} position will point to the position after
+ * the last encoded byte. This method requires paired surrogates, and therefore does not
+ * support chunking.
+ *
+ * <p>The write strategy is picked from the buffer type: an array-backed buffer is encoded
+ * straight into its backing array through {@code Utf8.encode}, a direct buffer is handed to
+ * {@link #encodeUtf8Direct}, and any other buffer falls back to {@link #encodeUtf8Default}.
+ *
+ * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to
+ * compute the exact amount needed, or leave room for
+ * {@code Utf8.MAX_BYTES_PER_CHAR * in.length()}, which is the largest possible number
+ * of bytes that any input can be encoded to.
+ *
+ * @param in the source character sequence to be encoded
+ * @param out the target buffer
+ * @throws UnpairedSurrogateException if {@code in} contains ill-formed UTF-16 (unpaired
+ * surrogates)
+ * @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than
+ * {@code out.remaining()}
+ */
+ final void encodeUtf8(CharSequence in, ByteBuffer out) {
+ if (out.hasArray()) {
+ final int offset = out.arrayOffset(); // where the buffer's index 0 sits inside the backing array
+ int endIndex =
+ Utf8.encode(in, out.array(), offset + out.position(), out.remaining());
+ out.position(endIndex - offset); // subtract the array offset to turn the array index back into a buffer position
+ } else if (out.isDirect()) {
+ encodeUtf8Direct(in, out);
} else {
- // If we are surrogates and we're not a surrogate pair, always throw an
- // IllegalArgumentException instead of an ArrayOutOfBoundsException.
- if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE)
- && (i + 1 == sequence.length()
- || !Character.isSurrogatePair(c, sequence.charAt(i + 1)))) {
- throw new UnpairedSurrogateException(i, utf16Length);
+ encodeUtf8Default(in, out);
+ }
+ }
+
+ /**
+ * Encodes the input character sequence to a direct {@link ByteBuffer} instance.
+ *
+ * <p>{@link #encodeUtf8(CharSequence, ByteBuffer)} calls this when {@code out} has no
+ * accessible backing array and {@code out.isDirect()} is true.
+ *
+ * @param in the source character sequence to be encoded
+ * @param out the direct target buffer
+ */
+ abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out);
+
+ /**
+ * Encodes the input character sequence to a {@link ByteBuffer} instance using the {@link
+ * ByteBuffer} API, rather than potentially faster approaches.
+ *
+ * <p>Bytes are written with absolute {@code put(index, byte)} calls starting at the buffer's
+ * current position; the position is only advanced once the whole sequence has been encoded.
+ *
+ * @param in the source character sequence to be encoded
+ * @param out the target buffer
+ * @throws UnpairedSurrogateException if {@code in} contains a surrogate that is not part of a
+ * valid pair; the exception carries the index of that surrogate
+ * @throws ArrayIndexOutOfBoundsException if the buffer rejects a write as out of bounds
+ */
+ final void encodeUtf8Default(CharSequence in, ByteBuffer out) {
+ final int inLength = in.length();
+ int outIx = out.position();
+ int inIx = 0;
+
+ // Since ByteBuffer.putXXX() already checks boundaries for us, no need to explicitly check
+ // access. Assume the buffer is big enough and let it handle the out of bounds exception
+ // if it occurs.
+ try {
+ // Designed to take advantage of
+ // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+ for (char c; inIx < inLength && (c = in.charAt(inIx)) < 0x80; ++inIx) {
+ out.put(outIx + inIx, (byte) c);
}
- throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j);
+ if (inIx == inLength) {
+ // Successfully encoded the entire string.
+ out.position(outIx + inIx);
+ return;
+ }
+
+ outIx += inIx;
+ for (char c; inIx < inLength; ++inIx, ++outIx) {
+ c = in.charAt(inIx);
+ if (c < 0x80) {
+ // One byte (0xxx xxxx)
+ out.put(outIx, (byte) c);
+ } else if (c < 0x800) {
+ // Two bytes (110x xxxx 10xx xxxx)
+
+ // Benchmarks show put performs better than putShort here (for HotSpot).
+ out.put(outIx++, (byte) (0xC0 | (c >>> 6)));
+ out.put(outIx, (byte) (0x80 | (0x3F & c)));
+ } else if (c < MIN_SURROGATE || MAX_SURROGATE < c) {
+ // Three bytes (1110 xxxx 10xx xxxx 10xx xxxx)
+ // Maximum single-char code point is 0xFFFF, 16 bits.
+
+ // Benchmarks show put performs better than putShort here (for HotSpot).
+ out.put(outIx++, (byte) (0xE0 | (c >>> 12)));
+ out.put(outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
+ out.put(outIx, (byte) (0x80 | (0x3F & c)));
+ } else {
+ // Four bytes (1111 xxxx 10xx xxxx 10xx xxxx 10xx xxxx)
+
+ // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
+ // bytes
+
+ // Remember where the surrogate sits: inIx is advanced while probing for its partner,
+ // and the exception has to point at the unpaired char itself, not the char after it.
+ final int surrogateIx = inIx;
+ final char low;
+ if (inIx + 1 == inLength || !isSurrogatePair(c, (low = in.charAt(++inIx)))) {
+ throw new UnpairedSurrogateException(surrogateIx, inLength);
+ }
+ // TODO(nathanmittler): Consider using putInt() to improve performance.
+ int codePoint = toCodePoint(c, low);
+ out.put(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
+ out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
+ out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
+ out.put(outIx, (byte) (0x80 | (0x3F & codePoint)));
+ }
+ }
+
+ // Successfully encoded the entire string.
+ out.position(outIx);
+ } catch (IndexOutOfBoundsException e) {
+ // TODO(nathanmittler): Consider making the API throw IndexOutOfBoundsException instead.
+
+ // If we failed in the outer ASCII loop, outIx will not have been updated. In this case,
+ // use inIx to determine the bad write index.
+ int badWriteIndex = out.position() + Math.max(inIx, outIx - out.position() + 1);
+ throw new ArrayIndexOutOfBoundsException(
+ "Failed writing " + in.charAt(inIx) + " at index " + badWriteIndex);
}
}
- return j;
}
- // End Guava UTF-8 methods.
+
+ /**
+ * {@link Processor} implementation that does not use any {@code sun.misc.Unsafe} methods.
+ */
+ static final class SafeProcessor extends Processor {
+ @Override // Resumes validation of bytes[index, limit) from the state returned by a previous call.
+ int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) {
+ if (state != COMPLETE) {
+ // The previous decoding operation was incomplete (or malformed).
+ // We look for a well-formed sequence consisting of bytes from
+ // the previous decoding operation (stored in state) together
+ // with bytes from the array slice.
+ //
+ // We expect such "straddler characters" to be rare.
+
+ if (index >= limit) { // No bytes? No progress.
+ return state;
+ }
+ int byte1 = (byte) state; // the lead byte is kept in the low 8 bits of state
+ // byte1 is never ASCII.
+ if (byte1 < (byte) 0xE0) {
+ // two-byte form
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2
+ // byte2 trailing-byte test
+ || bytes[index++] > (byte) 0xBF) { // signed compare: only 0x80..0xBF is <= (byte) 0xBF
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // three-byte form
+
+ // Get byte2 from saved state or array
+ int byte2 = (byte) ~(state >> 8); // bits 8-15 hold byte2 bit-inverted; 0 means none was saved
+ if (byte2 == 0) {
+ byte2 = bytes[index++];
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ }
+ if (byte2 > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // illegal surrogate codepoint?
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // four-byte form
+
+ // Get byte2 and byte3 from saved state or array
+ int byte2 = (byte) ~(state >> 8); // bits 8-15 hold byte2 bit-inverted; 0 means none was saved
+ int byte3 = 0;
+ if (byte2 == 0) {
+ byte2 = bytes[index++];
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ } else {
+ byte3 = (byte) (state >> 16); // byte3, when saved, sits in bits 16-23
+ }
+ if (byte3 == 0) {
+ byte3 = bytes[index++];
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2, byte3);
+ }
+ }
+
+ // If we were called with state == MALFORMED, then byte1 is 0xFF,
+ // which never occurs in well-formed UTF-8, and so we will return
+ // MALFORMED again below.
+
+ if (byte2 > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || byte3 > (byte) 0xBF
+ // byte4 trailing-byte test
+ || bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+
+ return partialIsValidUtf8(bytes, index, limit); // pending sequence resolved (or none): validate the rest of the slice
+ }
+
+ @Override // Direct buffers get no special treatment here: validation is delegated to the ByteBuffer-API path.
+ int partialIsValidUtf8Direct(int state, ByteBuffer buffer, int index, int limit) {
+ // For safe processing, we have to use the ByteBuffer API.
+ return partialIsValidUtf8Default(state, buffer, index, limit);
+ }
+
+ /**
+ * Encodes {@code in} as UTF-8 into {@code out} using plain array writes.
+ *
+ * @param in the input character sequence to be encoded
+ * @param out the target array
+ * @param offset the index in {@code out} at which to start writing
+ * @param length the number of bytes that may be written, starting from {@code offset}
+ * @return the index in {@code out} just past the last byte written
+ * @throws UnpairedSurrogateException if {@code in} contains a surrogate that is not part of a
+ * valid pair; the exception carries the index of that surrogate
+ * @throws ArrayIndexOutOfBoundsException if the encoded form does not fit in the given window
+ */
+ @Override
+ int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
+ int utf16Length = in.length();
+ int j = offset;
+ int i = 0;
+ int limit = offset + length;
+ // Designed to take advantage of
+ // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+ for (char c; i < utf16Length && i + j < limit && (c = in.charAt(i)) < 0x80; i++) {
+ out[j + i] = (byte) c;
+ }
+ if (i == utf16Length) {
+ return j + utf16Length;
+ }
+ j += i;
+ for (char c; i < utf16Length; i++) {
+ c = in.charAt(i);
+ if (c < 0x80 && j < limit) {
+ out[j++] = (byte) c;
+ } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes
+ // (0xF << 6) narrows to the 0xC0 lead-byte prefix once cast to byte.
+ out[j++] = (byte) ((0xF << 6) | (c >>> 6));
+ out[j++] = (byte) (0x80 | (0x3F & c));
+ } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) {
+ // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+ // (0xF << 5) narrows to the 0xE0 lead-byte prefix once cast to byte.
+ out[j++] = (byte) ((0xF << 5) | (c >>> 12));
+ out[j++] = (byte) (0x80 | (0x3F & (c >>> 6)));
+ out[j++] = (byte) (0x80 | (0x3F & c));
+ } else if (j <= limit - 4) {
+ // Minimum code point represented by a surrogate pair is 0x10000, 17 bits,
+ // four UTF-8 bytes
+
+ // Capture the surrogate's index before probing for its partner: i is only advanced
+ // when a following char exists, so "i - 1" is wrong for a surrogate at the very end.
+ final int surrogateIndex = i;
+ final char low;
+ if (i + 1 == in.length()
+ || !Character.isSurrogatePair(c, (low = in.charAt(++i)))) {
+ throw new UnpairedSurrogateException(surrogateIndex, utf16Length);
+ }
+ int codePoint = Character.toCodePoint(c, low);
+ out[j++] = (byte) ((0xF << 4) | (codePoint >>> 18));
+ out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12)));
+ out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6)));
+ out[j++] = (byte) (0x80 | (0x3F & codePoint));
+ } else {
+ // If we are surrogates and we're not a surrogate pair, always throw an
+ // UnpairedSurrogateException instead of an ArrayOutOfBoundsException.
+ if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE)
+ && (i + 1 == in.length()
+ || !Character.isSurrogatePair(c, in.charAt(i + 1)))) {
+ throw new UnpairedSurrogateException(i, utf16Length);
+ }
+ throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j);
+ }
+ }
+ return j;
+ }
+
+ @Override // Direct buffers get no special treatment here: encoding is delegated to the ByteBuffer-API path.
+ void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+ // For safe processing, we have to use the ByteBuffer API.
+ encodeUtf8Default(in, out);
+ }
+
+ private static int partialIsValidUtf8(byte[] bytes, int index, int limit) {
+ // Fast path for pure-ASCII input: keep the scan a tiny top-level loop (Hotspot handles
+ // these well) and stop at the first byte with the high bit set (negative as a Java byte).
+ int cursor = index;
+ while (cursor < limit && bytes[cursor] >= 0) {
+ cursor++;
+ }
+
+ if (cursor >= limit) {
+ // Ran off the end without meeting a non-ASCII byte.
+ return COMPLETE;
+ }
+ return partialIsValidUtf8NonAscii(bytes, cursor, limit);
+ }
+
+ private static int partialIsValidUtf8NonAscii(byte[] bytes, int index, int limit) { // validates bytes[index, limit); returns COMPLETE, MALFORMED, or an incomplete-sequence state
+ for (;;) {
+ int byte1, byte2;
+
+ // Optimize for interior runs of ASCII bytes.
+ do {
+ if (index >= limit) {
+ return COMPLETE;
+ }
+ } while ((byte1 = bytes[index++]) >= 0);
+
+ if (byte1 < (byte) 0xE0) {
+ // two-byte form
+
+ if (index >= limit) {
+ // Incomplete sequence
+ return byte1; // the lone lead byte (negative as an int) is itself the saved state
+ }
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2
+ || bytes[index++] > (byte) 0xBF) { // signed compare: only 0x80..0xBF is <= (byte) 0xBF
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // three-byte form
+
+ if (index >= limit - 1) { // incomplete sequence
+ return incompleteStateFor(bytes, index, limit);
+ }
+ if ((byte2 = bytes[index++]) > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // check for illegal surrogate codepoints
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // four-byte form
+
+ if (index >= limit - 2) { // incomplete sequence
+ return incompleteStateFor(bytes, index, limit);
+ }
+ if ((byte2 = bytes[index++]) > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || bytes[index++] > (byte) 0xBF
+ // byte4 trailing-byte test
+ || bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * {@link Processor} that uses {@code sun.misc.Unsafe} where possible to improve performance.
+ */
+ static final class UnsafeProcessor extends Processor {
+ private static final sun.misc.Unsafe UNSAFE = getUnsafe(); // handle used for every raw read/write in this class
+ private static final long BUFFER_ADDRESS_OFFSET =
+ fieldOffset(field(Buffer.class, "address")); // offset of Buffer's "address" field; compared against -1 below (presumably the lookup-failed sentinel - confirm in fieldOffset)
+ private static final int ARRAY_BASE_OFFSET = byteArrayBaseOffset(); // added to an array index to form the offset passed to UNSAFE for byte[] access
+
+ /**
+ * We only use Unsafe operations if we have access to direct {@link ByteBuffer}'s address
+ * and the array base offset is a multiple of 8 (needed by Unsafe.getLong()).
+ */
+ private static final boolean AVAILABLE =
+ BUFFER_ADDRESS_OFFSET != -1 && ARRAY_BASE_OFFSET % 8 == 0;
+
+ /**
+ * Indicates whether or not all required unsafe operations are supported on this platform.
+ *
+ * @return the value of {@code AVAILABLE}, computed once when this class is initialized
+ */
+ static boolean isAvailable() {
+ return AVAILABLE;
+ }
+
+ @Override // Resumes validation of bytes[index, limit) from a saved state, reading the array through UNSAFE.
+ int partialIsValidUtf8(int state, byte[] bytes, final int index, final int limit) {
+ if ((index | limit | bytes.length - limit) < 0) { // one sign test: any of the three being negative makes the OR negative
+ throw new ArrayIndexOutOfBoundsException(
+ String.format("Array length=%d, index=%d, limit=%d", bytes.length, index, limit));
+ }
+ long offset = ARRAY_BASE_OFFSET + index;
+ final long offsetLimit = ARRAY_BASE_OFFSET + limit;
+ if (state != COMPLETE) {
+ // The previous decoding operation was incomplete (or malformed).
+ // We look for a well-formed sequence consisting of bytes from
+ // the previous decoding operation (stored in state) together
+ // with bytes from the array slice.
+ //
+ // We expect such "straddler characters" to be rare.
+
+ if (offset >= offsetLimit) { // No bytes? No progress.
+ return state;
+ }
+ int byte1 = (byte) state; // the lead byte is kept in the low 8 bits of state
+ // byte1 is never ASCII.
+ if (byte1 < (byte) 0xE0) {
+ // two-byte form
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2
+ // byte2 trailing-byte test
+ || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { // signed compare: only 0x80..0xBF is <= (byte) 0xBF
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // three-byte form
+
+ // Get byte2 from saved state or array
+ int byte2 = (byte) ~(state >> 8); // bits 8-15 hold byte2 bit-inverted; 0 means none was saved
+ if (byte2 == 0) {
+ byte2 = UNSAFE.getByte(bytes, offset++);
+ if (offset >= offsetLimit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ }
+ if (byte2 > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // illegal surrogate codepoint?
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // four-byte form
+
+ // Get byte2 and byte3 from saved state or array
+ int byte2 = (byte) ~(state >> 8); // bits 8-15 hold byte2 bit-inverted; 0 means none was saved
+ int byte3 = 0;
+ if (byte2 == 0) {
+ byte2 = UNSAFE.getByte(bytes, offset++);
+ if (offset >= offsetLimit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ } else {
+ byte3 = (byte) (state >> 16); // byte3, when saved, sits in bits 16-23
+ }
+ if (byte3 == 0) {
+ byte3 = UNSAFE.getByte(bytes, offset++);
+ if (offset >= offsetLimit) {
+ return incompleteStateFor(byte1, byte2, byte3);
+ }
+ }
+
+ // If we were called with state == MALFORMED, then byte1 is 0xFF,
+ // which never occurs in well-formed UTF-8, and so we will return
+ // MALFORMED again below.
+
+ if (byte2 > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || byte3 > (byte) 0xBF
+ // byte4 trailing-byte test
+ || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+
+ return partialIsValidUtf8(bytes, offset, (int) (offsetLimit - offset)); // validate the remaining bytes, passed as (offset, count)
+ }
+
+ @Override // Resumes validation of a direct buffer's bytes [index, limit) from a saved state, reading memory through UNSAFE.
+ int partialIsValidUtf8Direct(
+ final int state, ByteBuffer buffer, final int index, final int limit) {
+ if ((index | limit | buffer.limit() - limit) < 0) { // one sign test: any of the three being negative makes the OR negative
+ throw new ArrayIndexOutOfBoundsException(
+ String.format("buffer limit=%d, index=%d, limit=%d", buffer.limit(), index, limit));
+ }
+ long address = addressOffset(buffer) + index;
+ final long addressLimit = address + (limit - index);
+ if (state != COMPLETE) {
+ // The previous decoding operation was incomplete (or malformed).
+ // We look for a well-formed sequence consisting of bytes from
+ // the previous decoding operation (stored in state) together
+ // with bytes from the buffer slice.
+ //
+ // We expect such "straddler characters" to be rare.
+
+ if (address >= addressLimit) { // No bytes? No progress.
+ return state;
+ }
+
+ final int byte1 = (byte) state; // the lead byte is kept in the low 8 bits of state
+ // byte1 is never ASCII.
+ if (byte1 < (byte) 0xE0) {
+ // two-byte form
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2
+ // byte2 trailing-byte test
+ || UNSAFE.getByte(address++) > (byte) 0xBF) { // signed compare: only 0x80..0xBF is <= (byte) 0xBF
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // three-byte form
+
+ // Get byte2 from saved state or buffer
+ int byte2 = (byte) ~(state >> 8); // bits 8-15 hold byte2 bit-inverted; 0 means none was saved
+ if (byte2 == 0) {
+ byte2 = UNSAFE.getByte(address++);
+ if (address >= addressLimit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ }
+ if (byte2 > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // illegal surrogate codepoint?
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || UNSAFE.getByte(address++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // four-byte form
+
+ // Get byte2 and byte3 from saved state or buffer
+ int byte2 = (byte) ~(state >> 8); // bits 8-15 hold byte2 bit-inverted; 0 means none was saved
+ int byte3 = 0;
+ if (byte2 == 0) {
+ byte2 = UNSAFE.getByte(address++);
+ if (address >= addressLimit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ } else {
+ byte3 = (byte) (state >> 16); // byte3, when saved, sits in bits 16-23
+ }
+ if (byte3 == 0) {
+ byte3 = UNSAFE.getByte(address++);
+ if (address >= addressLimit) {
+ return incompleteStateFor(byte1, byte2, byte3);
+ }
+ }
+
+ // If we were called with state == MALFORMED, then byte1 is 0xFF,
+ // which never occurs in well-formed UTF-8, and so we will return
+ // MALFORMED again below.
+
+ if (byte2 > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || byte3 > (byte) 0xBF
+ // byte4 trailing-byte test
+ || UNSAFE.getByte(address++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+
+ return partialIsValidUtf8(address, (int) (addressLimit - address)); // validate the remaining bytes, passed as (address, count)
+ }
+
+ /**
+ * Encodes {@code in} as UTF-8 into {@code out}, writing the bytes through {@code UNSAFE}.
+ *
+ * <p>Because the writes themselves are unchecked, the window is validated up front and every
+ * multi-byte write is guarded against {@code outLimit}.
+ *
+ * @param in the input character sequence to be encoded
+ * @param out the target array
+ * @param offset the index in {@code out} at which to start writing
+ * @param length the number of bytes that may be written, starting from {@code offset}
+ * @return the index in {@code out} just past the last byte written
+ * @throws UnpairedSurrogateException if {@code in} contains a surrogate that is not part of a
+ * valid pair; the exception carries the index of that surrogate
+ * @throws ArrayIndexOutOfBoundsException if the encoded form does not fit in the given window;
+ * the message reports the array index at which the write failed
+ */
+ @Override
+ int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
+ long outIx = ARRAY_BASE_OFFSET + offset;
+ final long outLimit = outIx + length;
+ final int inLimit = in.length();
+ if (inLimit > length || out.length - length < offset) {
+ // Not even enough room for an ASCII-encoded string.
+ throw new ArrayIndexOutOfBoundsException(
+ "Failed writing " + in.charAt(inLimit - 1) + " at index " + (offset + length));
+ }
+
+ // Designed to take advantage of
+ // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+ int inIx = 0;
+ for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) {
+ UNSAFE.putByte(out, outIx++, (byte) c);
+ }
+ if (inIx == inLimit) {
+ // We're done, it was ASCII encoded.
+ return (int) (outIx - ARRAY_BASE_OFFSET);
+ }
+
+ for (char c; inIx < inLimit; ++inIx) {
+ c = in.charAt(inIx);
+ if (c < 0x80 && outIx < outLimit) {
+ UNSAFE.putByte(out, outIx++, (byte) c);
+ } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes
+ // (0xF << 6) narrows to the 0xC0 lead-byte prefix once cast to byte.
+ UNSAFE.putByte(out, outIx++, (byte) ((0xF << 6) | (c >>> 6)));
+ UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & c)));
+ } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) {
+ // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+ // (0xF << 5) narrows to the 0xE0 lead-byte prefix once cast to byte.
+ UNSAFE.putByte(out, outIx++, (byte) ((0xF << 5) | (c >>> 12)));
+ UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
+ UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & c)));
+ } else if (outIx <= outLimit - 4L) {
+ // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
+ // bytes
+
+ // Capture the surrogate's index before probing for its partner: inIx is only advanced
+ // when a following char exists, so "inIx - 1" is wrong for a surrogate at the very end.
+ final int surrogateIx = inIx;
+ final char low;
+ if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(++inIx)))) {
+ throw new UnpairedSurrogateException(surrogateIx, inLimit);
+ }
+ int codePoint = toCodePoint(c, low);
+ UNSAFE.putByte(out, outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
+ UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
+ UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
+ UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & codePoint)));
+ } else {
+ if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE)
+ && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) {
+ // We are surrogates and we're not a surrogate pair.
+ throw new UnpairedSurrogateException(inIx, inLimit);
+ }
+ // Not enough space in the output buffer. Report the array index, not the raw Unsafe
+ // offset (which still includes the array base offset).
+ throw new ArrayIndexOutOfBoundsException(
+ "Failed writing " + c + " at index " + (outIx - ARRAY_BASE_OFFSET));
+ }
+ }
+
+ // All bytes have been encoded.
+ return (int) (outIx - ARRAY_BASE_OFFSET);
+ }
+
+ /**
+ * Encodes {@code in} as UTF-8 into the memory behind {@code out}, writing through
+ * {@code UNSAFE} from the buffer's current position up to its limit. On success the buffer's
+ * position is moved just past the last byte written.
+ *
+ * @param in the source character sequence to be encoded
+ * @param out the direct target buffer
+ * @throws UnpairedSurrogateException if {@code in} contains a surrogate that is not part of a
+ * valid pair; the exception carries the index of that surrogate
+ * @throws ArrayIndexOutOfBoundsException if the encoded form does not fit before the buffer's
+ * limit; the message reports the buffer index at which the write failed
+ */
+ @Override
+ void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+ final long address = addressOffset(out);
+ long outIx = address + out.position();
+ final long outLimit = address + out.limit();
+ final int inLimit = in.length();
+ if (inLimit > outLimit - outIx) {
+ // Not even enough room for an ASCII-encoded string.
+ throw new ArrayIndexOutOfBoundsException(
+ "Failed writing " + in.charAt(inLimit - 1) + " at index " + out.limit());
+ }
+
+ // Designed to take advantage of
+ // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+ int inIx = 0;
+ for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) {
+ UNSAFE.putByte(outIx++, (byte) c);
+ }
+ if (inIx == inLimit) {
+ // We're done, it was ASCII encoded.
+ out.position((int) (outIx - address));
+ return;
+ }
+
+ for (char c; inIx < inLimit; ++inIx) {
+ c = in.charAt(inIx);
+ if (c < 0x80 && outIx < outLimit) {
+ UNSAFE.putByte(outIx++, (byte) c);
+ } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes
+ // (0xF << 6) narrows to the 0xC0 lead-byte prefix once cast to byte.
+ UNSAFE.putByte(outIx++, (byte) ((0xF << 6) | (c >>> 6)));
+ UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & c)));
+ } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) {
+ // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+ // (0xF << 5) narrows to the 0xE0 lead-byte prefix once cast to byte.
+ UNSAFE.putByte(outIx++, (byte) ((0xF << 5) | (c >>> 12)));
+ UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
+ UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & c)));
+ } else if (outIx <= outLimit - 4L) {
+ // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
+ // bytes
+
+ // Capture the surrogate's index before probing for its partner: inIx is only advanced
+ // when a following char exists, so "inIx - 1" is wrong for a surrogate at the very end.
+ final int surrogateIx = inIx;
+ final char low;
+ if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(++inIx)))) {
+ throw new UnpairedSurrogateException(surrogateIx, inLimit);
+ }
+ int codePoint = toCodePoint(c, low);
+ UNSAFE.putByte(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
+ UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
+ UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
+ UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & codePoint)));
+ } else {
+ if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE)
+ && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) {
+ // We are surrogates and we're not a surrogate pair.
+ throw new UnpairedSurrogateException(inIx, inLimit);
+ }
+ // Not enough space in the output buffer. Report the buffer index rather than the raw
+ // native memory address held in outIx.
+ throw new ArrayIndexOutOfBoundsException(
+ "Failed writing " + c + " at index " + (outIx - address));
+ }
+ }
+
+ // All bytes have been encoded.
+ out.position((int) (outIx - address));
+ }
+
+ /**
+ * Counts (approximately) the number of consecutive ASCII characters starting from the given
+ * position, using the most efficient method available to the platform.
+ *
+ * <p>The count is a lower bound: after a byte-wise prologue that brings {@code offset} to an
+ * 8-byte boundary, bytes are examined eight at a time, so the scan stops at the start of the
+ * first 8-byte word containing a non-ASCII byte (or when fewer than 8 bytes remain).
+ *
+ * @param bytes the array containing the character sequence
+ * @param offset the offset position of the index (same as index + arrayBaseOffset)
+ * @param maxChars the maximum number of characters to count
+ * @return the number of ASCII characters found. The stopping position will be at or
+ * before the first non-ASCII byte.
+ */
+ private static int unsafeEstimateConsecutiveAscii(
+ byte[] bytes, long offset, final int maxChars) {
+ int remaining = maxChars;
+ if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) {
+ // Don't bother with small strings.
+ return 0;
+ }
+
+ // Read bytes until 8-byte aligned so that we can read longs in the loop below.
+ // Byte arrays are already either 8 or 16-byte aligned, so we just need to make sure that
+ // the index (relative to the start of the array) is also 8-byte aligned. The number of
+ // bytes to the next 8-byte boundary is (8 - (offset & 7)) & 7, i.e. (-offset) & 7; using
+ // (offset & 7) itself would only land on a boundary when the misalignment is 0 or 4.
+ final int unaligned = (int) (-offset & 7);
+ for (int j = unaligned; j > 0; j--) {
+ if (UNSAFE.getByte(bytes, offset++) < 0) {
+ return unaligned - j;
+ }
+ }
+
+ // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
+ // To speed things up further, we're reading longs instead of bytes so we use a mask to
+ // determine if any byte in the current long is non-ASCII.
+ remaining -= unaligned;
+ for (; remaining >= 8 && (UNSAFE.getLong(bytes, offset) & ASCII_MASK_LONG) == 0;
+ offset += 8, remaining -= 8) {}
+ return maxChars - remaining;
+ }
+
+ /**
+ * Same as {@link Utf8#estimateConsecutiveAscii(ByteBuffer, int, int)} except that it uses the
+ * most efficient method available to the platform.
+ *
+ * <p>The count is a lower bound: after a byte-wise prologue that brings {@code address} to an
+ * 8-byte boundary, memory is examined eight bytes at a time, so the scan stops at the start of
+ * the first 8-byte word containing a non-ASCII byte (or when fewer than 8 bytes remain).
+ *
+ * @param address the memory address at which to start counting
+ * @param maxChars the maximum number of characters to count
+ * @return the number of leading bytes known to be ASCII
+ */
+ private static int unsafeEstimateConsecutiveAscii(long address, final int maxChars) {
+ int remaining = maxChars;
+ if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) {
+ // Don't bother with small strings.
+ return 0;
+ }
+
+ // Read bytes until 8-byte aligned so that we can read longs in the loop below.
+ // The number of bytes to the next 8-byte boundary is (8 - (address & 7)) & 7, i.e.
+ // (-address) & 7; using (address & 7) itself would only land on a boundary when the
+ // misalignment is 0 or 4.
+ final int unaligned = (int) (-address & 7);
+ for (int j = unaligned; j > 0; j--) {
+ if (UNSAFE.getByte(address++) < 0) {
+ return unaligned - j;
+ }
+ }
+
+ // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
+ // To speed things up further, we're reading longs instead of bytes so we use a mask to
+ // determine if any byte in the current long is non-ASCII.
+ remaining -= unaligned;
+ for (; remaining >= 8 && (UNSAFE.getLong(address) & ASCII_MASK_LONG) == 0;
+ address += 8, remaining -= 8) {}
+ return maxChars - remaining;
+ }
+
+ private static int partialIsValidUtf8(final byte[] bytes, long offset, int remaining) { // validates `remaining` bytes starting at Unsafe offset `offset`; returns COMPLETE, MALFORMED, or an incomplete-sequence state
+ // Skip past ASCII characters as quickly as possible.
+ final int skipped = unsafeEstimateConsecutiveAscii(bytes, offset, remaining);
+ remaining -= skipped;
+ offset += skipped;
+
+ for (;;) {
+ // Optimize for interior runs of ASCII bytes.
+ // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
+ // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
+ int byte1 = 0;
+ for (; remaining > 0 && (byte1 = UNSAFE.getByte(bytes, offset++)) >= 0; --remaining) {
+ }
+ if (remaining == 0) {
+ return COMPLETE;
+ }
+ remaining--; // account for the non-ASCII lead byte the loop above consumed without decrementing
+
+ // If we're here byte1 is not ASCII. Only need to handle 2-4 byte forms.
+ if (byte1 < (byte) 0xE0) {
+ // Two-byte form (110xxxxx 10xxxxxx)
+ if (remaining == 0) {
+ // Incomplete sequence
+ return byte1; // the lone lead byte (negative as an int) is itself the saved state
+ }
+ remaining--;
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2
+ || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { // signed compare: only 0x80..0xBF is <= (byte) 0xBF
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx)
+ if (remaining < 2) {
+ // Incomplete sequence
+ return unsafeIncompleteStateFor(bytes, byte1, offset, remaining);
+ }
+ remaining -= 2;
+
+ final int byte2;
+ if ((byte2 = UNSAFE.getByte(bytes, offset++)) > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // check for illegal surrogate codepoints
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // Four-byte form (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+ if (remaining < 3) {
+ // Incomplete sequence
+ return unsafeIncompleteStateFor(bytes, byte1, offset, remaining);
+ }
+ remaining -= 3;
+
+ final int byte2;
+ if ((byte2 = UNSAFE.getByte(bytes, offset++)) > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF
+ // byte4 trailing-byte test
+ || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+ }
+
+ private static int partialIsValidUtf8(long address, int remaining) { // validates `remaining` bytes starting at memory address `address`; returns COMPLETE, MALFORMED, or an incomplete-sequence state
+ // Skip past ASCII characters as quickly as possible.
+ final int skipped = unsafeEstimateConsecutiveAscii(address, remaining);
+ address += skipped;
+ remaining -= skipped;
+
+ for (;;) {
+ // Optimize for interior runs of ASCII bytes.
+ // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
+ // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
+ int byte1 = 0;
+ for (; remaining > 0 && (byte1 = UNSAFE.getByte(address++)) >= 0; --remaining) {
+ }
+ if (remaining == 0) {
+ return COMPLETE;
+ }
+ remaining--; // account for the non-ASCII lead byte the loop above consumed without decrementing
+
+ if (byte1 < (byte) 0xE0) {
+ // Two-byte form
+
+ if (remaining == 0) {
+ // Incomplete sequence
+ return byte1; // the lone lead byte (negative as an int) is itself the saved state
+ }
+ remaining--;
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2 || UNSAFE.getByte(address++) > (byte) 0xBF) { // signed compare: only 0x80..0xBF is <= (byte) 0xBF
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // Three-byte form
+
+ if (remaining < 2) {
+ // Incomplete sequence
+ return unsafeIncompleteStateFor(address, byte1, remaining);
+ }
+ remaining -= 2;
+
+ final byte byte2 = UNSAFE.getByte(address++);
+ if (byte2 > (byte) 0xBF
+ // overlong? 5 most significant bits must not all be zero
+ || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+ // check for illegal surrogate codepoints
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+ // byte3 trailing-byte test
+ || UNSAFE.getByte(address++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // Four-byte form
+
+ if (remaining < 3) {
+ // Incomplete sequence
+ return unsafeIncompleteStateFor(address, byte1, remaining);
+ }
+ remaining -= 3;
+
+ final byte byte2 = UNSAFE.getByte(address++);
+ if (byte2 > (byte) 0xBF
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+ // byte3 trailing-byte test
+ || UNSAFE.getByte(address++) > (byte) 0xBF
+ // byte4 trailing-byte test
+ || UNSAFE.getByte(address++) > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+ }
+
+ private static int unsafeIncompleteStateFor(byte[] bytes, int byte1, long offset,
+ int remaining) {
+ switch (remaining) {
+ case 0: {
+ return incompleteStateFor(byte1);
+ }
+ case 1: {
+ return incompleteStateFor(byte1, UNSAFE.getByte(bytes, offset));
+ }
+ case 2: {
+ return incompleteStateFor(byte1, UNSAFE.getByte(bytes, offset),
+ UNSAFE.getByte(bytes, offset + 1));
+ }
+ default: {
+ throw new AssertionError();
+ }
+ }
+ }
+
+ private static int unsafeIncompleteStateFor(long address, final int byte1, int remaining) {
+ switch (remaining) {
+ case 0: {
+ return incompleteStateFor(byte1);
+ }
+ case 1: {
+ return incompleteStateFor(byte1, UNSAFE.getByte(address));
+ }
+ case 2: {
+ return incompleteStateFor(byte1, UNSAFE.getByte(address), UNSAFE.getByte(address + 1));
+ }
+ default: {
+ throw new AssertionError();
+ }
+ }
+ }
+
+ /**
+ * Gets the field with the given name within the class, or {@code null} if not found. If
+ * found, the field is made accessible.
+ */
+ private static Field field(Class<?> clazz, String fieldName) {
+ Field field;
+ try {
+ field = clazz.getDeclaredField(fieldName);
+ field.setAccessible(true);
+ } catch (Throwable t) {
+ // Failed to access the fields.
+ field = null;
+ }
+ logger.log(Level.FINEST, "{0}.{1}: {2}",
+ new Object[] {clazz.getName(), fieldName, (field != null ? "available" : "unavailable")});
+ return field;
+ }
+
+ /**
+  * Returns the offset reported by {@code UNSAFE.objectFieldOffset} for the given field, or
+  * {@code -1} when either the field or {@code sun.misc.Unsafe} is unavailable.
+  */
+ private static long fieldOffset(Field field) {
+   if (field == null || UNSAFE == null) {
+     return -1;
+   }
+   return UNSAFE.objectFieldOffset(field);
+ }
+
+ /**
+  * Get the base offset for byte arrays, or {@code -1} if {@code sun.misc.Unsafe} is not
+  * available.
+  *
+  * @return the value of {@code UNSAFE.arrayBaseOffset(byte[].class)}, or {@code -1} when
+  *     {@code UNSAFE} is {@code null}
+  */
+ private static int byteArrayBaseOffset() {
+   // The method previously declared a type parameter that was never used; it has been
+   // dropped. Existing call sites are unaffected.
+   if (UNSAFE == null) {
+     return -1;
+   }
+   return UNSAFE.arrayBaseOffset(byte[].class);
+ }
+
+ /**
+ * Gets the offset of the {@code address} field of the given direct {@link ByteBuffer}.
+ */
+ private static long addressOffset(ByteBuffer buffer) {
+ return UNSAFE.getLong(buffer, BUFFER_ADDRESS_OFFSET);
+ }
+
+ /**
+  * Gets the {@code sun.misc.Unsafe} instance, or {@code null} if not available on this
+  * platform.
+  *
+  * <p>The lookup runs inside {@code AccessController.doPrivileged}. It first verifies that
+  * every required method exists, then scans the declared fields of {@code sun.misc.Unsafe}
+  * for one holding an instance of that class. Any {@link Throwable} raised along the way is
+  * treated as "unavailable". The outcome is logged at {@code FINEST} level.
+  *
+  * @return the {@code sun.misc.Unsafe} instance, or {@code null} if it cannot be obtained
+  */
+ private static sun.misc.Unsafe getUnsafe() {
+   sun.misc.Unsafe unsafe = null;
+   try {
+     unsafe = AccessController.doPrivileged(new PrivilegedExceptionAction<sun.misc.Unsafe>() {
+       @Override
+       public sun.misc.Unsafe run() throws Exception {
+         Class<sun.misc.Unsafe> k = sun.misc.Unsafe.class;
+
+         // Check that this platform supports all of the required unsafe methods.
+         checkRequiredMethods(k);
+
+         for (Field f : k.getDeclaredFields()) {
+           f.setAccessible(true);
+           Object x = f.get(null);
+           if (k.isInstance(x)) {
+             return k.cast(x);
+           }
+         }
+         // The sun.misc.Unsafe field does not exist.
+         return null;
+       }
+     });
+   } catch (Throwable e) {
+     // Catching Throwable here due to the fact that Google AppEngine raises NoClassDefFoundError
+     // for Unsafe. Deliberately ignored: unsafe stays null and callers fall back.
+   }
+
+   // java.util.logging only substitutes indexed MessageFormat placeholders such as "{0}".
+   // The previous "{}" was printed literally and the availability text was lost.
+   logger.log(Level.FINEST, "sun.misc.Unsafe: {0}",
+       unsafe != null ? "available" : "unavailable");
+   return unsafe;
+ }
+
+ /**
+  * Verifies that all required methods of {@code sun.misc.Unsafe} are available on this platform.
+  *
+  * @param clazz the {@code sun.misc.Unsafe} class object to inspect
+  * @throws NoSuchMethodException if any of the required public methods is missing
+  * @throws SecurityException if reflective access to the class is denied
+  */
+ private static void checkRequiredMethods(Class<sun.misc.Unsafe> clazz)
+     throws NoSuchMethodException, SecurityException {
+   // Needed for Unsafe byte[] access
+   clazz.getMethod("arrayBaseOffset", Class.class);
+   clazz.getMethod("getByte", Object.class, long.class);
+   clazz.getMethod("putByte", Object.class, long.class, byte.class);
+   // Checked once here; the duplicate lookup that used to follow objectFieldOffset below
+   // was redundant and has been removed.
+   clazz.getMethod("getLong", Object.class, long.class);
+
+   // Needed for Unsafe Direct ByteBuffer access
+   clazz.getMethod("objectFieldOffset", Field.class);
+   clazz.getMethod("getByte", long.class);
+   clazz.getMethod("putByte", long.class, byte.class);
+   clazz.getMethod("getLong", long.class);
+ }
+ }
+
+ private Utf8() {} // private and empty: the class cannot be instantiated from outside
}