1 files changed, 375 insertions, 40 deletions
diff --git a/javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java b/javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java
index 37982b57..322ada8e 100644
--- a/javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java
+++ b/javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java
@@ -31,7 +31,10 @@
 package com.google.protobuf.nano;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.ReadOnlyBufferException;
 
 /**
  * Encodes and writes protocol message fields.
@@ -48,15 +51,18 @@ import java.io.UnsupportedEncodingException;
  * @author kneton@google.com Kenton Varda
  */
 public final class CodedOutputByteBufferNano {
-  private final byte[] buffer;
-  private final int limit;
-  private int position;
+  /* max bytes per java UTF-16 char in UTF-8 */
+  private static final int MAX_UTF8_EXPANSION = 3;
+  private final ByteBuffer buffer;
 
   private CodedOutputByteBufferNano(final byte[] buffer, final int offset,
                             final int length) {
+    this(ByteBuffer.wrap(buffer, offset, length));
+  }
+
+  private CodedOutputByteBufferNano(final ByteBuffer buffer) {
     this.buffer = buffer;
-    position = offset;
-    limit = offset + length;
+    this.buffer.order(ByteOrder.LITTLE_ENDIAN);
   }
 
   /**
@@ -288,14 +294,213 @@ public final class CodedOutputByteBufferNano {
 
   /** Write a {@code string} field to the stream. */
   public void writeStringNoTag(final String value) throws IOException {
-    // Unfortunately there does not appear to be any way to tell Java to encode
-    // UTF-8 directly into our buffer, so we have to let it create its own byte
-    // array and then copy.
-    final byte[] bytes = value.getBytes("UTF-8");
-    writeRawVarint32(bytes.length);
-    writeRawBytes(bytes);
+    // UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()),
+    // and at most 3 times of it. Optimize for the case where we know this length results in a
+    // constant varint length - saves measuring length of the string.
+    try {
+      final int minLengthVarIntSize = computeRawVarint32Size(value.length());
+      final int maxLengthVarIntSize = computeRawVarint32Size(value.length() * MAX_UTF8_EXPANSION);
+      if (minLengthVarIntSize == maxLengthVarIntSize) {
+        int oldPosition = buffer.position();
+        // Buffer.position, when passed a position that is past its limit, throws
+        // IllegalArgumentException, and this class is documented to throw
+        // OutOfSpaceException instead.
+        if (buffer.remaining() < minLengthVarIntSize) {
+          throw new OutOfSpaceException(oldPosition + minLengthVarIntSize, buffer.limit());
+        }
+        buffer.position(oldPosition + minLengthVarIntSize);
+        encode(value, buffer);
+        int newPosition = buffer.position();
+        buffer.position(oldPosition);
+        writeRawVarint32(newPosition - oldPosition - minLengthVarIntSize);
+        buffer.position(newPosition);
+      } else {
+        writeRawVarint32(encodedLength(value));
+        encode(value, buffer);
+      }
+    } catch (BufferOverflowException e) {
+      final OutOfSpaceException outOfSpaceException = new OutOfSpaceException(buffer.position(),
+          buffer.limit());
+      outOfSpaceException.initCause(e);
+      throw outOfSpaceException;
+    }
+  }
+
+  // These UTF-8 handling methods are copied from Guava's Utf8 class.
+  /**
+   * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string,
+   * this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in
+   * both time and space.
+   *
+   * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired
+   *     surrogates)
+   */
+  private static int encodedLength(CharSequence sequence) {
+    // Warning to maintainers: this implementation is highly optimized.
+    int utf16Length = sequence.length();
+    int utf8Length = utf16Length;
+    int i = 0;
+
+    // This loop optimizes for pure ASCII.
+    while (i < utf16Length && sequence.charAt(i) < 0x80) {
+      i++;
+    }
+
+    // This loop optimizes for chars less than 0x800.
+    for (; i < utf16Length; i++) {
+      char c = sequence.charAt(i);
+      if (c < 0x800) {
+        utf8Length += ((0x7f - c) >>> 31);  // branch free!
+      } else {
+        utf8Length += encodedLengthGeneral(sequence, i);
+        break;
+      }
+    }
+
+    if (utf8Length < utf16Length) {
+      // Necessary and sufficient condition for overflow because of maximum 3x expansion
+      throw new IllegalArgumentException("UTF-8 length does not fit in int: "
+              + (utf8Length + (1L << 32)));
+    }
+    return utf8Length;
+  }
+
+  private static int encodedLengthGeneral(CharSequence sequence, int start) {
+    int utf16Length = sequence.length();
+    int utf8Length = 0;
+    for (int i = start; i < utf16Length; i++) {
+      char c = sequence.charAt(i);
+      if (c < 0x800) {
+        utf8Length += (0x7f - c) >>> 31; // branch free!
+      } else {
+        utf8Length += 2;
+        // jdk7+: if (Character.isSurrogate(c)) {
+        if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) {
+          // Check that we have a well-formed surrogate pair.
+          int cp = Character.codePointAt(sequence, i);
+          if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+            throw new IllegalArgumentException("Unpaired surrogate at index " + i);
+          }
+          i++;
+        }
+      }
+    }
+    return utf8Length;
+  }
+
+  /**
+   * Encodes {@code sequence} into UTF-8, in {@code byteBuffer}. For a string, this method is
+   * equivalent to {@code buffer.put(string.getBytes(UTF_8))}, but is more efficient in both time
+   * and space. Bytes are written starting at the current position. This method requires paired
+   * surrogates, and therefore does not support chunking.
+   *
+   * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to
+   * compute the exact amount needed, or leave room for {@code 3 * sequence.length()}, which is the
+   * largest possible number of bytes that any input can be encoded to.
+   *
+   * @throws IllegalArgumentException if {@code sequence} contains ill-formed UTF-16 (unpaired
+   *     surrogates)
+   * @throws BufferOverflowException if {@code sequence} encoded in UTF-8 does not fit in
+   *     {@code byteBuffer}'s remaining space.
+   * @throws ReadOnlyBufferException if {@code byteBuffer} is a read-only buffer.
+   */
+  private static void encode(CharSequence sequence, ByteBuffer byteBuffer) {
+    if (byteBuffer.isReadOnly()) {
+      throw new ReadOnlyBufferException();
+    } else if (byteBuffer.hasArray()) {
+      try {
+        int encoded = encode(sequence,
+                byteBuffer.array(),
+                byteBuffer.arrayOffset() + byteBuffer.position(),
+                byteBuffer.remaining());
+        byteBuffer.position(encoded - byteBuffer.arrayOffset());
+      } catch (ArrayIndexOutOfBoundsException e) {
+        BufferOverflowException boe = new BufferOverflowException();
+        boe.initCause(e);
+        throw boe;
+      }
+    } else {
+      encodeDirect(sequence, byteBuffer);
+    }
+  }
+
+  private static void encodeDirect(CharSequence sequence, ByteBuffer byteBuffer) {
+    int utf16Length = sequence.length();
+    for (int i = 0; i < utf16Length; i++) {
+      final char c = sequence.charAt(i);
+      if (c < 0x80) { // ASCII
+        byteBuffer.put((byte) c);
+      } else if (c < 0x800) { // 11 bits, two UTF-8 bytes
+        byteBuffer.put((byte) ((0xF << 6) | (c >>> 6)));
+        byteBuffer.put((byte) (0x80 | (0x3F & c)));
+      } else if (c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) {
+        // Maximium single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+        byteBuffer.put((byte) ((0xF << 5) | (c >>> 12)));
+        byteBuffer.put((byte) (0x80 | (0x3F & (c >>> 6))));
+        byteBuffer.put((byte) (0x80 | (0x3F & c)));
+      } else {
+        final char low;
+        if (i + 1 == sequence.length()
+                || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) {
+          throw new IllegalArgumentException("Unpaired surrogate at index " + (i - 1));
+        }
+        int codePoint = Character.toCodePoint(c, low);
+        byteBuffer.put((byte) ((0xF << 4) | (codePoint >>> 18)));
+        byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 12))));
+        byteBuffer.put((byte) (0x80 | (0x3F & (codePoint >>> 6))));
+        byteBuffer.put((byte) (0x80 | (0x3F & codePoint)));
+      }
+    }
   }
 
+  private static int encode(CharSequence sequence, byte[] bytes, int offset, int length) {
+    int utf16Length = sequence.length();
+    int j = offset;
+    int i = 0;
+    int limit = offset + length;
+    // Designed to take advantage of
+    // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+    for (char c; i < utf16Length && i + j < limit && (c = sequence.charAt(i)) < 0x80; i++) {
+      bytes[j + i] = (byte) c;
+    }
+    if (i == utf16Length) {
+      return j + utf16Length;
+    }
+    j += i;
+    for (char c; i < utf16Length; i++) {
+      c = sequence.charAt(i);
+      if (c < 0x80 && j < limit) {
+        bytes[j++] = (byte) c;
+      } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes
+        bytes[j++] = (byte) ((0xF << 6) | (c >>> 6));
+        bytes[j++] = (byte) (0x80 | (0x3F & c));
+      } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) {
+        // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+        bytes[j++] = (byte) ((0xF << 5) | (c >>> 12));
+        bytes[j++] = (byte) (0x80 | (0x3F & (c >>> 6)));
+        bytes[j++] = (byte) (0x80 | (0x3F & c));
+      } else if (j <= limit - 4) {
+        // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 bytes
+        final char low;
+        if (i + 1 == sequence.length()
+                || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) {
+          throw new IllegalArgumentException("Unpaired surrogate at index " + (i - 1));
+        }
+        int codePoint = Character.toCodePoint(c, low);
+        bytes[j++] = (byte) ((0xF << 4) | (codePoint >>> 18));
+        bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12)));
+        bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6)));
+        bytes[j++] = (byte) (0x80 | (0x3F & codePoint));
+      } else {
+        throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j);
+      }
+    }
+    return j;
+  }
+
+  // End guava UTF-8 methods
+
+
   /** Write a {@code group} field to the stream. */
   public void writeGroupNoTag(final MessageNano value) throws IOException {
     value.writeTo(this);
@@ -603,13 +808,8 @@ public final class CodedOutputByteBufferNano {
    * {@code string} field.
    */
   public static int computeStringSizeNoTag(final String value) {
-    try {
-      final byte[] bytes = value.getBytes("UTF-8");
-      return computeRawVarint32Size(bytes.length) +
-             bytes.length;
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported.");
-    }
+    final int length = encodedLength(value);
+    return computeRawVarint32Size(length) + length;
   }
 
   /**
@@ -692,7 +892,7 @@ public final class CodedOutputByteBufferNano {
    * Otherwise, throws {@code UnsupportedOperationException}.
    */
   public int spaceLeft() {
-    return limit - position;
+    return buffer.remaining();
   }
 
   /**
@@ -710,6 +910,23 @@ public final class CodedOutputByteBufferNano {
   }
 
   /**
+   * Returns the position within the internal buffer.
+   */
+  public int position() {
+    return buffer.position();
+  }
+
+  /**
+   * Resets the position within the internal buffer to zero.
+   *
+   * @see #position
+   * @see #spaceLeft
+   */
+  public void reset() {
+    buffer.clear();
+  }
+
+  /**
    * If you create a CodedOutputStream around a simple flat array, you must
    * not attempt to write more bytes than the array has space.  Otherwise,
    * this exception will be thrown.
@@ -725,12 +942,12 @@ public final class CodedOutputByteBufferNano {
 
   /** Write a single byte. */
   public void writeRawByte(final byte value) throws IOException {
-    if (position == limit) {
+    if (!buffer.hasRemaining()) {
       // We're writing to a single buffer.
-      throw new OutOfSpaceException(position, limit);
+      throw new OutOfSpaceException(buffer.position(), buffer.limit());
     }
 
-    buffer[position++] = value;
+    buffer.put(value);
   }
 
   /** Write a single byte, represented by an integer value. */
@@ -746,13 +963,11 @@ public final class CodedOutputByteBufferNano {
   /** Write part of an array of bytes. */
   public void writeRawBytes(final byte[] value, int offset, int length)
                             throws IOException {
-    if (limit - position >= length) {
-      // We have room in the current buffer.
-      System.arraycopy(value, offset, buffer, position, length);
-      position += length;
+    if (buffer.remaining() >= length) {
+      buffer.put(value, offset, length);
     } else {
       // We're writing to a single buffer.
-      throw new OutOfSpaceException(position, limit);
+      throw new OutOfSpaceException(buffer.position(), buffer.limit());
     }
   }
 
@@ -825,24 +1040,20 @@ public final class CodedOutputByteBufferNano {
 
   /** Write a little-endian 32-bit integer. */
   public void writeRawLittleEndian32(final int value) throws IOException {
-    writeRawByte((value      ) & 0xFF);
-    writeRawByte((value >>  8) & 0xFF);
-    writeRawByte((value >> 16) & 0xFF);
-    writeRawByte((value >> 24) & 0xFF);
+    if (buffer.remaining() < 4) {
+      throw new OutOfSpaceException(buffer.position(), buffer.limit());
+    }
+    buffer.putInt(value);
   }
 
   public static final int LITTLE_ENDIAN_32_SIZE = 4;
 
   /** Write a little-endian 64-bit integer. */
   public void writeRawLittleEndian64(final long value) throws IOException {
-    writeRawByte((int)(value      ) & 0xFF);
-    writeRawByte((int)(value >>  8) & 0xFF);
-    writeRawByte((int)(value >> 16) & 0xFF);
-    writeRawByte((int)(value >> 24) & 0xFF);
-    writeRawByte((int)(value >> 32) & 0xFF);
-    writeRawByte((int)(value >> 40) & 0xFF);
-    writeRawByte((int)(value >> 48) & 0xFF);
-    writeRawByte((int)(value >> 56) & 0xFF);
+    if (buffer.remaining() < 8) {
+      throw new OutOfSpaceException(buffer.position(), buffer.limit());
+    }
+    buffer.putLong(value);
   }
 
   public static final int LITTLE_ENDIAN_64_SIZE = 8;
@@ -876,4 +1087,128 @@ public final class CodedOutputByteBufferNano {
     // Note:  the right-shift must be arithmetic
     return (n << 1) ^ (n >> 63);
   }
+
+  static int computeFieldSize(int number, int type, Object object) {
+    switch (type) {
+      case InternalNano.TYPE_BOOL:
+        return computeBoolSize(number, (Boolean) object);
+      case InternalNano.TYPE_BYTES:
+        return computeBytesSize(number, (byte[]) object);
+      case InternalNano.TYPE_STRING:
+        return computeStringSize(number, (String) object);
+      case InternalNano.TYPE_FLOAT:
+        return computeFloatSize(number, (Float) object);
+      case InternalNano.TYPE_DOUBLE:
+        return computeDoubleSize(number, (Double) object);
+      case InternalNano.TYPE_ENUM:
+        return computeEnumSize(number, (Integer) object);
+      case InternalNano.TYPE_FIXED32:
+        return computeFixed32Size(number, (Integer) object);
+      case InternalNano.TYPE_INT32:
+        return computeInt32Size(number, (Integer) object);
+      case InternalNano.TYPE_UINT32:
+        return computeUInt32Size(number, (Integer) object);
+      case InternalNano.TYPE_SINT32:
+        return computeSInt32Size(number, (Integer) object);
+      case InternalNano.TYPE_SFIXED32:
+        return computeSFixed32Size(number, (Integer) object);
+      case InternalNano.TYPE_INT64:
+        return computeInt64Size(number, (Long) object);
+      case InternalNano.TYPE_UINT64:
+        return computeUInt64Size(number, (Long) object);
+      case InternalNano.TYPE_SINT64:
+        return computeSInt64Size(number, (Long) object);
+      case InternalNano.TYPE_FIXED64:
+        return computeFixed64Size(number, (Long) object);
+      case InternalNano.TYPE_SFIXED64:
+        return computeSFixed64Size(number, (Long) object);
+      case InternalNano.TYPE_MESSAGE:
+        return computeMessageSize(number, (MessageNano) object);
+      case InternalNano.TYPE_GROUP:
+        return computeGroupSize(number, (MessageNano) object);
+      default:
+        throw new IllegalArgumentException("Unknown type: " + type);
+    }
+  }
+
+  void writeField(int number, int type, Object value)
+      throws IOException {
+    switch (type) {
+      case InternalNano.TYPE_DOUBLE:
+        Double doubleValue = (Double) value;
+        writeDouble(number, doubleValue);
+        break;
+      case InternalNano.TYPE_FLOAT:
+        Float floatValue = (Float) value;
+        writeFloat(number, floatValue);
+        break;
+      case InternalNano.TYPE_INT64:
+        Long int64Value = (Long) value;
+        writeInt64(number, int64Value);
+        break;
+      case InternalNano.TYPE_UINT64:
+        Long uint64Value = (Long) value;
+        writeUInt64(number, uint64Value);
+        break;
+      case InternalNano.TYPE_INT32:
+        Integer int32Value = (Integer) value;
+        writeInt32(number, int32Value);
+        break;
+      case InternalNano.TYPE_FIXED64:
+        Long fixed64Value = (Long) value;
+        writeFixed64(number, fixed64Value);
+        break;
+      case InternalNano.TYPE_FIXED32:
+        Integer fixed32Value = (Integer) value;
+        writeFixed32(number, fixed32Value);
+        break;
+      case InternalNano.TYPE_BOOL:
+        Boolean boolValue = (Boolean) value;
+        writeBool(number, boolValue);
+        break;
+      case InternalNano.TYPE_STRING:
+        String stringValue = (String) value;
+        writeString(number, stringValue);
+        break;
+      case InternalNano.TYPE_BYTES:
+        byte[] bytesValue = (byte[]) value;
+        writeBytes(number, bytesValue);
+        break;
+      case InternalNano.TYPE_UINT32:
+        Integer uint32Value = (Integer) value;
+        writeUInt32(number, uint32Value);
+        break;
+      case InternalNano.TYPE_ENUM:
+        Integer enumValue = (Integer) value;
+        writeEnum(number, enumValue);
+        break;
+      case InternalNano.TYPE_SFIXED32:
+        Integer sfixed32Value = (Integer) value;
+        writeSFixed32(number, sfixed32Value);
+        break;
+      case InternalNano.TYPE_SFIXED64:
+        Long sfixed64Value = (Long) value;
+        writeSFixed64(number, sfixed64Value);
+        break;
+      case InternalNano.TYPE_SINT32:
+        Integer sint32Value = (Integer) value;
+        writeSInt32(number, sint32Value);
+        break;
+      case InternalNano.TYPE_SINT64:
+        Long sint64Value = (Long) value;
+        writeSInt64(number, sint64Value);
+        break;
+      case InternalNano.TYPE_MESSAGE:
+        MessageNano messageValue = (MessageNano) value;
+        writeMessage(number, messageValue);
+        break;
+      case InternalNano.TYPE_GROUP:
+        MessageNano groupValue = (MessageNano) value;
+        writeGroup(number, groupValue);
+        break;
+      default:
+        throw new IOException("Unknown type: " + type);
+    }
+  }
+
 }