1 files changed, 606 insertions, 186 deletions
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java
index ed462899..97da09bb 100644
--- a/java/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/src/main/java/com/google/protobuf/TextFormat.java
@@ -31,17 +31,18 @@
 package com.google.protobuf;
 
 import com.google.protobuf.Descriptors.Descriptor;
-import com.google.protobuf.Descriptors.FieldDescriptor;
 import com.google.protobuf.Descriptors.EnumDescriptor;
 import com.google.protobuf.Descriptors.EnumValueDescriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
 
 import java.io.IOException;
-import java.nio.CharBuffer;
 import java.math.BigInteger;
+import java.nio.CharBuffer;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -55,6 +56,9 @@ import java.util.regex.Pattern;
 public final class TextFormat {
   private TextFormat() {}
 
+  private static final Logger logger =
+      Logger.getLogger(TextFormat.class.getName());
+
   private static final Printer DEFAULT_PRINTER = new Printer();
   private static final Printer SINGLE_LINE_PRINTER =
       (new Printer()).setSingleLineMode(true);
@@ -66,8 +70,9 @@ public final class TextFormat {
    * the parameter output. (This representation is the new version of the
    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
    */
-  public static void print(final MessageOrBuilder message, final Appendable output)
-                           throws IOException {
+  public static void print(
+      final MessageOrBuilder message, final Appendable output)
+      throws IOException {
     DEFAULT_PRINTER.print(message, new TextGenerator(output));
   }
 
@@ -266,7 +271,8 @@ public final class TextFormat {
       return this;
     }
 
-    private void print(final MessageOrBuilder message, final TextGenerator generator)
+    private void print(
+        final MessageOrBuilder message, final TextGenerator generator)
         throws IOException {
       for (Map.Entry<FieldDescriptor, Object> field
           : message.getAllFields().entrySet()) {
@@ -385,13 +391,17 @@ public final class TextFormat {
           generator.print("\"");
           generator.print(escapeNonAscii ?
               escapeText((String) value) :
-              (String) value);
+              escapeDoubleQuotesAndBackslashes((String) value));
           generator.print("\"");
           break;
 
         case BYTES:
           generator.print("\"");
-          generator.print(escapeBytes((ByteString) value));
+          if (value instanceof ByteString) {
+            generator.print(escapeBytes((ByteString) value));
+          } else {
+            generator.print(escapeBytes((byte[]) value));
+          }
           generator.print("\"");
           break;
 
@@ -455,16 +465,16 @@ public final class TextFormat {
   }
 
   /** Convert an unsigned 32-bit integer to a string. */
-  private static String unsignedToString(final int value) {
+  public static String unsignedToString(final int value) {
     if (value >= 0) {
       return Integer.toString(value);
     } else {
-      return Long.toString(((long) value) & 0x00000000FFFFFFFFL);
+      return Long.toString(value & 0x00000000FFFFFFFFL);
     }
   }
 
   /** Convert an unsigned 64-bit integer to a string. */
-  private static String unsignedToString(final long value) {
+  public static String unsignedToString(final long value) {
     if (value >= 0) {
       return Long.toString(value);
     } else {
@@ -518,17 +528,16 @@ public final class TextFormat {
 
       for (int i = 0; i < size; i++) {
         if (text.charAt(i) == '\n') {
-          write(text.subSequence(pos, size), i - pos + 1);
+          write(text.subSequence(pos, i + 1));
           pos = i + 1;
           atStartOfLine = true;
         }
       }
-      write(text.subSequence(pos, size), size - pos);
+      write(text.subSequence(pos, size));
     }
 
-    private void write(final CharSequence data, final int size)
-                       throws IOException {
-      if (size == 0) {
+    private void write(final CharSequence data) throws IOException {
+      if (data.length() == 0) {
         return;
       }
       if (atStartOfLine) {
@@ -705,6 +714,14 @@ public final class TextFormat {
     }
 
     /**
+     * Returns {@code true} if the current token's text is equal to that
+     * specified.
+     */
+    public boolean lookingAt(String text) {
+      return currentToken.equals(text);
+    }
+
+    /**
      * If the next token is an identifier, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
@@ -717,7 +734,8 @@ public final class TextFormat {
             (c == '_') || (c == '.')) {
           // OK
         } else {
-          throw parseException("Expected identifier.");
+          throw parseException(
+              "Expected identifier. Found '" + currentToken + "'");
         }
       }
 
@@ -727,6 +745,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is an identifier, consume it and return {@code true}.
+     * Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeIdentifier() {
+      try {
+        consumeIdentifier();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a 32-bit signed integer, consume it and return its
      * value.  Otherwise, throw a {@link ParseException}.
      */
@@ -769,6 +800,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a 64-bit signed integer, consume it and return
+     * {@code true}.  Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeInt64() {
+      try {
+        consumeInt64();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a 64-bit unsigned integer, consume it and return its
      * value.  Otherwise, throw a {@link ParseException}.
      */
@@ -783,6 +827,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a 64-bit unsigned integer, consume it and return
+     * {@code true}.  Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeUInt64() {
+      try {
+        consumeUInt64();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a double, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
@@ -808,6 +865,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a double, consume it and return {@code true}.
+     * Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeDouble() {
+      try {
+        consumeDouble();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a float, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
@@ -833,6 +903,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a float, consume it and return {@code true}.
+     * Otherwise, return {@code false} without doing anything.
+     */
+    public boolean tryConsumeFloat() {
+      try {
+        consumeFloat();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a boolean, consume it and return its value.
      * Otherwise, throw a {@link ParseException}.
      */
@@ -861,6 +944,19 @@ public final class TextFormat {
     }
 
     /**
+     * If the next token is a string, consume it and return true.  Otherwise,
+     * return false.
+     */
+    public boolean tryConsumeString() {
+      try {
+        consumeString();
+        return true;
+      } catch (ParseException e) {
+        return false;
+      }
+    }
+
+    /**
      * If the next token is a string, consume it, unescape it as a
      * {@link ByteString}, and return it.  Otherwise, throw a
      * {@link ParseException}.
@@ -880,7 +976,8 @@ public final class TextFormat {
      * multiple adjacent tokens which are automatically concatenated, like in
      * C or Python.
      */
-    private void consumeByteString(List<ByteString> list) throws ParseException {
+    private void consumeByteString(List<ByteString> list)
+        throws ParseException {
       final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
                                                    : '\0';
       if (quote != '\"' && quote != '\'') {
@@ -988,6 +1085,16 @@ public final class TextFormat {
     }
   }
 
+  private static final Parser PARSER = Parser.newBuilder().build();
+
+  /**
+   * Return a {@link Parser} instance which can parse text-format
+   * messages. The returned instance is thread-safe.
+   */
+  public static Parser getParser() {
+    return PARSER;
+  }
+
   /**
    * Parse a text-format message from {@code input} and merge the contents
    * into {@code builder}.
@@ -995,7 +1102,7 @@ public final class TextFormat {
   public static void merge(final Readable input,
                            final Message.Builder builder)
                            throws IOException {
-    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    PARSER.merge(input, builder);
   }
 
   /**
@@ -1005,7 +1112,7 @@ public final class TextFormat {
   public static void merge(final CharSequence input,
                            final Message.Builder builder)
                            throws ParseException {
-    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    PARSER.merge(input, builder);
   }
 
   /**
@@ -1017,35 +1124,9 @@ public final class TextFormat {
                            final ExtensionRegistry extensionRegistry,
                            final Message.Builder builder)
                            throws IOException {
-    // Read the entire input to a String then parse that.
-
-    // If StreamTokenizer were not quite so crippled, or if there were a kind
-    // of Reader that could read in chunks that match some particular regex,
-    // or if we wanted to write a custom Reader to tokenize our stream, then
-    // we would not have to read to one big String.  Alas, none of these is
-    // the case.  Oh well.
-
-    merge(toStringBuilder(input), extensionRegistry, builder);
+    PARSER.merge(input, extensionRegistry, builder);
   }
 
-  private static final int BUFFER_SIZE = 4096;
-
-  // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
-  // overhead is worthwhile
-  private static StringBuilder toStringBuilder(final Readable input)
-      throws IOException {
-    final StringBuilder text = new StringBuilder();
-    final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
-    while (true) {
-      final int n = input.read(buffer);
-      if (n == -1) {
-        break;
-      }
-      buffer.flip();
-      text.append(buffer, 0, n);
-    }
-    return text;
-  }
 
   /**
    * Parse a text-format message from {@code input} and merge the contents
@@ -1056,187 +1137,481 @@ public final class TextFormat {
                            final ExtensionRegistry extensionRegistry,
                            final Message.Builder builder)
                            throws ParseException {
-    final Tokenizer tokenizer = new Tokenizer(input);
-
-    while (!tokenizer.atEnd()) {
-      mergeField(tokenizer, extensionRegistry, builder);
-    }
+    PARSER.merge(input, extensionRegistry, builder);
   }
 
+
   /**
-   * Parse a single field from {@code tokenizer} and merge it into
-   * {@code builder}.
+   * Parser for text-format proto2 instances. This class is thread-safe.
+   * The implementation largely follows google/protobuf/text_format.cc.
+   *
+   * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or
+   * {@link Builder} to control the parser behavior.
    */
-  private static void mergeField(final Tokenizer tokenizer,
-                                 final ExtensionRegistry extensionRegistry,
-                                 final Message.Builder builder)
-                                 throws ParseException {
-    FieldDescriptor field;
-    final Descriptor type = builder.getDescriptorForType();
-    ExtensionRegistry.ExtensionInfo extension = null;
+  public static class Parser {
+    /**
+     * Determines if repeated values for non-repeated fields and
+     * oneofs are permitted. For example, given required/optional field "foo"
+     * and a oneof containing "baz" and "qux":
+     * <li>
+     * <ul>"foo: 1 foo: 2"
+     * <ul>"baz: 1 qux: 2"
+     * <ul>merging "foo: 2" into a proto in which foo is already set, or
+     * <ul>merging "qux: 2" into a proto in which baz is already set.
+     * </li>
+     */
+    public enum SingularOverwritePolicy {
+      /** The last value is retained. */
+      ALLOW_SINGULAR_OVERWRITES,
+      /** An error is issued. */
+      FORBID_SINGULAR_OVERWRITES
+    }
+
+    private final boolean allowUnknownFields;
+    private final SingularOverwritePolicy singularOverwritePolicy;
+
+    private Parser(boolean allowUnknownFields,
+        SingularOverwritePolicy singularOverwritePolicy) {
+      this.allowUnknownFields = allowUnknownFields;
+      this.singularOverwritePolicy = singularOverwritePolicy;
+    }
 
-    if (tokenizer.tryConsume("[")) {
-      // An extension.
-      final StringBuilder name =
-          new StringBuilder(tokenizer.consumeIdentifier());
-      while (tokenizer.tryConsume(".")) {
-        name.append('.');
-        name.append(tokenizer.consumeIdentifier());
+    /**
+     * Returns a new instance of {@link Builder}.
+     */
+    public static Builder newBuilder() {
+      return new Builder();
+    }
+
+    /**
+     * Builder that can be used to obtain new instances of {@link Parser}.
+     */
+    public static class Builder {
+      private boolean allowUnknownFields = false;
+      private SingularOverwritePolicy singularOverwritePolicy =
+          SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
+
+      /**
+       * Set whether this parser will allow unknown fields. By default, an
+       * exception is thrown if an unknown field is encountered. If this is
+       * set, the parser will only log a warning.
+       *
+       * <p>Use of this parameter is discouraged. See:
+       * https://sites.google.com/a/google.com/protocol-buffers/migration/
+       *     proto2-faq#How_do_I_ignore_unknown_fields_w
+       * for more details.
+       */
+      public Builder setAllowUnknownFields(boolean allowUnknownFields) {
+        this.allowUnknownFields = allowUnknownFields;
+        return this;
       }
 
-      extension = extensionRegistry.findExtensionByName(name.toString());
+      /**
+       * Sets parser behavior when a non-repeated field appears more than once.
+       */
+      public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
+        this.singularOverwritePolicy = p;
+        return this;
+      }
 
-      if (extension == null) {
-        throw tokenizer.parseExceptionPreviousToken(
-          "Extension \"" + name + "\" not found in the ExtensionRegistry.");
-      } else if (extension.descriptor.getContainingType() != type) {
-        throw tokenizer.parseExceptionPreviousToken(
-          "Extension \"" + name + "\" does not extend message type \"" +
-          type.getFullName() + "\".");
+      public Parser build() {
+        return new Parser(allowUnknownFields, singularOverwritePolicy);
       }
+    }
 
-      tokenizer.consume("]");
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.
+     */
+    public void merge(final Readable input,
+                      final Message.Builder builder)
+                      throws IOException {
+      merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    }
 
-      field = extension.descriptor;
-    } else {
-      final String name = tokenizer.consumeIdentifier();
-      field = type.findFieldByName(name);
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.
+     */
+    public void merge(final CharSequence input,
+                      final Message.Builder builder)
+                      throws ParseException {
+      merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+    }
 
-      // Group names are expected to be capitalized as they appear in the
-      // .proto file, which actually matches their type names, not their field
-      // names.
-      if (field == null) {
-        // Explicitly specify US locale so that this code does not break when
-        // executing in Turkey.
-        final String lowerName = name.toLowerCase(Locale.US);
-        field = type.findFieldByName(lowerName);
-        // If the case-insensitive match worked but the field is NOT a group,
-        if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
-          field = null;
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.  Extensions will be recognized if they are
+     * registered in {@code extensionRegistry}.
+     */
+    public void merge(final Readable input,
+                      final ExtensionRegistry extensionRegistry,
+                      final Message.Builder builder)
+                      throws IOException {
+      // Read the entire input to a String then parse that.
+
+      // If StreamTokenizer were not quite so crippled, or if there were a kind
+      // of Reader that could read in chunks that match some particular regex,
+      // or if we wanted to write a custom Reader to tokenize our stream, then
+      // we would not have to read to one big String.  Alas, none of these is
+      // the case.  Oh well.
+
+      merge(toStringBuilder(input), extensionRegistry, builder);
+    }
+
+
+    private static final int BUFFER_SIZE = 4096;
+
+    // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
+    // overhead is worthwhile
+    private static StringBuilder toStringBuilder(final Readable input)
+        throws IOException {
+      final StringBuilder text = new StringBuilder();
+      final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
+      while (true) {
+        final int n = input.read(buffer);
+        if (n == -1) {
+          break;
         }
+        buffer.flip();
+        text.append(buffer, 0, n);
       }
-      // Again, special-case group names as described above.
-      if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
-          !field.getMessageType().getName().equals(name)) {
-        field = null;
-      }
+      return text;
+    }
 
-      if (field == null) {
-        throw tokenizer.parseExceptionPreviousToken(
-          "Message type \"" + type.getFullName() +
-          "\" has no field named \"" + name + "\".");
+    /**
+     * Parse a text-format message from {@code input} and merge the contents
+     * into {@code builder}.  Extensions will be recognized if they are
+     * registered in {@code extensionRegistry}.
+     */
+    public void merge(final CharSequence input,
+                      final ExtensionRegistry extensionRegistry,
+                      final Message.Builder builder)
+                      throws ParseException {
+      final Tokenizer tokenizer = new Tokenizer(input);
+      MessageReflection.BuilderAdapter target =
+          new MessageReflection.BuilderAdapter(builder);
+
+      while (!tokenizer.atEnd()) {
+        mergeField(tokenizer, extensionRegistry, target);
       }
     }
 
-    Object value = null;
 
-    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-      tokenizer.tryConsume(":");  // optional
+    /**
+     * Parse a single field from {@code tokenizer} and merge it into
+     * {@code builder}.
+     */
+    private void mergeField(final Tokenizer tokenizer,
+                            final ExtensionRegistry extensionRegistry,
+                            final MessageReflection.MergeTarget target)
+                            throws ParseException {
+      FieldDescriptor field = null;
+      final Descriptor type = target.getDescriptorForType();
+      ExtensionRegistry.ExtensionInfo extension = null;
+
+      if (tokenizer.tryConsume("[")) {
+        // An extension.
+        final StringBuilder name =
+            new StringBuilder(tokenizer.consumeIdentifier());
+        while (tokenizer.tryConsume(".")) {
+          name.append('.');
+          name.append(tokenizer.consumeIdentifier());
+        }
 
-      final String endToken;
-      if (tokenizer.tryConsume("<")) {
-        endToken = ">";
-      } else {
-        tokenizer.consume("{");
-        endToken = "}";
-      }
+        extension = target.findExtensionByName(
+            extensionRegistry, name.toString());
+
+        if (extension == null) {
+          if (!allowUnknownFields) {
+            throw tokenizer.parseExceptionPreviousToken(
+              "Extension \"" + name + "\" not found in the ExtensionRegistry.");
+          } else {
+            logger.warning(
+              "Extension \"" + name + "\" not found in the ExtensionRegistry.");
+          }
+        } else {
+          if (extension.descriptor.getContainingType() != type) {
+            throw tokenizer.parseExceptionPreviousToken(
+              "Extension \"" + name + "\" does not extend message type \"" +
+              type.getFullName() + "\".");
+          }
+          field = extension.descriptor;
+        }
 
-      final Message.Builder subBuilder;
-      if (extension == null) {
-        subBuilder = builder.newBuilderForField(field);
+        tokenizer.consume("]");
       } else {
-        subBuilder = extension.defaultInstance.newBuilderForType();
-      }
+        final String name = tokenizer.consumeIdentifier();
+        field = type.findFieldByName(name);
+
+        // Group names are expected to be capitalized as they appear in the
+        // .proto file, which actually matches their type names, not their field
+        // names.
+        if (field == null) {
+          // Explicitly specify US locale so that this code does not break when
+          // executing in Turkey.
+          final String lowerName = name.toLowerCase(Locale.US);
+          field = type.findFieldByName(lowerName);
+          // If the case-insensitive match worked but the field is NOT a group,
+          if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
+            field = null;
+          }
+        }
+        // Again, special-case group names as described above.
+        if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
+            !field.getMessageType().getName().equals(name)) {
+          field = null;
+        }
 
-      while (!tokenizer.tryConsume(endToken)) {
-        if (tokenizer.atEnd()) {
-          throw tokenizer.parseException(
-            "Expected \"" + endToken + "\".");
+        if (field == null) {
+          if (!allowUnknownFields) {
+            throw tokenizer.parseExceptionPreviousToken(
+              "Message type \"" + type.getFullName() +
+              "\" has no field named \"" + name + "\".");
+          } else {
+            logger.warning(
+              "Message type \"" + type.getFullName() +
+              "\" has no field named \"" + name + "\".");
+          }
         }
-        mergeField(tokenizer, extensionRegistry, subBuilder);
       }
 
-      value = subBuilder.buildPartial();
+      // Skips unknown fields.
+      if (field == null) {
+        // Try to guess the type of this field.
+        // If this field is not a message, there should be a ":" between the
+        // field name and the field value and also the field value should not
+        // start with "{" or "<" which indicates the begining of a message body.
+        // If there is no ":" or there is a "{" or "<" after ":", this field has
+        // to be a message or the input is ill-formed.
+        if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") &&
+            !tokenizer.lookingAt("<")) {
+          skipFieldValue(tokenizer);
+        } else {
+          skipFieldMessage(tokenizer);
+        }
+        return;
+      }
 
-    } else {
-      tokenizer.consume(":");
+      // Handle potential ':'.
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        tokenizer.tryConsume(":");  // optional
+      } else {
+        tokenizer.consume(":");  // required
+      }
+      // Support specifying repeated field values as a comma-separated list.
+      // Ex."foo: [1, 2, 3]"
+      if (field.isRepeated() && tokenizer.tryConsume("[")) {
+        while (true) {
+          consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+          if (tokenizer.tryConsume("]")) {
+            // End of list.
+            break;
+          }
+          tokenizer.consume(",");
+        }
+      } else {
+        consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+      }
+    }
 
-      switch (field.getType()) {
-        case INT32:
-        case SINT32:
-        case SFIXED32:
-          value = tokenizer.consumeInt32();
-          break;
+    /**
+     * Parse a single field value from {@code tokenizer} and merge it into
+     * {@code builder}.
+     */
+    private void consumeFieldValue(
+        final Tokenizer tokenizer,
+        final ExtensionRegistry extensionRegistry,
+        final MessageReflection.MergeTarget target,
+        final FieldDescriptor field,
+        final ExtensionRegistry.ExtensionInfo extension)
+        throws ParseException {
+      Object value = null;
 
-        case INT64:
-        case SINT64:
-        case SFIXED64:
-          value = tokenizer.consumeInt64();
-          break;
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        final String endToken;
+        if (tokenizer.tryConsume("<")) {
+          endToken = ">";
+        } else {
+          tokenizer.consume("{");
+          endToken = "}";
+        }
 
-        case UINT32:
-        case FIXED32:
-          value = tokenizer.consumeUInt32();
-          break;
+        final MessageReflection.MergeTarget subField;
+        subField = target.newMergeTargetForField(field,
+            (extension == null) ? null : extension.defaultInstance);
 
-        case UINT64:
-        case FIXED64:
-          value = tokenizer.consumeUInt64();
-          break;
+        while (!tokenizer.tryConsume(endToken)) {
+          if (tokenizer.atEnd()) {
+            throw tokenizer.parseException(
+              "Expected \"" + endToken + "\".");
+          }
+          mergeField(tokenizer, extensionRegistry, subField);
+        }
 
-        case FLOAT:
-          value = tokenizer.consumeFloat();
-          break;
+        value = subField.finish();
 
-        case DOUBLE:
-          value = tokenizer.consumeDouble();
-          break;
+      } else {
+        switch (field.getType()) {
+          case INT32:
+          case SINT32:
+          case SFIXED32:
+            value = tokenizer.consumeInt32();
+            break;
+
+          case INT64:
+          case SINT64:
+          case SFIXED64:
+            value = tokenizer.consumeInt64();
+            break;
+
+          case UINT32:
+          case FIXED32:
+            value = tokenizer.consumeUInt32();
+            break;
+
+          case UINT64:
+          case FIXED64:
+            value = tokenizer.consumeUInt64();
+            break;
+
+          case FLOAT:
+            value = tokenizer.consumeFloat();
+            break;
+
+          case DOUBLE:
+            value = tokenizer.consumeDouble();
+            break;
+
+          case BOOL:
+            value = tokenizer.consumeBoolean();
+            break;
+
+          case STRING:
+            value = tokenizer.consumeString();
+            break;
+
+          case BYTES:
+            value = tokenizer.consumeByteString();
+            break;
+
+          case ENUM:
+            final EnumDescriptor enumType = field.getEnumType();
+
+            if (tokenizer.lookingAtInteger()) {
+              final int number = tokenizer.consumeInt32();
+              value = enumType.findValueByNumber(number);
+              if (value == null) {
+                throw tokenizer.parseExceptionPreviousToken(
+                  "Enum type \"" + enumType.getFullName() +
+                  "\" has no value with number " + number + '.');
+              }
+            } else {
+              final String id = tokenizer.consumeIdentifier();
+              value = enumType.findValueByName(id);
+              if (value == null) {
+                throw tokenizer.parseExceptionPreviousToken(
+                  "Enum type \"" + enumType.getFullName() +
+                  "\" has no value named \"" + id + "\".");
+              }
+            }
 
-        case BOOL:
-          value = tokenizer.consumeBoolean();
-          break;
+            break;
 
-        case STRING:
-          value = tokenizer.consumeString();
-          break;
+          case MESSAGE:
+          case GROUP:
+            throw new RuntimeException("Can't get here.");
+        }
+      }
 
-        case BYTES:
-          value = tokenizer.consumeByteString();
-          break;
+      if (field.isRepeated()) {
+        target.addRepeatedField(field, value);
+      } else if ((singularOverwritePolicy
+              == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+          && target.hasField(field)) {
+        throw tokenizer.parseExceptionPreviousToken("Non-repeated field \""
+            + field.getFullName() + "\" cannot be overwritten.");
+      } else if ((singularOverwritePolicy
+              == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+          && field.getContainingOneof() != null
+          && target.hasOneof(field.getContainingOneof())) {
+        Descriptors.OneofDescriptor oneof = field.getContainingOneof();
+        throw tokenizer.parseExceptionPreviousToken("Field \""
+            + field.getFullName() + "\" is specified along with field \""
+            + target.getOneofFieldDescriptor(oneof).getFullName()
+            + "\", another member of oneof \"" + oneof.getName() + "\".");
+      } else {
+        target.setField(field, value);
+      }
+    }
 
-        case ENUM:
-          final EnumDescriptor enumType = field.getEnumType();
-
-          if (tokenizer.lookingAtInteger()) {
-            final int number = tokenizer.consumeInt32();
-            value = enumType.findValueByNumber(number);
-            if (value == null) {
-              throw tokenizer.parseExceptionPreviousToken(
-                "Enum type \"" + enumType.getFullName() +
-                "\" has no value with number " + number + '.');
-            }
-          } else {
-            final String id = tokenizer.consumeIdentifier();
-            value = enumType.findValueByName(id);
-            if (value == null) {
-              throw tokenizer.parseExceptionPreviousToken(
-                "Enum type \"" + enumType.getFullName() +
-                "\" has no value named \"" + id + "\".");
-            }
-          }
+    /**
+     * Skips the next field including the field's name and value.
+     */
+    private void skipField(Tokenizer tokenizer) throws ParseException {
+      if (tokenizer.tryConsume("[")) {
+        // Extension name.
+        do {
+          tokenizer.consumeIdentifier();
+        } while (tokenizer.tryConsume("."));
+        tokenizer.consume("]");
+      } else {
+        tokenizer.consumeIdentifier();
+      }
 
-          break;
+      // Try to guess the type of this field.
+      // If this field is not a message, there should be a ":" between the
+      // field name and the field value and also the field value should not
+      // start with "{" or "<" which indicates the begining of a message body.
+      // If there is no ":" or there is a "{" or "<" after ":", this field has
+      // to be a message or the input is ill-formed.
+      if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") &&
+          !tokenizer.lookingAt("{")) {
+        skipFieldValue(tokenizer);
+      } else {
+        skipFieldMessage(tokenizer);
+      }
+      // For historical reasons, fields may optionally be separated by commas or
+      // semicolons.
+      if (!tokenizer.tryConsume(";")) {
+        tokenizer.tryConsume(",");
+      }
+    }
 
-        case MESSAGE:
-        case GROUP:
-          throw new RuntimeException("Can't get here.");
+    /**
+     * Skips the whole body of a message including the beginning delimeter and
+     * the ending delimeter.
+     */
+    private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
+      final String delimiter;
+      if (tokenizer.tryConsume("<")) {
+        delimiter = ">";
+      } else {
+        tokenizer.consume("{");
+        delimiter = "}";
       }
+      while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
+        skipField(tokenizer);
+      }
+      tokenizer.consume(delimiter);
     }
 
-    if (field.isRepeated()) {
-      builder.addRepeatedField(field, value);
-    } else {
-      builder.setField(field, value);
+    /**
+     * Skips a field value.
+     */
+    private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
+      if (tokenizer.tryConsumeString()) {
+        while (tokenizer.tryConsumeString()) {}
+        return;
+      }
+      if (!tokenizer.tryConsumeIdentifier() &&  // includes enum & boolean
+          !tokenizer.tryConsumeInt64() &&       // includes int32
+          !tokenizer.tryConsumeUInt64() &&      // includes uint32
+          !tokenizer.tryConsumeDouble() &&
+          !tokenizer.tryConsumeFloat()) {
+        throw tokenizer.parseException(
+            "Invalid field value: " + tokenizer.currentToken);
+      }
     }
   }
 
@@ -1246,6 +1621,11 @@ public final class TextFormat {
   // Some of these methods are package-private because Descriptors.java uses
   // them.
 
+  private interface ByteSequence {
+    int size();
+    byte byteAt(int offset);
+  }
+
   /**
    * Escapes bytes in the format used in protocol buffer text format, which
    * is the same as the format used for C string literals.  All bytes
@@ -1254,7 +1634,7 @@ public final class TextFormat {
    * which no defined short-hand escape sequence is defined will be escaped
    * using 3-digit octal sequences.
    */
-  static String escapeBytes(final ByteString input) {
+  private static String escapeBytes(final ByteSequence input) {
     final StringBuilder builder = new StringBuilder(input.size());
     for (int i = 0; i < input.size(); i++) {
       final byte b = input.byteAt(i);
@@ -1289,6 +1669,39 @@ public final class TextFormat {
   }
 
   /**
+   * Escapes bytes in the format used in protocol buffer text format, which
+   * is the same as the format used for C string literals.  All bytes
+   * that are not printable 7-bit ASCII characters are escaped, as well as
+   * backslash, single-quote, and double-quote characters.  Characters for
+   * which no defined short-hand escape sequence is defined will be escaped
+   * using 3-digit octal sequences.
+   */
+  static String escapeBytes(final ByteString input) {
+    return escapeBytes(new ByteSequence() {
+      public int size() {
+        return input.size();
+      }
+      public byte byteAt(int offset) {
+        return input.byteAt(offset);
+      }
+    });
+  }
+
+  /**
+   * Like {@link #escapeBytes(ByteString)}, but used for byte array.
+   */
+  static String escapeBytes(final byte[] input) {
+    return escapeBytes(new ByteSequence() {
+      public int size() {
+        return input.length;
+      }
+      public byte byteAt(int offset) {
+        return input[offset];
+      }
+    });
+  }
+
+  /**
    * Un-escape a byte sequence as escaped using
    * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
    * "\x") are also recognized.
@@ -1394,6 +1807,13 @@ public final class TextFormat {
   }
 
   /**
+   * Escape double quotes and backslashes in a String for unicode output of a message.
+   */
+  public static String escapeDoubleQuotesAndBackslashes(final String input) {
+    return input.replace("\\", "\\\\").replace("\"", "\\\"");
+  }
+
+  /**
    * Un-escape a text string as escaped using {@link #escapeText(String)}.
    * Two-digit hex escapes (starting with "\x") are also recognized.
    */