aboutsummaryrefslogtreecommitdiffhomepage
path: root/third_party/protobuf/3.6.0/java/core/src/main/java/com/google/protobuf/TextFormat.java
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/protobuf/3.6.0/java/core/src/main/java/com/google/protobuf/TextFormat.java')
-rw-r--r--third_party/protobuf/3.6.0/java/core/src/main/java/com/google/protobuf/TextFormat.java2189
1 files changed, 2189 insertions, 0 deletions
diff --git a/third_party/protobuf/3.6.0/java/core/src/main/java/com/google/protobuf/TextFormat.java b/third_party/protobuf/3.6.0/java/core/src/main/java/com/google/protobuf/TextFormat.java
new file mode 100644
index 0000000000..25c3474f91
--- /dev/null
+++ b/third_party/protobuf/3.6.0/java/core/src/main/java/com/google/protobuf/TextFormat.java
@@ -0,0 +1,2189 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.Descriptors.Descriptor;
+import com.google.protobuf.Descriptors.EnumDescriptor;
+import com.google.protobuf.Descriptors.EnumValueDescriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.CharBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Provide text parsing and formatting support for proto2 instances.
+ * The implementation largely follows google/protobuf/text_format.cc.
+ *
+ * @author wenboz@google.com Wenbo Zhu
+ * @author kenton@google.com Kenton Varda
+ */
+public final class TextFormat {
+ private TextFormat() {}
+
+ private static final Logger logger = Logger.getLogger(TextFormat.class.getName());
+
+ /**
+ * Outputs a textual representation of the Protocol Message supplied into
+ * the parameter output. (This representation is the new version of the
+ * classic "ProtocolPrinter" output from the original Protocol Buffer system)
+ */
+ public static void print(
+ final MessageOrBuilder message, final Appendable output)
+ throws IOException {
+ Printer.DEFAULT.print(message, multiLineOutput(output));
+ }
+
+ /** Outputs a textual representation of {@code fields} to {@code output}. */
+ public static void print(final UnknownFieldSet fields,
+ final Appendable output)
+ throws IOException {
+ Printer.DEFAULT.printUnknownFields(fields, multiLineOutput(output));
+ }
+
+ /**
+ * Same as {@code print()}, except that non-ASCII characters are not
+ * escaped.
+ */
+ public static void printUnicode(
+ final MessageOrBuilder message, final Appendable output)
+ throws IOException {
+ Printer.UNICODE.print(message, multiLineOutput(output));
+ }
+
+ /**
+ * Same as {@code print()}, except that non-ASCII characters are not
+ * escaped.
+ */
+ public static void printUnicode(final UnknownFieldSet fields,
+ final Appendable output)
+ throws IOException {
+ Printer.UNICODE.printUnknownFields(fields, multiLineOutput(output));
+ }
+
+ /**
+ * Generates a human readable form of this message, useful for debugging and
+ * other purposes, with no newline characters.
+ */
+ public static String shortDebugString(final MessageOrBuilder message) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ Printer.DEFAULT.print(message, singleLineOutput(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Generates a human readable form of the field, useful for debugging
+ * and other purposes, with no newline characters.
+ */
+ public static String shortDebugString(final FieldDescriptor field,
+ final Object value) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ Printer.DEFAULT.printField(field, value, singleLineOutput(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Generates a human readable form of the unknown fields, useful for debugging
+ * and other purposes, with no newline characters.
+ */
+ public static String shortDebugString(final UnknownFieldSet fields) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ Printer.DEFAULT.printUnknownFields(fields, singleLineOutput(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Like {@code print()}, but writes directly to a {@code String} and
+ * returns it.
+ */
+ public static String printToString(final MessageOrBuilder message) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ print(message, text);
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Like {@code print()}, but writes directly to a {@code String} and
+ * returns it.
+ */
+ public static String printToString(final UnknownFieldSet fields) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ print(fields, text);
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Same as {@code printToString()}, except that non-ASCII characters
+ * in string type fields are not escaped in backslash+octals.
+ */
+ public static String printToUnicodeString(final MessageOrBuilder message) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ Printer.UNICODE.print(message, multiLineOutput(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Same as {@code printToString()}, except that non-ASCII characters
+ * in string type fields are not escaped in backslash+octals.
+ */
+ public static String printToUnicodeString(final UnknownFieldSet fields) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ Printer.UNICODE.printUnknownFields(fields, multiLineOutput(text));
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ public static void printField(final FieldDescriptor field,
+ final Object value,
+ final Appendable output)
+ throws IOException {
+ Printer.DEFAULT.printField(field, value, multiLineOutput(output));
+ }
+
+ public static String printFieldToString(final FieldDescriptor field,
+ final Object value) {
+ try {
+ final StringBuilder text = new StringBuilder();
+ printField(field, value, text);
+ return text.toString();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ /**
+ * Outputs a unicode textual representation of the value of given field value.
+ *
+ * <p>Same as {@code printFieldValue()}, except that non-ASCII characters in string type fields
+ * are not escaped in backslash+octals.
+ *
+ * @param field the descriptor of the field
+ * @param value the value of the field
+ * @param output the output to which to append the formatted value
+ * @throws ClassCastException if the value is not appropriate for the given field descriptor
+ * @throws IOException if there is an exception writing to the output
+ */
+ public static void printUnicodeFieldValue(
+ final FieldDescriptor field, final Object value, final Appendable output) throws IOException {
+ Printer.UNICODE.printFieldValue(field, value, multiLineOutput(output));
+ }
+
+ /**
+ * Outputs a textual representation of the value of given field value.
+ *
+ * @param field the descriptor of the field
+ * @param value the value of the field
+ * @param output the output to which to append the formatted value
+ * @throws ClassCastException if the value is not appropriate for the
+ * given field descriptor
+ * @throws IOException if there is an exception writing to the output
+ */
+ public static void printFieldValue(final FieldDescriptor field,
+ final Object value,
+ final Appendable output)
+ throws IOException {
+ Printer.DEFAULT.printFieldValue(field, value, multiLineOutput(output));
+ }
+
+ /**
+ * Outputs a textual representation of the value of an unknown field.
+ *
+ * @param tag the field's tag number
+ * @param value the value of the field
+ * @param output the output to which to append the formatted value
+ * @throws ClassCastException if the value is not appropriate for the
+ * given field descriptor
+ * @throws IOException if there is an exception writing to the output
+ */
+ public static void printUnknownFieldValue(final int tag,
+ final Object value,
+ final Appendable output)
+ throws IOException {
+ printUnknownFieldValue(tag, value, multiLineOutput(output));
+ }
+
+ private static void printUnknownFieldValue(final int tag,
+ final Object value,
+ final TextGenerator generator)
+ throws IOException {
+ switch (WireFormat.getTagWireType(tag)) {
+ case WireFormat.WIRETYPE_VARINT:
+ generator.print(unsignedToString((Long) value));
+ break;
+ case WireFormat.WIRETYPE_FIXED32:
+ generator.print(
+ String.format((Locale) null, "0x%08x", (Integer) value));
+ break;
+ case WireFormat.WIRETYPE_FIXED64:
+ generator.print(String.format((Locale) null, "0x%016x", (Long) value));
+ break;
+ case WireFormat.WIRETYPE_LENGTH_DELIMITED:
+ try {
+ // Try to parse and print the field as an embedded message
+ UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value);
+ generator.print("{");
+ generator.eol();
+ generator.indent();
+ Printer.DEFAULT.printUnknownFields(message, generator);
+ generator.outdent();
+ generator.print("}");
+ } catch (InvalidProtocolBufferException e) {
+ // If not parseable as a message, print as a String
+ generator.print("\"");
+ generator.print(escapeBytes((ByteString) value));
+ generator.print("\"");
+ }
+ break;
+ case WireFormat.WIRETYPE_START_GROUP:
+ Printer.DEFAULT.printUnknownFields((UnknownFieldSet) value, generator);
+ break;
+ default:
+ throw new IllegalArgumentException("Bad tag: " + tag);
+ }
+ }
+
+ /** Helper class for converting protobufs to text. */
+ private static final class Printer {
+ // Printer instance which escapes non-ASCII characters.
+ static final Printer DEFAULT = new Printer(true);
+ // Printer instance which emits Unicode (it still escapes newlines and quotes in strings).
+ static final Printer UNICODE = new Printer(false);
+
+ /** Whether to escape non ASCII characters with backslash and octal. */
+ private final boolean escapeNonAscii;
+
+ private Printer(boolean escapeNonAscii) {
+ this.escapeNonAscii = escapeNonAscii;
+ }
+
+ private void print(
+ final MessageOrBuilder message, final TextGenerator generator)
+ throws IOException {
+ for (Map.Entry<FieldDescriptor, Object> field
+ : message.getAllFields().entrySet()) {
+ printField(field.getKey(), field.getValue(), generator);
+ }
+ printUnknownFields(message.getUnknownFields(), generator);
+ }
+
+ private void printField(final FieldDescriptor field, final Object value,
+ final TextGenerator generator) throws IOException {
+ if (field.isRepeated()) {
+ // Repeated field. Print each element.
+ for (Object element : (List<?>) value) {
+ printSingleField(field, element, generator);
+ }
+ } else {
+ printSingleField(field, value, generator);
+ }
+ }
+
+ private void printSingleField(final FieldDescriptor field,
+ final Object value,
+ final TextGenerator generator)
+ throws IOException {
+ if (field.isExtension()) {
+ generator.print("[");
+ // We special-case MessageSet elements for compatibility with proto1.
+ if (field.getContainingType().getOptions().getMessageSetWireFormat()
+ && (field.getType() == FieldDescriptor.Type.MESSAGE)
+ && (field.isOptional())
+ // object equality
+ && (field.getExtensionScope() == field.getMessageType())) {
+ generator.print(field.getMessageType().getFullName());
+ } else {
+ generator.print(field.getFullName());
+ }
+ generator.print("]");
+ } else {
+ if (field.getType() == FieldDescriptor.Type.GROUP) {
+ // Groups must be serialized with their original capitalization.
+ generator.print(field.getMessageType().getName());
+ } else {
+ generator.print(field.getName());
+ }
+ }
+
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ generator.print(" {");
+ generator.eol();
+ generator.indent();
+ } else {
+ generator.print(": ");
+ }
+
+ printFieldValue(field, value, generator);
+
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ generator.outdent();
+ generator.print("}");
+ }
+ generator.eol();
+ }
+
+ private void printFieldValue(final FieldDescriptor field,
+ final Object value,
+ final TextGenerator generator)
+ throws IOException {
+ switch (field.getType()) {
+ case INT32:
+ case SINT32:
+ case SFIXED32:
+ generator.print(((Integer) value).toString());
+ break;
+
+ case INT64:
+ case SINT64:
+ case SFIXED64:
+ generator.print(((Long) value).toString());
+ break;
+
+ case BOOL:
+ generator.print(((Boolean) value).toString());
+ break;
+
+ case FLOAT:
+ generator.print(((Float) value).toString());
+ break;
+
+ case DOUBLE:
+ generator.print(((Double) value).toString());
+ break;
+
+ case UINT32:
+ case FIXED32:
+ generator.print(unsignedToString((Integer) value));
+ break;
+
+ case UINT64:
+ case FIXED64:
+ generator.print(unsignedToString((Long) value));
+ break;
+
+ case STRING:
+ generator.print("\"");
+ generator.print(escapeNonAscii
+ ? TextFormatEscaper.escapeText((String) value)
+ : escapeDoubleQuotesAndBackslashes((String) value)
+ .replace("\n", "\\n"));
+ generator.print("\"");
+ break;
+
+ case BYTES:
+ generator.print("\"");
+ if (value instanceof ByteString) {
+ generator.print(escapeBytes((ByteString) value));
+ } else {
+ generator.print(escapeBytes((byte[]) value));
+ }
+ generator.print("\"");
+ break;
+
+ case ENUM:
+ generator.print(((EnumValueDescriptor) value).getName());
+ break;
+
+ case MESSAGE:
+ case GROUP:
+ print((Message) value, generator);
+ break;
+ }
+ }
+
+ private void printUnknownFields(final UnknownFieldSet unknownFields,
+ final TextGenerator generator)
+ throws IOException {
+ for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
+ unknownFields.asMap().entrySet()) {
+ final int number = entry.getKey();
+ final UnknownFieldSet.Field field = entry.getValue();
+ printUnknownField(number, WireFormat.WIRETYPE_VARINT,
+ field.getVarintList(), generator);
+ printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
+ field.getFixed32List(), generator);
+ printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
+ field.getFixed64List(), generator);
+ printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
+ field.getLengthDelimitedList(), generator);
+ for (final UnknownFieldSet value : field.getGroupList()) {
+ generator.print(entry.getKey().toString());
+ generator.print(" {");
+ generator.eol();
+ generator.indent();
+ printUnknownFields(value, generator);
+ generator.outdent();
+ generator.print("}");
+ generator.eol();
+ }
+ }
+ }
+
+ private void printUnknownField(final int number,
+ final int wireType,
+ final List<?> values,
+ final TextGenerator generator)
+ throws IOException {
+ for (final Object value : values) {
+ generator.print(String.valueOf(number));
+ generator.print(": ");
+ printUnknownFieldValue(wireType, value, generator);
+ generator.eol();
+ }
+ }
+ }
+
+ /** Convert an unsigned 32-bit integer to a string. */
+ public static String unsignedToString(final int value) {
+ if (value >= 0) {
+ return Integer.toString(value);
+ } else {
+ return Long.toString(value & 0x00000000FFFFFFFFL);
+ }
+ }
+
+ /** Convert an unsigned 64-bit integer to a string. */
+ public static String unsignedToString(final long value) {
+ if (value >= 0) {
+ return Long.toString(value);
+ } else {
+ // Pull off the most-significant bit so that BigInteger doesn't think
+ // the number is negative, then set it again using setBit().
+ return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL)
+ .setBit(63).toString();
+ }
+ }
+
+ private static TextGenerator multiLineOutput(Appendable output) {
+ return new TextGenerator(output, false);
+ }
+
+ private static TextGenerator singleLineOutput(Appendable output) {
+ return new TextGenerator(output, true);
+ }
+
+ /**
+ * An inner class for writing text to the output stream.
+ */
+ private static final class TextGenerator {
+ private final Appendable output;
+ private final StringBuilder indent = new StringBuilder();
+ private final boolean singleLineMode;
+ // While technically we are "at the start of a line" at the very beginning of the output, all
+ // we would do in response to this is emit the (zero length) indentation, so it has no effect.
+ // Setting it false here does however suppress an unwanted leading space in single-line mode.
+ private boolean atStartOfLine = false;
+
+ private TextGenerator(final Appendable output, boolean singleLineMode) {
+ this.output = output;
+ this.singleLineMode = singleLineMode;
+ }
+
+ /**
+ * Indent text by two spaces. After calling Indent(), two spaces will be
+ * inserted at the beginning of each line of text. Indent() may be called
+ * multiple times to produce deeper indents.
+ */
+ public void indent() {
+ indent.append(" ");
+ }
+
+ /**
+ * Reduces the current indent level by two spaces, or crashes if the indent
+ * level is zero.
+ */
+ public void outdent() {
+ final int length = indent.length();
+ if (length == 0) {
+ throw new IllegalArgumentException(
+ " Outdent() without matching Indent().");
+ }
+ indent.setLength(length - 2);
+ }
+
+ /**
+ * Print text to the output stream. Bare newlines are never expected to be passed to this
+ * method; to indicate the end of a line, call "eol()".
+ */
+ public void print(final CharSequence text) throws IOException {
+ if (atStartOfLine) {
+ atStartOfLine = false;
+ output.append(singleLineMode ? " " : indent);
+ }
+ output.append(text);
+ }
+
+ /**
+ * Signifies reaching the "end of the current line" in the output. In single-line mode, this
+ * does not result in a newline being emitted, but ensures that a separating space is written
+ * before the next output.
+ */
+ public void eol() throws IOException {
+ if (!singleLineMode) {
+ output.append("\n");
+ }
+ atStartOfLine = true;
+ }
+ }
+
+ // =================================================================
+ // Parsing
+
+ /**
+ * Represents a stream of tokens parsed from a {@code String}.
+ *
+ * <p>The Java standard library provides many classes that you might think
+ * would be useful for implementing this, but aren't. For example:
+ *
+ * <ul>
+ * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or,
+ * at least, something that would get us close to what we want -- except
+ * for one fatal flaw: It automatically un-escapes strings using Java
+ * escape sequences, which do not include all the escape sequences we
+ * need to support (e.g. '\x').
+ * <li>{@code java.util.Scanner}: This seems like a great way at least to
+ * parse regular expressions out of a stream (so we wouldn't have to load
+ * the entire input into a single string before parsing). Sadly,
+ * {@code Scanner} requires that tokens be delimited with some delimiter.
+ * Thus, although the text "foo:" should parse to two tokens ("foo" and
+ * ":"), {@code Scanner} would recognize it only as a single token.
+ * Furthermore, {@code Scanner} provides no way to inspect the contents
+ * of delimiters, making it impossible to keep track of line and column
+ * numbers.
+ * </ul>
+ *
+ * <p>Luckily, Java's regular expression support does manage to be useful to
+ * us. (Barely: We need {@code Matcher.usePattern()}, which is new in
+ * Java 1.5.) So, we can use that, at least. Unfortunately, this implies
+ * that we need to have the entire input in one contiguous string.
+ */
+ private static final class Tokenizer {
+ private final CharSequence text;
+ private final Matcher matcher;
+ private String currentToken;
+
+ // The character index within this.text at which the current token begins.
+ private int pos = 0;
+
+ // The line and column numbers of the current token.
+ private int line = 0;
+ private int column = 0;
+
+ // The line and column numbers of the previous token (allows throwing
+ // errors *after* consuming).
+ private int previousLine = 0;
+ private int previousColumn = 0;
+
+ // We use possessive quantifiers (*+ and ++) because otherwise the Java
+ // regex matcher has stack overflows on large inputs.
+ private static final Pattern WHITESPACE =
+ Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
+ private static final Pattern TOKEN = Pattern.compile(
+ "[a-zA-Z_][0-9a-zA-Z_+-]*+|" + // an identifier
+ "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number
+ "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" + // a double-quoted string
+ "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string
+ Pattern.MULTILINE);
+
+ private static final Pattern DOUBLE_INFINITY = Pattern.compile(
+ "-?inf(inity)?",
+ Pattern.CASE_INSENSITIVE);
+ private static final Pattern FLOAT_INFINITY = Pattern.compile(
+ "-?inf(inity)?f?",
+ Pattern.CASE_INSENSITIVE);
+ private static final Pattern FLOAT_NAN = Pattern.compile(
+ "nanf?",
+ Pattern.CASE_INSENSITIVE);
+
+ /** Construct a tokenizer that parses tokens from the given text. */
+ private Tokenizer(final CharSequence text) {
+ this.text = text;
+ this.matcher = WHITESPACE.matcher(text);
+ skipWhitespace();
+ nextToken();
+ }
+
+ int getPreviousLine() {
+ return previousLine;
+ }
+
+ int getPreviousColumn() {
+ return previousColumn;
+ }
+
+ int getLine() {
+ return line;
+ }
+
+ int getColumn() {
+ return column;
+ }
+
+ /** Are we at the end of the input? */
+ public boolean atEnd() {
+ return currentToken.length() == 0;
+ }
+
+ /** Advance to the next token. */
+ public void nextToken() {
+ previousLine = line;
+ previousColumn = column;
+
+ // Advance the line counter to the current position.
+ while (pos < matcher.regionStart()) {
+ if (text.charAt(pos) == '\n') {
+ ++line;
+ column = 0;
+ } else {
+ ++column;
+ }
+ ++pos;
+ }
+
+ // Match the next token.
+ if (matcher.regionStart() == matcher.regionEnd()) {
+ // EOF
+ currentToken = "";
+ } else {
+ matcher.usePattern(TOKEN);
+ if (matcher.lookingAt()) {
+ currentToken = matcher.group();
+ matcher.region(matcher.end(), matcher.regionEnd());
+ } else {
+ // Take one character.
+ currentToken = String.valueOf(text.charAt(pos));
+ matcher.region(pos + 1, matcher.regionEnd());
+ }
+
+ skipWhitespace();
+ }
+ }
+
+ /**
+ * Skip over any whitespace so that the matcher region starts at the next
+ * token.
+ */
+ private void skipWhitespace() {
+ matcher.usePattern(WHITESPACE);
+ if (matcher.lookingAt()) {
+ matcher.region(matcher.end(), matcher.regionEnd());
+ }
+ }
+
+ /**
+ * If the next token exactly matches {@code token}, consume it and return
+ * {@code true}. Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsume(final String token) {
+ if (currentToken.equals(token)) {
+ nextToken();
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /**
+ * If the next token exactly matches {@code token}, consume it. Otherwise,
+ * throw a {@link ParseException}.
+ */
+ public void consume(final String token) throws ParseException {
+ if (!tryConsume(token)) {
+ throw parseException("Expected \"" + token + "\".");
+ }
+ }
+
+ /**
+ * Returns {@code true} if the next token is an integer, but does
+ * not consume it.
+ */
+ public boolean lookingAtInteger() {
+ if (currentToken.length() == 0) {
+ return false;
+ }
+
+ final char c = currentToken.charAt(0);
+ return ('0' <= c && c <= '9')
+ || c == '-' || c == '+';
+ }
+
+ /**
+ * Returns {@code true} if the current token's text is equal to that
+ * specified.
+ */
+ public boolean lookingAt(String text) {
+ return currentToken.equals(text);
+ }
+
+ /**
+ * If the next token is an identifier, consume it and return its value.
+ * Otherwise, throw a {@link ParseException}.
+ */
+ public String consumeIdentifier() throws ParseException {
+ for (int i = 0; i < currentToken.length(); i++) {
+ final char c = currentToken.charAt(i);
+ if (('a' <= c && c <= 'z')
+ || ('A' <= c && c <= 'Z')
+ || ('0' <= c && c <= '9')
+ || (c == '_') || (c == '.')) {
+ // OK
+ } else {
+ throw parseException(
+ "Expected identifier. Found '" + currentToken + "'");
+ }
+ }
+
+ final String result = currentToken;
+ nextToken();
+ return result;
+ }
+
+ /**
+ * If the next token is an identifier, consume it and return {@code true}.
+ * Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeIdentifier() {
+ try {
+ consumeIdentifier();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
+ * If the next token is a 32-bit signed integer, consume it and return its
+ * value. Otherwise, throw a {@link ParseException}.
+ */
+ public int consumeInt32() throws ParseException {
+ try {
+ final int result = parseInt32(currentToken);
+ nextToken();
+ return result;
+ } catch (NumberFormatException e) {
+ throw integerParseException(e);
+ }
+ }
+
+ /**
+ * If the next token is a 32-bit unsigned integer, consume it and return its
+ * value. Otherwise, throw a {@link ParseException}.
+ */
+ public int consumeUInt32() throws ParseException {
+ try {
+ final int result = parseUInt32(currentToken);
+ nextToken();
+ return result;
+ } catch (NumberFormatException e) {
+ throw integerParseException(e);
+ }
+ }
+
+ /**
+ * If the next token is a 64-bit signed integer, consume it and return its
+ * value. Otherwise, throw a {@link ParseException}.
+ */
+ public long consumeInt64() throws ParseException {
+ try {
+ final long result = parseInt64(currentToken);
+ nextToken();
+ return result;
+ } catch (NumberFormatException e) {
+ throw integerParseException(e);
+ }
+ }
+
+ /**
+ * If the next token is a 64-bit signed integer, consume it and return
+ * {@code true}. Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeInt64() {
+ try {
+ consumeInt64();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
+ * If the next token is a 64-bit unsigned integer, consume it and return its
+ * value. Otherwise, throw a {@link ParseException}.
+ */
+ public long consumeUInt64() throws ParseException {
+ try {
+ final long result = parseUInt64(currentToken);
+ nextToken();
+ return result;
+ } catch (NumberFormatException e) {
+ throw integerParseException(e);
+ }
+ }
+
+ /**
+ * If the next token is a 64-bit unsigned integer, consume it and return
+ * {@code true}. Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeUInt64() {
+ try {
+ consumeUInt64();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
+ * If the next token is a double, consume it and return its value.
+ * Otherwise, throw a {@link ParseException}.
+ */
+ public double consumeDouble() throws ParseException {
+ // We need to parse infinity and nan separately because
+ // Double.parseDouble() does not accept "inf", "infinity", or "nan".
+ if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
+ final boolean negative = currentToken.startsWith("-");
+ nextToken();
+ return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
+ }
+ if (currentToken.equalsIgnoreCase("nan")) {
+ nextToken();
+ return Double.NaN;
+ }
+ try {
+ final double result = Double.parseDouble(currentToken);
+ nextToken();
+ return result;
+ } catch (NumberFormatException e) {
+ throw floatParseException(e);
+ }
+ }
+
+ /**
+ * If the next token is a double, consume it and return {@code true}.
+ * Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeDouble() {
+ try {
+ consumeDouble();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
+ * If the next token is a float, consume it and return its value.
+ * Otherwise, throw a {@link ParseException}.
+ */
+ public float consumeFloat() throws ParseException {
+ // We need to parse infinity and nan separately because
+ // Float.parseFloat() does not accept "inf", "infinity", or "nan".
+ if (FLOAT_INFINITY.matcher(currentToken).matches()) {
+ final boolean negative = currentToken.startsWith("-");
+ nextToken();
+ return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
+ }
+ if (FLOAT_NAN.matcher(currentToken).matches()) {
+ nextToken();
+ return Float.NaN;
+ }
+ try {
+ final float result = Float.parseFloat(currentToken);
+ nextToken();
+ return result;
+ } catch (NumberFormatException e) {
+ throw floatParseException(e);
+ }
+ }
+
+ /**
+ * If the next token is a float, consume it and return {@code true}.
+ * Otherwise, return {@code false} without doing anything.
+ */
+ public boolean tryConsumeFloat() {
+ try {
+ consumeFloat();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
+ * If the next token is a boolean, consume it and return its value.
+ * Otherwise, throw a {@link ParseException}.
+ */
+ public boolean consumeBoolean() throws ParseException {
+ if (currentToken.equals("true")
+ || currentToken.equals("True")
+ || currentToken.equals("t")
+ || currentToken.equals("1")) {
+ nextToken();
+ return true;
+ } else if (currentToken.equals("false")
+ || currentToken.equals("False")
+ || currentToken.equals("f")
+ || currentToken.equals("0")) {
+ nextToken();
+ return false;
+ } else {
+ throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\".");
+ }
+ }
+
+ /**
+ * If the next token is a string, consume it and return its (unescaped)
+ * value. Otherwise, throw a {@link ParseException}.
+ */
+ public String consumeString() throws ParseException {
+ return consumeByteString().toStringUtf8();
+ }
+
+ /**
+ * If the next token is a string, consume it and return true. Otherwise,
+ * return false.
+ */
+ public boolean tryConsumeString() {
+ try {
+ consumeString();
+ return true;
+ } catch (ParseException e) {
+ return false;
+ }
+ }
+
+ /**
+ * If the next token is a string, consume it, unescape it as a
+ * {@link ByteString}, and return it. Otherwise, throw a
+ * {@link ParseException}.
+ */
+ public ByteString consumeByteString() throws ParseException {
+ List<ByteString> list = new ArrayList<ByteString>();
+ consumeByteString(list);
+ while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
+ consumeByteString(list);
+ }
+ return ByteString.copyFrom(list);
+ }
+
+ /**
+ * Like {@link #consumeByteString()} but adds each token of the string to
+ * the given list. String literals (whether bytes or text) may come in
+ * multiple adjacent tokens which are automatically concatenated, like in
+ * C or Python.
+ */
+ private void consumeByteString(List<ByteString> list)
+ throws ParseException {
+ final char quote = currentToken.length() > 0
+ ? currentToken.charAt(0)
+ : '\0';
+ if (quote != '\"' && quote != '\'') {
+ throw parseException("Expected string.");
+ }
+
+ if (currentToken.length() < 2
+ || currentToken.charAt(currentToken.length() - 1) != quote) {
+ throw parseException("String missing ending quote.");
+ }
+
+ try {
+ final String escaped =
+ currentToken.substring(1, currentToken.length() - 1);
+ final ByteString result = unescapeBytes(escaped);
+ nextToken();
+ list.add(result);
+ } catch (InvalidEscapeSequenceException e) {
+ throw parseException(e.getMessage());
+ }
+ }
+
+ /**
+ * Returns a {@link ParseException} with the current line and column
+ * numbers in the description, suitable for throwing.
+ */
+ public ParseException parseException(final String description) {
+ // Note: People generally prefer one-based line and column numbers.
+ return new ParseException(
+ line + 1, column + 1, description);
+ }
+
+ /**
+ * Returns a {@link ParseException} with the line and column numbers of
+ * the previous token in the description, suitable for throwing.
+ */
+ public ParseException parseExceptionPreviousToken(
+ final String description) {
+ // Note: People generally prefer one-based line and column numbers.
+ return new ParseException(
+ previousLine + 1, previousColumn + 1, description);
+ }
+
+ /**
+ * Constructs an appropriate {@link ParseException} for the given
+ * {@code NumberFormatException} when trying to parse an integer.
+ */
+ private ParseException integerParseException(
+ final NumberFormatException e) {
+ return parseException("Couldn't parse integer: " + e.getMessage());
+ }
+
+ /**
+ * Constructs an appropriate {@link ParseException} for the given
+ * {@code NumberFormatException} when trying to parse a float or double.
+ */
+ private ParseException floatParseException(final NumberFormatException e) {
+ return parseException("Couldn't parse number: " + e.getMessage());
+ }
+
+ /**
+ * Returns a {@link UnknownFieldParseException} with the line and column
+ * numbers of the previous token in the description, and the unknown field
+ * name, suitable for throwing.
+ */
+ public UnknownFieldParseException unknownFieldParseExceptionPreviousToken(
+ final String unknownField, final String description) {
+ // Note: People generally prefer one-based line and column numbers.
+ return new UnknownFieldParseException(
+ previousLine + 1, previousColumn + 1, unknownField, description);
+ }
+ }
+
+ /** Thrown when parsing an invalid text format message. */
+ public static class ParseException extends IOException {
+ private static final long serialVersionUID = 3196188060225107702L;
+
+ private final int line;
+ private final int column;
+
+ /** Create a new instance, with -1 as the line and column numbers. */
+ public ParseException(final String message) {
+ this(-1, -1, message);
+ }
+
+ /**
+ * Create a new instance
+ *
+ * @param line the line number where the parse error occurred,
+ * using 1-offset.
+ * @param column the column number where the parser error occurred,
+ * using 1-offset.
+ */
+ public ParseException(final int line, final int column,
+ final String message) {
+ super(Integer.toString(line) + ":" + column + ": " + message);
+ this.line = line;
+ this.column = column;
+ }
+
+ /**
+ * Return the line where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * line is line 1.
+ */
+ public int getLine() {
+ return line;
+ }
+
+ /**
+ * Return the column where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * line is line 1.
+ */
+ public int getColumn() {
+ return column;
+ }
+ }
+
+ /**
+ * Thrown when encountering an unknown field while parsing
+ * a text format message.
+ */
+ public static class UnknownFieldParseException extends ParseException {
+ private final String unknownField;
+
+ /**
+ * Create a new instance, with -1 as the line and column numbers, and an
+ * empty unknown field name.
+ */
+ public UnknownFieldParseException(final String message) {
+ this(-1, -1, "", message);
+ }
+
+ /**
+ * Create a new instance
+ *
+ * @param line the line number where the parse error occurred,
+ * using 1-offset.
+ * @param column the column number where the parser error occurred,
+ * using 1-offset.
+ * @param unknownField the name of the unknown field found while parsing.
+ */
+ public UnknownFieldParseException(final int line, final int column,
+ final String unknownField, final String message) {
+ super(line, column, message);
+ this.unknownField = unknownField;
+ }
+
+ /**
+ * Return the name of the unknown field encountered while parsing the
+ * protocol buffer string.
+ */
+ public String getUnknownField() {
+ return unknownField;
+ }
+ }
+
+ private static final Parser PARSER = Parser.newBuilder().build();
+
+ /**
+ * Return a {@link Parser} instance which can parse text-format
+ * messages. The returned instance is thread-safe.
+ */
+ public static Parser getParser() {
+ return PARSER;
+ }
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}.
+ */
+ public static void merge(final Readable input,
+ final Message.Builder builder)
+ throws IOException {
+ PARSER.merge(input, builder);
+ }
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}.
+ */
+ public static void merge(final CharSequence input,
+ final Message.Builder builder)
+ throws ParseException {
+ PARSER.merge(input, builder);
+ }
+
+ /**
+ * Parse a text-format message from {@code input}.
+ *
+ * @return the parsed message, guaranteed initialized
+ */
+ public static <T extends Message> T parse(final CharSequence input,
+ final Class<T> protoClass)
+ throws ParseException {
+ Message.Builder builder =
+ Internal.getDefaultInstance(protoClass).newBuilderForType();
+ merge(input, builder);
+ @SuppressWarnings("unchecked")
+ T output = (T) builder.build();
+ return output;
+ }
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}. Extensions will be recognized if they are
+ * registered in {@code extensionRegistry}.
+ */
+ public static void merge(final Readable input,
+ final ExtensionRegistry extensionRegistry,
+ final Message.Builder builder)
+ throws IOException {
+ PARSER.merge(input, extensionRegistry, builder);
+ }
+
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}. Extensions will be recognized if they are
+ * registered in {@code extensionRegistry}.
+ */
+ public static void merge(final CharSequence input,
+ final ExtensionRegistry extensionRegistry,
+ final Message.Builder builder)
+ throws ParseException {
+ PARSER.merge(input, extensionRegistry, builder);
+ }
+
+ /**
+ * Parse a text-format message from {@code input}. Extensions will be
+ * recognized if they are registered in {@code extensionRegistry}.
+ *
+ * @return the parsed message, guaranteed initialized
+ */
+ public static <T extends Message> T parse(
+ final CharSequence input,
+ final ExtensionRegistry extensionRegistry,
+ final Class<T> protoClass)
+ throws ParseException {
+ Message.Builder builder =
+ Internal.getDefaultInstance(protoClass).newBuilderForType();
+ merge(input, extensionRegistry, builder);
+ @SuppressWarnings("unchecked")
+ T output = (T) builder.build();
+ return output;
+ }
+
+
+ /**
+ * Parser for text-format proto2 instances. This class is thread-safe.
+ * The implementation largely follows google/protobuf/text_format.cc.
+ *
+ * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or
+ * {@link Builder} to control the parser behavior.
+ */
+ public static class Parser {
+ /**
+ * Determines if repeated values for non-repeated fields and
+ * oneofs are permitted. For example, given required/optional field "foo"
+ * and a oneof containing "baz" and "qux":
+ * <ul>
+ * <li>"foo: 1 foo: 2"
+ * <li>"baz: 1 qux: 2"
+ * <li>merging "foo: 2" into a proto in which foo is already set, or
+ * <li>merging "qux: 2" into a proto in which baz is already set.
+ * </ul>
+ */
+ public enum SingularOverwritePolicy {
+ /** The last value is retained. */
+ ALLOW_SINGULAR_OVERWRITES,
+ /** An error is issued. */
+ FORBID_SINGULAR_OVERWRITES
+ }
+
+ private final boolean allowUnknownFields;
+ private final boolean allowUnknownEnumValues;
+ private final SingularOverwritePolicy singularOverwritePolicy;
+ private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
+
+ private Parser(
+ boolean allowUnknownFields,
+ boolean allowUnknownEnumValues,
+ SingularOverwritePolicy singularOverwritePolicy,
+ TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
+ this.allowUnknownFields = allowUnknownFields;
+ this.allowUnknownEnumValues = allowUnknownEnumValues;
+ this.singularOverwritePolicy = singularOverwritePolicy;
+ this.parseInfoTreeBuilder = parseInfoTreeBuilder;
+ }
+
+ /**
+ * Returns a new instance of {@link Builder}.
+ */
+ public static Builder newBuilder() {
+ return new Builder();
+ }
+
+ /**
+ * Builder that can be used to obtain new instances of {@link Parser}.
+ */
+ public static class Builder {
+ private boolean allowUnknownFields = false;
+ private boolean allowUnknownEnumValues = false;
+ private SingularOverwritePolicy singularOverwritePolicy =
+ SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
+ private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
+
+
+ /**
+ * Sets parser behavior when a non-repeated field appears more than once.
+ */
+ public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
+ this.singularOverwritePolicy = p;
+ return this;
+ }
+
+ public Builder setParseInfoTreeBuilder(
+ TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
+ this.parseInfoTreeBuilder = parseInfoTreeBuilder;
+ return this;
+ }
+
+ public Parser build() {
+ return new Parser(
+ allowUnknownFields,
+ allowUnknownEnumValues,
+ singularOverwritePolicy,
+ parseInfoTreeBuilder);
+ }
+ }
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}.
+ */
+ public void merge(final Readable input,
+ final Message.Builder builder)
+ throws IOException {
+ merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+ }
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}.
+ */
+ public void merge(final CharSequence input,
+ final Message.Builder builder)
+ throws ParseException {
+ merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
+ }
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}. Extensions will be recognized if they are
+ * registered in {@code extensionRegistry}.
+ */
+ public void merge(final Readable input,
+ final ExtensionRegistry extensionRegistry,
+ final Message.Builder builder)
+ throws IOException {
+ // Read the entire input to a String then parse that.
+
+ // If StreamTokenizer were not quite so crippled, or if there were a kind
+ // of Reader that could read in chunks that match some particular regex,
+ // or if we wanted to write a custom Reader to tokenize our stream, then
+ // we would not have to read to one big String. Alas, none of these is
+ // the case. Oh well.
+
+ merge(toStringBuilder(input), extensionRegistry, builder);
+ }
+
+
+ private static final int BUFFER_SIZE = 4096;
+
+ // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
+ // overhead is worthwhile
+ private static StringBuilder toStringBuilder(final Readable input)
+ throws IOException {
+ final StringBuilder text = new StringBuilder();
+ final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
+ while (true) {
+ final int n = input.read(buffer);
+ if (n == -1) {
+ break;
+ }
+ buffer.flip();
+ text.append(buffer, 0, n);
+ }
+ return text;
+ }
+
+ // Check both unknown fields and unknown extensions and log warning messages
+ // or throw exceptions according to the flag.
+ private void checkUnknownFields(final List<String> unknownFields)
+ throws ParseException {
+ if (unknownFields.isEmpty()) {
+ return;
+ }
+
+ StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:");
+ for (String field : unknownFields) {
+ msg.append('\n').append(field);
+ }
+
+ if (allowUnknownFields) {
+ logger.warning(msg.toString());
+ } else {
+ String[] lineColumn = unknownFields.get(0).split(":");
+ throw new ParseException(Integer.valueOf(lineColumn[0]),
+ Integer.valueOf(lineColumn[1]), msg.toString());
+ }
+ }
+
+ /**
+ * Parse a text-format message from {@code input} and merge the contents
+ * into {@code builder}. Extensions will be recognized if they are
+ * registered in {@code extensionRegistry}.
+ */
+ public void merge(final CharSequence input,
+ final ExtensionRegistry extensionRegistry,
+ final Message.Builder builder)
+ throws ParseException {
+ final Tokenizer tokenizer = new Tokenizer(input);
+ MessageReflection.BuilderAdapter target =
+ new MessageReflection.BuilderAdapter(builder);
+
+ List<String> unknownFields = new ArrayList<String>();
+
+ while (!tokenizer.atEnd()) {
+ mergeField(tokenizer, extensionRegistry, target, unknownFields);
+ }
+
+ checkUnknownFields(unknownFields);
+ }
+
+
+ /**
+ * Parse a single field from {@code tokenizer} and merge it into
+ * {@code builder}.
+ */
+ private void mergeField(final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target,
+ List<String> unknownFields)
+ throws ParseException {
+ mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder,
+ unknownFields);
+ }
+
+ /**
+ * Parse a single field from {@code tokenizer} and merge it into
+ * {@code target}.
+ */
+ private void mergeField(final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target,
+ TextFormatParseInfoTree.Builder parseTreeBuilder,
+ List<String> unknownFields)
+ throws ParseException {
+ FieldDescriptor field = null;
+ int startLine = tokenizer.getLine();
+ int startColumn = tokenizer.getColumn();
+ final Descriptor type = target.getDescriptorForType();
+ ExtensionRegistry.ExtensionInfo extension = null;
+
+ if (tokenizer.tryConsume("[")) {
+ // An extension.
+ final StringBuilder name =
+ new StringBuilder(tokenizer.consumeIdentifier());
+ while (tokenizer.tryConsume(".")) {
+ name.append('.');
+ name.append(tokenizer.consumeIdentifier());
+ }
+
+ extension = target.findExtensionByName(
+ extensionRegistry, name.toString());
+
+ if (extension == null) {
+ unknownFields.add(
+ (tokenizer.getPreviousLine() + 1)
+ + ":"
+ + (tokenizer.getPreviousColumn() + 1)
+ + ":\t"
+ + type.getFullName()
+ + ".["
+ + name
+ + "]");
+ } else {
+ if (extension.descriptor.getContainingType() != type) {
+ throw tokenizer.parseExceptionPreviousToken(
+ "Extension \"" + name + "\" does not extend message type \""
+ + type.getFullName() + "\".");
+ }
+ field = extension.descriptor;
+ }
+
+ tokenizer.consume("]");
+ } else {
+ final String name = tokenizer.consumeIdentifier();
+ field = type.findFieldByName(name);
+
+ // Group names are expected to be capitalized as they appear in the
+ // .proto file, which actually matches their type names, not their field
+ // names.
+ if (field == null) {
+ // Explicitly specify US locale so that this code does not break when
+ // executing in Turkey.
+ final String lowerName = name.toLowerCase(Locale.US);
+ field = type.findFieldByName(lowerName);
+ // If the case-insensitive match worked but the field is NOT a group,
+ if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
+ field = null;
+ }
+ }
+ // Again, special-case group names as described above.
+ if (field != null && field.getType() == FieldDescriptor.Type.GROUP
+ && !field.getMessageType().getName().equals(name)) {
+ field = null;
+ }
+
+ if (field == null) {
+ unknownFields.add(
+ (tokenizer.getPreviousLine() + 1)
+ + ":"
+ + (tokenizer.getPreviousColumn() + 1)
+ + ":\t"
+ + type.getFullName()
+ + "."
+ + name);
+ }
+ }
+
+ // Skips unknown fields.
+ if (field == null) {
+ // Try to guess the type of this field.
+ // If this field is not a message, there should be a ":" between the
+ // field name and the field value and also the field value should not
+ // start with "{" or "<" which indicates the beginning of a message body.
+ // If there is no ":" or there is a "{" or "<" after ":", this field has
+ // to be a message or the input is ill-formed.
+ if (tokenizer.tryConsume(":")
+ && !tokenizer.lookingAt("{")
+ && !tokenizer.lookingAt("<")) {
+ skipFieldValue(tokenizer);
+ } else {
+ skipFieldMessage(tokenizer);
+ }
+ return;
+ }
+
+ // Handle potential ':'.
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ tokenizer.tryConsume(":"); // optional
+ if (parseTreeBuilder != null) {
+ TextFormatParseInfoTree.Builder childParseTreeBuilder =
+ parseTreeBuilder.getBuilderForSubMessageField(field);
+ consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
+ childParseTreeBuilder, unknownFields);
+ } else {
+ consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
+ parseTreeBuilder, unknownFields);
+ }
+ } else {
+ tokenizer.consume(":"); // required
+ consumeFieldValues(tokenizer, extensionRegistry, target, field,
+ extension, parseTreeBuilder, unknownFields);
+ }
+
+ if (parseTreeBuilder != null) {
+ parseTreeBuilder.setLocation(
+ field, TextFormatParseLocation.create(startLine, startColumn));
+ }
+
+ // For historical reasons, fields may optionally be separated by commas or
+ // semicolons.
+ if (!tokenizer.tryConsume(";")) {
+ tokenizer.tryConsume(",");
+ }
+ }
+
+ /**
+ * Parse a one or more field values from {@code tokenizer} and merge it into
+ * {@code builder}.
+ */
+ private void consumeFieldValues(
+ final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target,
+ final FieldDescriptor field,
+ final ExtensionRegistry.ExtensionInfo extension,
+ final TextFormatParseInfoTree.Builder parseTreeBuilder,
+ List<String> unknownFields)
+ throws ParseException {
+ // Support specifying repeated field values as a comma-separated list.
+ // Ex."foo: [1, 2, 3]"
+ if (field.isRepeated() && tokenizer.tryConsume("[")) {
+ if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty.
+ while (true) {
+ consumeFieldValue(
+ tokenizer,
+ extensionRegistry,
+ target,
+ field,
+ extension,
+ parseTreeBuilder,
+ unknownFields);
+ if (tokenizer.tryConsume("]")) {
+ // End of list.
+ break;
+ }
+ tokenizer.consume(",");
+ }
+ }
+ } else {
+ consumeFieldValue(tokenizer, extensionRegistry, target, field,
+ extension, parseTreeBuilder, unknownFields);
+ }
+ }
+
+ /**
+ * Parse a single field value from {@code tokenizer} and merge it into
+ * {@code builder}.
+ */
+ private void consumeFieldValue(
+ final Tokenizer tokenizer,
+ final ExtensionRegistry extensionRegistry,
+ final MessageReflection.MergeTarget target,
+ final FieldDescriptor field,
+ final ExtensionRegistry.ExtensionInfo extension,
+ final TextFormatParseInfoTree.Builder parseTreeBuilder,
+ List<String> unknownFields)
+ throws ParseException {
+ Object value = null;
+
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ final String endToken;
+ if (tokenizer.tryConsume("<")) {
+ endToken = ">";
+ } else {
+ tokenizer.consume("{");
+ endToken = "}";
+ }
+
+ final MessageReflection.MergeTarget subField;
+ subField = target.newMergeTargetForField(field,
+ (extension == null) ? null : extension.defaultInstance);
+
+ while (!tokenizer.tryConsume(endToken)) {
+ if (tokenizer.atEnd()) {
+ throw tokenizer.parseException(
+ "Expected \"" + endToken + "\".");
+ }
+ mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder,
+ unknownFields);
+ }
+
+ value = subField.finish();
+
+ } else {
+ switch (field.getType()) {
+ case INT32:
+ case SINT32:
+ case SFIXED32:
+ value = tokenizer.consumeInt32();
+ break;
+
+ case INT64:
+ case SINT64:
+ case SFIXED64:
+ value = tokenizer.consumeInt64();
+ break;
+
+ case UINT32:
+ case FIXED32:
+ value = tokenizer.consumeUInt32();
+ break;
+
+ case UINT64:
+ case FIXED64:
+ value = tokenizer.consumeUInt64();
+ break;
+
+ case FLOAT:
+ value = tokenizer.consumeFloat();
+ break;
+
+ case DOUBLE:
+ value = tokenizer.consumeDouble();
+ break;
+
+ case BOOL:
+ value = tokenizer.consumeBoolean();
+ break;
+
+ case STRING:
+ value = tokenizer.consumeString();
+ break;
+
+ case BYTES:
+ value = tokenizer.consumeByteString();
+ break;
+
+ case ENUM:
+ final EnumDescriptor enumType = field.getEnumType();
+
+ if (tokenizer.lookingAtInteger()) {
+ final int number = tokenizer.consumeInt32();
+ value = enumType.findValueByNumber(number);
+ if (value == null) {
+ String unknownValueMsg =
+ "Enum type \""
+ + enumType.getFullName()
+ + "\" has no value with number "
+ + number
+ + '.';
+ if (allowUnknownEnumValues) {
+ logger.warning(unknownValueMsg);
+ return;
+ } else {
+ throw tokenizer.parseExceptionPreviousToken(
+ "Enum type \""
+ + enumType.getFullName()
+ + "\" has no value with number "
+ + number
+ + '.');
+ }
+ }
+ } else {
+ final String id = tokenizer.consumeIdentifier();
+ value = enumType.findValueByName(id);
+ if (value == null) {
+ String unknownValueMsg =
+ "Enum type \""
+ + enumType.getFullName()
+ + "\" has no value named \""
+ + id
+ + "\".";
+ if (allowUnknownEnumValues) {
+ logger.warning(unknownValueMsg);
+ return;
+ } else {
+ throw tokenizer.parseExceptionPreviousToken(unknownValueMsg);
+ }
+ }
+ }
+
+ break;
+
+ case MESSAGE:
+ case GROUP:
+ throw new RuntimeException("Can't get here.");
+ }
+ }
+
+ if (field.isRepeated()) {
+ // TODO(b/29122459): If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode,
+ // check for duplicate map keys here.
+ target.addRepeatedField(field, value);
+ } else if ((singularOverwritePolicy
+ == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+ && target.hasField(field)) {
+ throw tokenizer.parseExceptionPreviousToken("Non-repeated field \""
+ + field.getFullName() + "\" cannot be overwritten.");
+ } else if ((singularOverwritePolicy
+ == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
+ && field.getContainingOneof() != null
+ && target.hasOneof(field.getContainingOneof())) {
+ Descriptors.OneofDescriptor oneof = field.getContainingOneof();
+ throw tokenizer.parseExceptionPreviousToken("Field \""
+ + field.getFullName() + "\" is specified along with field \""
+ + target.getOneofFieldDescriptor(oneof).getFullName()
+ + "\", another member of oneof \"" + oneof.getName() + "\".");
+ } else {
+ target.setField(field, value);
+ }
+ }
+
+ /**
+ * Skips the next field including the field's name and value.
+ */
+ private void skipField(Tokenizer tokenizer) throws ParseException {
+ if (tokenizer.tryConsume("[")) {
+ // Extension name.
+ do {
+ tokenizer.consumeIdentifier();
+ } while (tokenizer.tryConsume("."));
+ tokenizer.consume("]");
+ } else {
+ tokenizer.consumeIdentifier();
+ }
+
+ // Try to guess the type of this field.
+ // If this field is not a message, there should be a ":" between the
+ // field name and the field value and also the field value should not
+ // start with "{" or "<" which indicates the beginning of a message body.
+ // If there is no ":" or there is a "{" or "<" after ":", this field has
+ // to be a message or the input is ill-formed.
+ if (tokenizer.tryConsume(":")
+ && !tokenizer.lookingAt("<")
+ && !tokenizer.lookingAt("{")) {
+ skipFieldValue(tokenizer);
+ } else {
+ skipFieldMessage(tokenizer);
+ }
+ // For historical reasons, fields may optionally be separated by commas or
+ // semicolons.
+ if (!tokenizer.tryConsume(";")) {
+ tokenizer.tryConsume(",");
+ }
+ }
+
+ /**
+ * Skips the whole body of a message including the beginning delimiter and
+ * the ending delimiter.
+ */
+ private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
+ final String delimiter;
+ if (tokenizer.tryConsume("<")) {
+ delimiter = ">";
+ } else {
+ tokenizer.consume("{");
+ delimiter = "}";
+ }
+ while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
+ skipField(tokenizer);
+ }
+ tokenizer.consume(delimiter);
+ }
+
+ /**
+ * Skips a field value.
+ */
+ private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
+ if (tokenizer.tryConsumeString()) {
+ while (tokenizer.tryConsumeString()) {}
+ return;
+ }
+ if (!tokenizer.tryConsumeIdentifier() // includes enum & boolean
+ && !tokenizer.tryConsumeInt64() // includes int32
+ && !tokenizer.tryConsumeUInt64() // includes uint32
+ && !tokenizer.tryConsumeDouble()
+ && !tokenizer.tryConsumeFloat()) {
+ throw tokenizer.parseException(
+ "Invalid field value: " + tokenizer.currentToken);
+ }
+ }
+ }
+
+ // =================================================================
+ // Utility functions
+ //
+ // Some of these methods are package-private because Descriptors.java uses
+ // them.
+
+ /**
+ * Escapes bytes in the format used in protocol buffer text format, which
+ * is the same as the format used for C string literals. All bytes
+ * that are not printable 7-bit ASCII characters are escaped, as well as
+ * backslash, single-quote, and double-quote characters. Characters for
+ * which no defined short-hand escape sequence is defined will be escaped
+ * using 3-digit octal sequences.
+ */
+ public static String escapeBytes(ByteString input) {
+ return TextFormatEscaper.escapeBytes(input);
+ }
+
+ /**
+ * Like {@link #escapeBytes(ByteString)}, but used for byte array.
+ */
+ public static String escapeBytes(byte[] input) {
+ return TextFormatEscaper.escapeBytes(input);
+ }
+
+ /**
+ * Un-escape a byte sequence as escaped using
+ * {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with
+ * "\x") are also recognized.
+ */
+ public static ByteString unescapeBytes(final CharSequence charString)
+ throws InvalidEscapeSequenceException {
+ // First convert the Java character sequence to UTF-8 bytes.
+ ByteString input = ByteString.copyFromUtf8(charString.toString());
+ // Then unescape certain byte sequences introduced by ASCII '\\'. The valid
+ // escapes can all be expressed with ASCII characters, so it is safe to
+ // operate on bytes here.
+ //
+ // Unescaping the input byte array will result in a byte sequence that's no
+ // longer than the input. That's because each escape sequence is between
+ // two and four bytes long and stands for a single byte.
+ final byte[] result = new byte[input.size()];
+ int pos = 0;
+ for (int i = 0; i < input.size(); i++) {
+ byte c = input.byteAt(i);
+ if (c == '\\') {
+ if (i + 1 < input.size()) {
+ ++i;
+ c = input.byteAt(i);
+ if (isOctal(c)) {
+ // Octal escape.
+ int code = digitValue(c);
+ if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
+ ++i;
+ code = code * 8 + digitValue(input.byteAt(i));
+ }
+ if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
+ ++i;
+ code = code * 8 + digitValue(input.byteAt(i));
+ }
+ // TODO: Check that 0 <= code && code <= 0xFF.
+ result[pos++] = (byte) code;
+ } else {
+ switch (c) {
+ case 'a' : result[pos++] = 0x07; break;
+ case 'b' : result[pos++] = '\b'; break;
+ case 'f' : result[pos++] = '\f'; break;
+ case 'n' : result[pos++] = '\n'; break;
+ case 'r' : result[pos++] = '\r'; break;
+ case 't' : result[pos++] = '\t'; break;
+ case 'v' : result[pos++] = 0x0b; break;
+ case '\\': result[pos++] = '\\'; break;
+ case '\'': result[pos++] = '\''; break;
+ case '"' : result[pos++] = '\"'; break;
+
+ case 'x':
+ // hex escape
+ int code = 0;
+ if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
+ ++i;
+ code = digitValue(input.byteAt(i));
+ } else {
+ throw new InvalidEscapeSequenceException(
+ "Invalid escape sequence: '\\x' with no digits");
+ }
+ if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
+ ++i;
+ code = code * 16 + digitValue(input.byteAt(i));
+ }
+ result[pos++] = (byte) code;
+ break;
+
+ default:
+ throw new InvalidEscapeSequenceException(
+ "Invalid escape sequence: '\\" + (char) c + '\'');
+ }
+ }
+ } else {
+ throw new InvalidEscapeSequenceException(
+ "Invalid escape sequence: '\\' at end of string.");
+ }
+ } else {
+ result[pos++] = c;
+ }
+ }
+
+ return result.length == pos
+ ? ByteString.wrap(result) // This reference has not been out of our control.
+ : ByteString.copyFrom(result, 0, pos);
+ }
+
+ /**
+ * Thrown by {@link TextFormat#unescapeBytes} and
+ * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
+ */
+ public static class InvalidEscapeSequenceException extends IOException {
+ private static final long serialVersionUID = -8164033650142593304L;
+
+ InvalidEscapeSequenceException(final String description) {
+ super(description);
+ }
+ }
+
+ /**
+ * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
+ * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
+ * individually as a 3-digit octal escape. Yes, it's weird.
+ */
+ static String escapeText(final String input) {
+ return escapeBytes(ByteString.copyFromUtf8(input));
+ }
+
+ /**
+ * Escape double quotes and backslashes in a String for unicode output of a message.
+ */
+ public static String escapeDoubleQuotesAndBackslashes(final String input) {
+ return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
+ }
+
+ /**
+ * Un-escape a text string as escaped using {@link #escapeText(String)}.
+ * Two-digit hex escapes (starting with "\x") are also recognized.
+ */
+ static String unescapeText(final String input)
+ throws InvalidEscapeSequenceException {
+ return unescapeBytes(input).toStringUtf8();
+ }
+
+ /** Is this an octal digit? */
+ private static boolean isOctal(final byte c) {
+ return '0' <= c && c <= '7';
+ }
+
+ /** Is this a hex digit? */
+ private static boolean isHex(final byte c) {
+ return ('0' <= c && c <= '9')
+ || ('a' <= c && c <= 'f')
+ || ('A' <= c && c <= 'F');
+ }
+
+ /**
+ * Interpret a character as a digit (in any base up to 36) and return the
+ * numeric value. This is like {@code Character.digit()} but we don't accept
+ * non-ASCII digits.
+ */
+ private static int digitValue(final byte c) {
+ if ('0' <= c && c <= '9') {
+ return c - '0';
+ } else if ('a' <= c && c <= 'z') {
+ return c - 'a' + 10;
+ } else {
+ return c - 'A' + 10;
+ }
+ }
+
+ /**
+ * Parse a 32-bit signed integer from the text. Unlike the Java standard
+ * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
+ * and "0" to signify hexadecimal and octal numbers, respectively.
+ */
+ static int parseInt32(final String text) throws NumberFormatException {
+ return (int) parseInteger(text, true, false);
+ }
+
+ /**
+ * Parse a 32-bit unsigned integer from the text. Unlike the Java standard
+ * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
+ * result is coerced to a (signed) {@code int} when returned since Java has
+ * no unsigned integer type.
+ */
+ static int parseUInt32(final String text) throws NumberFormatException {
+ return (int) parseInteger(text, false, false);
+ }
+
+ /**
+ * Parse a 64-bit signed integer from the text. Unlike the Java standard
+ * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
+ * and "0" to signify hexadecimal and octal numbers, respectively.
+ */
+ static long parseInt64(final String text) throws NumberFormatException {
+ return parseInteger(text, true, true);
+ }
+
+ /**
+ * Parse a 64-bit unsigned integer from the text. Unlike the Java standard
+ * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
+ * result is coerced to a (signed) {@code long} when returned since Java has
+ * no unsigned long type.
+ */
+ static long parseUInt64(final String text) throws NumberFormatException {
+ return parseInteger(text, false, true);
+ }
+
+ private static long parseInteger(final String text,
+ final boolean isSigned,
+ final boolean isLong)
+ throws NumberFormatException {
+ int pos = 0;
+
+ boolean negative = false;
+ if (text.startsWith("-", pos)) {
+ if (!isSigned) {
+ throw new NumberFormatException("Number must be positive: " + text);
+ }
+ ++pos;
+ negative = true;
+ }
+
+ int radix = 10;
+ if (text.startsWith("0x", pos)) {
+ pos += 2;
+ radix = 16;
+ } else if (text.startsWith("0", pos)) {
+ radix = 8;
+ }
+
+ final String numberText = text.substring(pos);
+
+ long result = 0;
+ if (numberText.length() < 16) {
+ // Can safely assume no overflow.
+ result = Long.parseLong(numberText, radix);
+ if (negative) {
+ result = -result;
+ }
+
+ // Check bounds.
+ // No need to check for 64-bit numbers since they'd have to be 16 chars
+ // or longer to overflow.
+ if (!isLong) {
+ if (isSigned) {
+ if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
+ throw new NumberFormatException(
+ "Number out of range for 32-bit signed integer: " + text);
+ }
+ } else {
+ if (result >= (1L << 32) || result < 0) {
+ throw new NumberFormatException(
+ "Number out of range for 32-bit unsigned integer: " + text);
+ }
+ }
+ }
+ } else {
+ BigInteger bigValue = new BigInteger(numberText, radix);
+ if (negative) {
+ bigValue = bigValue.negate();
+ }
+
+ // Check bounds.
+ if (!isLong) {
+ if (isSigned) {
+ if (bigValue.bitLength() > 31) {
+ throw new NumberFormatException(
+ "Number out of range for 32-bit signed integer: " + text);
+ }
+ } else {
+ if (bigValue.bitLength() > 32) {
+ throw new NumberFormatException(
+ "Number out of range for 32-bit unsigned integer: " + text);
+ }
+ }
+ } else {
+ if (isSigned) {
+ if (bigValue.bitLength() > 63) {
+ throw new NumberFormatException(
+ "Number out of range for 64-bit signed integer: " + text);
+ }
+ } else {
+ if (bigValue.bitLength() > 64) {
+ throw new NumberFormatException(
+ "Number out of range for 64-bit unsigned integer: " + text);
+ }
+ }
+ }
+
+ result = bigValue.longValue();
+ }
+
+ return result;
+ }
+}