diff options
Diffstat (limited to 'java/src/main/java/com/google/protobuf/TextFormat.java')
-rw-r--r-- | java/src/main/java/com/google/protobuf/TextFormat.java | 792 |
1 files changed, 606 insertions, 186 deletions
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java index ed462899..97da09bb 100644 --- a/java/src/main/java/com/google/protobuf/TextFormat.java +++ b/java/src/main/java/com/google/protobuf/TextFormat.java @@ -31,17 +31,18 @@ package com.google.protobuf; import com.google.protobuf.Descriptors.Descriptor; -import com.google.protobuf.Descriptors.FieldDescriptor; import com.google.protobuf.Descriptors.EnumDescriptor; import com.google.protobuf.Descriptors.EnumValueDescriptor; +import com.google.protobuf.Descriptors.FieldDescriptor; import java.io.IOException; -import java.nio.CharBuffer; import java.math.BigInteger; +import java.nio.CharBuffer; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -55,6 +56,9 @@ import java.util.regex.Pattern; public final class TextFormat { private TextFormat() {} + private static final Logger logger = + Logger.getLogger(TextFormat.class.getName()); + private static final Printer DEFAULT_PRINTER = new Printer(); private static final Printer SINGLE_LINE_PRINTER = (new Printer()).setSingleLineMode(true); @@ -66,8 +70,9 @@ public final class TextFormat { * the parameter output. (This representation is the new version of the * classic "ProtocolPrinter" output from the original Protocol Buffer system) */ - public static void print(final MessageOrBuilder message, final Appendable output) - throws IOException { + public static void print( + final MessageOrBuilder message, final Appendable output) + throws IOException { DEFAULT_PRINTER.print(message, new TextGenerator(output)); } @@ -266,7 +271,8 @@ public final class TextFormat { return this; } - private void print(final MessageOrBuilder message, final TextGenerator generator) + private void print( + final MessageOrBuilder message, final TextGenerator generator) throws IOException { for (Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) { @@ -385,13 +391,17 @@ public final class TextFormat { generator.print("\""); generator.print(escapeNonAscii ? escapeText((String) value) : - (String) value); + escapeDoubleQuotesAndBackslashes((String) value)); generator.print("\""); break; case BYTES: generator.print("\""); - generator.print(escapeBytes((ByteString) value)); + if (value instanceof ByteString) { + generator.print(escapeBytes((ByteString) value)); + } else { + generator.print(escapeBytes((byte[]) value)); + } generator.print("\""); break; @@ -455,16 +465,16 @@ public final class TextFormat { } /** Convert an unsigned 32-bit integer to a string. */ - private static String unsignedToString(final int value) { + public static String unsignedToString(final int value) { if (value >= 0) { return Integer.toString(value); } else { - return Long.toString(((long) value) & 0x00000000FFFFFFFFL); + return Long.toString(value & 0x00000000FFFFFFFFL); } } /** Convert an unsigned 64-bit integer to a string. */ - private static String unsignedToString(final long value) { + public static String unsignedToString(final long value) { if (value >= 0) { return Long.toString(value); } else { @@ -518,17 +528,16 @@ public final class TextFormat { for (int i = 0; i < size; i++) { if (text.charAt(i) == '\n') { - write(text.subSequence(pos, size), i - pos + 1); + write(text.subSequence(pos, i + 1)); pos = i + 1; atStartOfLine = true; } } - write(text.subSequence(pos, size), size - pos); + write(text.subSequence(pos, size)); } - private void write(final CharSequence data, final int size) - throws IOException { - if (size == 0) { + private void write(final CharSequence data) throws IOException { + if (data.length() == 0) { return; } if (atStartOfLine) { @@ -705,6 +714,14 @@ public final class TextFormat { } /** + * Returns {@code true} if the current token's text is equal to that + * specified. + */ + public boolean lookingAt(String text) { + return currentToken.equals(text); + } + + /** * If the next token is an identifier, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ @@ -717,7 +734,8 @@ public final class TextFormat { (c == '_') || (c == '.')) { // OK } else { - throw parseException("Expected identifier."); + throw parseException( + "Expected identifier. Found '" + currentToken + "'"); } } @@ -727,6 +745,19 @@ public final class TextFormat { } /** + * If the next token is an identifier, consume it and return {@code true}. + * Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeIdentifier() { + try { + consumeIdentifier(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a 32-bit signed integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ @@ -769,6 +800,19 @@ public final class TextFormat { } /** + * If the next token is a 64-bit signed integer, consume it and return + * {@code true}. Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeInt64() { + try { + consumeInt64(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a 64-bit unsigned integer, consume it and return its * value. Otherwise, throw a {@link ParseException}. */ @@ -783,6 +827,19 @@ public final class TextFormat { } /** + * If the next token is a 64-bit unsigned integer, consume it and return + * {@code true}. Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeUInt64() { + try { + consumeUInt64(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a double, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ @@ -808,6 +865,19 @@ public final class TextFormat { } /** + * If the next token is a double, consume it and return {@code true}. + * Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeDouble() { + try { + consumeDouble(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a float, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ @@ -833,6 +903,19 @@ public final class TextFormat { } /** + * If the next token is a float, consume it and return {@code true}. + * Otherwise, return {@code false} without doing anything. + */ + public boolean tryConsumeFloat() { + try { + consumeFloat(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a boolean, consume it and return its value. * Otherwise, throw a {@link ParseException}. */ @@ -861,6 +944,19 @@ public final class TextFormat { } /** + * If the next token is a string, consume it and return true. Otherwise, + * return false. + */ + public boolean tryConsumeString() { + try { + consumeString(); + return true; + } catch (ParseException e) { + return false; + } + } + + /** * If the next token is a string, consume it, unescape it as a * {@link ByteString}, and return it. Otherwise, throw a * {@link ParseException}. @@ -880,7 +976,8 @@ public final class TextFormat { * multiple adjacent tokens which are automatically concatenated, like in * C or Python. */ - private void consumeByteString(List<ByteString> list) throws ParseException { + private void consumeByteString(List<ByteString> list) + throws ParseException { final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; if (quote != '\"' && quote != '\'') { @@ -988,6 +1085,16 @@ public final class TextFormat { } } + private static final Parser PARSER = Parser.newBuilder().build(); + + /** + * Return a {@link Parser} instance which can parse text-format + * messages. The returned instance is thread-safe. + */ + public static Parser getParser() { + return PARSER; + } + /** * Parse a text-format message from {@code input} and merge the contents * into {@code builder}. @@ -995,7 +1102,7 @@ public final class TextFormat { public static void merge(final Readable input, final Message.Builder builder) throws IOException { - merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + PARSER.merge(input, builder); } /** @@ -1005,7 +1112,7 @@ public final class TextFormat { public static void merge(final CharSequence input, final Message.Builder builder) throws ParseException { - merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + PARSER.merge(input, builder); } /** @@ -1017,35 +1124,9 @@ public final class TextFormat { final ExtensionRegistry extensionRegistry, final Message.Builder builder) throws IOException { - // Read the entire input to a String then parse that. - - // If StreamTokenizer were not quite so crippled, or if there were a kind - // of Reader that could read in chunks that match some particular regex, - // or if we wanted to write a custom Reader to tokenize our stream, then - // we would not have to read to one big String. Alas, none of these is - // the case. Oh well. - - merge(toStringBuilder(input), extensionRegistry, builder); + PARSER.merge(input, extensionRegistry, builder); } - private static final int BUFFER_SIZE = 4096; - - // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) - // overhead is worthwhile - private static StringBuilder toStringBuilder(final Readable input) - throws IOException { - final StringBuilder text = new StringBuilder(); - final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); - while (true) { - final int n = input.read(buffer); - if (n == -1) { - break; - } - buffer.flip(); - text.append(buffer, 0, n); - } - return text; - } /** * Parse a text-format message from {@code input} and merge the contents @@ -1056,187 +1137,481 @@ public final class TextFormat { final ExtensionRegistry extensionRegistry, final Message.Builder builder) throws ParseException { - final Tokenizer tokenizer = new Tokenizer(input); - - while (!tokenizer.atEnd()) { - mergeField(tokenizer, extensionRegistry, builder); - } + PARSER.merge(input, extensionRegistry, builder); } + /** - * Parse a single field from {@code tokenizer} and merge it into - * {@code builder}. + * Parser for text-format proto2 instances. This class is thread-safe. + * The implementation largely follows google/protobuf/text_format.cc. + * + * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or + * {@link Builder} to control the parser behavior. */ - private static void mergeField(final Tokenizer tokenizer, - final ExtensionRegistry extensionRegistry, - final Message.Builder builder) - throws ParseException { - FieldDescriptor field; - final Descriptor type = builder.getDescriptorForType(); - ExtensionRegistry.ExtensionInfo extension = null; + public static class Parser { + /** + * Determines if repeated values for non-repeated fields and + * oneofs are permitted. For example, given required/optional field "foo" + * and a oneof containing "baz" and "qux": + * <li> + * <ul>"foo: 1 foo: 2" + * <ul>"baz: 1 qux: 2" + * <ul>merging "foo: 2" into a proto in which foo is already set, or + * <ul>merging "qux: 2" into a proto in which baz is already set. + * </li> + */ + public enum SingularOverwritePolicy { + /** The last value is retained. */ + ALLOW_SINGULAR_OVERWRITES, + /** An error is issued. */ + FORBID_SINGULAR_OVERWRITES + } + + private final boolean allowUnknownFields; + private final SingularOverwritePolicy singularOverwritePolicy; + + private Parser(boolean allowUnknownFields, + SingularOverwritePolicy singularOverwritePolicy) { + this.allowUnknownFields = allowUnknownFields; + this.singularOverwritePolicy = singularOverwritePolicy; + } - if (tokenizer.tryConsume("[")) { - // An extension. - final StringBuilder name = - new StringBuilder(tokenizer.consumeIdentifier()); - while (tokenizer.tryConsume(".")) { - name.append('.'); - name.append(tokenizer.consumeIdentifier()); + /** + * Returns a new instance of {@link Builder}. + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Builder that can be used to obtain new instances of {@link Parser}. + */ + public static class Builder { + private boolean allowUnknownFields = false; + private SingularOverwritePolicy singularOverwritePolicy = + SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES; + + /** + * Set whether this parser will allow unknown fields. By default, an + * exception is thrown if an unknown field is encountered. If this is + * set, the parser will only log a warning. + * + * <p>Use of this parameter is discouraged. See: + * https://sites.google.com/a/google.com/protocol-buffers/migration/ + * proto2-faq#How_do_I_ignore_unknown_fields_w + * for more details. + */ + public Builder setAllowUnknownFields(boolean allowUnknownFields) { + this.allowUnknownFields = allowUnknownFields; + return this; } - extension = extensionRegistry.findExtensionByName(name.toString()); + /** + * Sets parser behavior when a non-repeated field appears more than once. + */ + public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) { + this.singularOverwritePolicy = p; + return this; + } - if (extension == null) { - throw tokenizer.parseExceptionPreviousToken( - "Extension \"" + name + "\" not found in the ExtensionRegistry."); - } else if (extension.descriptor.getContainingType() != type) { - throw tokenizer.parseExceptionPreviousToken( - "Extension \"" + name + "\" does not extend message type \"" + - type.getFullName() + "\"."); + public Parser build() { + return new Parser(allowUnknownFields, singularOverwritePolicy); } + } - tokenizer.consume("]"); + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. + */ + public void merge(final Readable input, + final Message.Builder builder) + throws IOException { + merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + } - field = extension.descriptor; - } else { - final String name = tokenizer.consumeIdentifier(); - field = type.findFieldByName(name); + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. + */ + public void merge(final CharSequence input, + final Message.Builder builder) + throws ParseException { + merge(input, ExtensionRegistry.getEmptyRegistry(), builder); + } - // Group names are expected to be capitalized as they appear in the - // .proto file, which actually matches their type names, not their field - // names. - if (field == null) { - // Explicitly specify US locale so that this code does not break when - // executing in Turkey. - final String lowerName = name.toLowerCase(Locale.US); - field = type.findFieldByName(lowerName); - // If the case-insensitive match worked but the field is NOT a group, - if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { - field = null; + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. Extensions will be recognized if they are + * registered in {@code extensionRegistry}. + */ + public void merge(final Readable input, + final ExtensionRegistry extensionRegistry, + final Message.Builder builder) + throws IOException { + // Read the entire input to a String then parse that. + + // If StreamTokenizer were not quite so crippled, or if there were a kind + // of Reader that could read in chunks that match some particular regex, + // or if we wanted to write a custom Reader to tokenize our stream, then + // we would not have to read to one big String. Alas, none of these is + // the case. Oh well. + + merge(toStringBuilder(input), extensionRegistry, builder); + } + + + private static final int BUFFER_SIZE = 4096; + + // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) + // overhead is worthwhile + private static StringBuilder toStringBuilder(final Readable input) + throws IOException { + final StringBuilder text = new StringBuilder(); + final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); + while (true) { + final int n = input.read(buffer); + if (n == -1) { + break; } + buffer.flip(); + text.append(buffer, 0, n); } - // Again, special-case group names as described above. - if (field != null && field.getType() == FieldDescriptor.Type.GROUP && - !field.getMessageType().getName().equals(name)) { - field = null; - } + return text; + } - if (field == null) { - throw tokenizer.parseExceptionPreviousToken( - "Message type \"" + type.getFullName() + - "\" has no field named \"" + name + "\"."); + /** + * Parse a text-format message from {@code input} and merge the contents + * into {@code builder}. Extensions will be recognized if they are + * registered in {@code extensionRegistry}. + */ + public void merge(final CharSequence input, + final ExtensionRegistry extensionRegistry, + final Message.Builder builder) + throws ParseException { + final Tokenizer tokenizer = new Tokenizer(input); + MessageReflection.BuilderAdapter target = + new MessageReflection.BuilderAdapter(builder); + + while (!tokenizer.atEnd()) { + mergeField(tokenizer, extensionRegistry, target); } } - Object value = null; - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { - tokenizer.tryConsume(":"); // optional + /** + * Parse a single field from {@code tokenizer} and merge it into + * {@code builder}. + */ + private void mergeField(final Tokenizer tokenizer, + final ExtensionRegistry extensionRegistry, + final MessageReflection.MergeTarget target) + throws ParseException { + FieldDescriptor field = null; + final Descriptor type = target.getDescriptorForType(); + ExtensionRegistry.ExtensionInfo extension = null; + + if (tokenizer.tryConsume("[")) { + // An extension. + final StringBuilder name = + new StringBuilder(tokenizer.consumeIdentifier()); + while (tokenizer.tryConsume(".")) { + name.append('.'); + name.append(tokenizer.consumeIdentifier()); + } - final String endToken; - if (tokenizer.tryConsume("<")) { - endToken = ">"; - } else { - tokenizer.consume("{"); - endToken = "}"; - } + extension = target.findExtensionByName( + extensionRegistry, name.toString()); + + if (extension == null) { + if (!allowUnknownFields) { + throw tokenizer.parseExceptionPreviousToken( + "Extension \"" + name + "\" not found in the ExtensionRegistry."); + } else { + logger.warning( + "Extension \"" + name + "\" not found in the ExtensionRegistry."); + } + } else { + if (extension.descriptor.getContainingType() != type) { + throw tokenizer.parseExceptionPreviousToken( + "Extension \"" + name + "\" does not extend message type \"" + + type.getFullName() + "\"."); + } + field = extension.descriptor; + } - final Message.Builder subBuilder; - if (extension == null) { - subBuilder = builder.newBuilderForField(field); + tokenizer.consume("]"); } else { - subBuilder = extension.defaultInstance.newBuilderForType(); - } + final String name = tokenizer.consumeIdentifier(); + field = type.findFieldByName(name); + + // Group names are expected to be capitalized as they appear in the + // .proto file, which actually matches their type names, not their field + // names. + if (field == null) { + // Explicitly specify US locale so that this code does not break when + // executing in Turkey. + final String lowerName = name.toLowerCase(Locale.US); + field = type.findFieldByName(lowerName); + // If the case-insensitive match worked but the field is NOT a group, + if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { + field = null; + } + } + // Again, special-case group names as described above. + if (field != null && field.getType() == FieldDescriptor.Type.GROUP && + !field.getMessageType().getName().equals(name)) { + field = null; + } - while (!tokenizer.tryConsume(endToken)) { - if (tokenizer.atEnd()) { - throw tokenizer.parseException( - "Expected \"" + endToken + "\"."); + if (field == null) { + if (!allowUnknownFields) { + throw tokenizer.parseExceptionPreviousToken( + "Message type \"" + type.getFullName() + + "\" has no field named \"" + name + "\"."); + } else { + logger.warning( + "Message type \"" + type.getFullName() + + "\" has no field named \"" + name + "\"."); + } } - mergeField(tokenizer, extensionRegistry, subBuilder); } - value = subBuilder.buildPartial(); + // Skips unknown fields. + if (field == null) { + // Try to guess the type of this field. + // If this field is not a message, there should be a ":" between the + // field name and the field value and also the field value should not + // start with "{" or "<" which indicates the begining of a message body. + // If there is no ":" or there is a "{" or "<" after ":", this field has + // to be a message or the input is ill-formed. + if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && + !tokenizer.lookingAt("<")) { + skipFieldValue(tokenizer); + } else { + skipFieldMessage(tokenizer); + } + return; + } - } else { - tokenizer.consume(":"); + // Handle potential ':'. + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + tokenizer.tryConsume(":"); // optional + } else { + tokenizer.consume(":"); // required + } + // Support specifying repeated field values as a comma-separated list. + // Ex."foo: [1, 2, 3]" + if (field.isRepeated() && tokenizer.tryConsume("[")) { + while (true) { + consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); + if (tokenizer.tryConsume("]")) { + // End of list. + break; + } + tokenizer.consume(","); + } + } else { + consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); + } + } - switch (field.getType()) { - case INT32: - case SINT32: - case SFIXED32: - value = tokenizer.consumeInt32(); - break; + /** + * Parse a single field value from {@code tokenizer} and merge it into + * {@code builder}. + */ + private void consumeFieldValue( + final Tokenizer tokenizer, + final ExtensionRegistry extensionRegistry, + final MessageReflection.MergeTarget target, + final FieldDescriptor field, + final ExtensionRegistry.ExtensionInfo extension) + throws ParseException { + Object value = null; - case INT64: - case SINT64: - case SFIXED64: - value = tokenizer.consumeInt64(); - break; + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + final String endToken; + if (tokenizer.tryConsume("<")) { + endToken = ">"; + } else { + tokenizer.consume("{"); + endToken = "}"; + } - case UINT32: - case FIXED32: - value = tokenizer.consumeUInt32(); - break; + final MessageReflection.MergeTarget subField; + subField = target.newMergeTargetForField(field, + (extension == null) ? null : extension.defaultInstance); - case UINT64: - case FIXED64: - value = tokenizer.consumeUInt64(); - break; + while (!tokenizer.tryConsume(endToken)) { + if (tokenizer.atEnd()) { + throw tokenizer.parseException( + "Expected \"" + endToken + "\"."); + } + mergeField(tokenizer, extensionRegistry, subField); + } - case FLOAT: - value = tokenizer.consumeFloat(); - break; + value = subField.finish(); - case DOUBLE: - value = tokenizer.consumeDouble(); - break; + } else { + switch (field.getType()) { + case INT32: + case SINT32: + case SFIXED32: + value = tokenizer.consumeInt32(); + break; + + case INT64: + case SINT64: + case SFIXED64: + value = tokenizer.consumeInt64(); + break; + + case UINT32: + case FIXED32: + value = tokenizer.consumeUInt32(); + break; + + case UINT64: + case FIXED64: + value = tokenizer.consumeUInt64(); + break; + + case FLOAT: + value = tokenizer.consumeFloat(); + break; + + case DOUBLE: + value = tokenizer.consumeDouble(); + break; + + case BOOL: + value = tokenizer.consumeBoolean(); + break; + + case STRING: + value = tokenizer.consumeString(); + break; + + case BYTES: + value = tokenizer.consumeByteString(); + break; + + case ENUM: + final EnumDescriptor enumType = field.getEnumType(); + + if (tokenizer.lookingAtInteger()) { + final int number = tokenizer.consumeInt32(); + value = enumType.findValueByNumber(number); + if (value == null) { + throw tokenizer.parseExceptionPreviousToken( + "Enum type \"" + enumType.getFullName() + + "\" has no value with number " + number + '.'); + } + } else { + final String id = tokenizer.consumeIdentifier(); + value = enumType.findValueByName(id); + if (value == null) { + throw tokenizer.parseExceptionPreviousToken( + "Enum type \"" + enumType.getFullName() + + "\" has no value named \"" + id + "\"."); + } + } - case BOOL: - value = tokenizer.consumeBoolean(); - break; + break; - case STRING: - value = tokenizer.consumeString(); - break; + case MESSAGE: + case GROUP: + throw new RuntimeException("Can't get here."); + } + } - case BYTES: - value = tokenizer.consumeByteString(); - break; + if (field.isRepeated()) { + target.addRepeatedField(field, value); + } else if ((singularOverwritePolicy + == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES) + && target.hasField(field)) { + throw tokenizer.parseExceptionPreviousToken("Non-repeated field \"" + + field.getFullName() + "\" cannot be overwritten."); + } else if ((singularOverwritePolicy + == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES) + && field.getContainingOneof() != null + && target.hasOneof(field.getContainingOneof())) { + Descriptors.OneofDescriptor oneof = field.getContainingOneof(); + throw tokenizer.parseExceptionPreviousToken("Field \"" + + field.getFullName() + "\" is specified along with field \"" + + target.getOneofFieldDescriptor(oneof).getFullName() + + "\", another member of oneof \"" + oneof.getName() + "\"."); + } else { + target.setField(field, value); + } + } - case ENUM: - final EnumDescriptor enumType = field.getEnumType(); - - if (tokenizer.lookingAtInteger()) { - final int number = tokenizer.consumeInt32(); - value = enumType.findValueByNumber(number); - if (value == null) { - throw tokenizer.parseExceptionPreviousToken( - "Enum type \"" + enumType.getFullName() + - "\" has no value with number " + number + '.'); - } - } else { - final String id = tokenizer.consumeIdentifier(); - value = enumType.findValueByName(id); - if (value == null) { - throw tokenizer.parseExceptionPreviousToken( - "Enum type \"" + enumType.getFullName() + - "\" has no value named \"" + id + "\"."); - } - } + /** + * Skips the next field including the field's name and value. + */ + private void skipField(Tokenizer tokenizer) throws ParseException { + if (tokenizer.tryConsume("[")) { + // Extension name. + do { + tokenizer.consumeIdentifier(); + } while (tokenizer.tryConsume(".")); + tokenizer.consume("]"); + } else { + tokenizer.consumeIdentifier(); + } - break; + // Try to guess the type of this field. + // If this field is not a message, there should be a ":" between the + // field name and the field value and also the field value should not + // start with "{" or "<" which indicates the begining of a message body. + // If there is no ":" or there is a "{" or "<" after ":", this field has + // to be a message or the input is ill-formed. + if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && + !tokenizer.lookingAt("{")) { + skipFieldValue(tokenizer); + } else { + skipFieldMessage(tokenizer); + } + // For historical reasons, fields may optionally be separated by commas or + // semicolons. + if (!tokenizer.tryConsume(";")) { + tokenizer.tryConsume(","); + } + } - case MESSAGE: - case GROUP: - throw new RuntimeException("Can't get here."); + /** + * Skips the whole body of a message including the beginning delimeter and + * the ending delimeter. + */ + private void skipFieldMessage(Tokenizer tokenizer) throws ParseException { + final String delimiter; + if (tokenizer.tryConsume("<")) { + delimiter = ">"; + } else { + tokenizer.consume("{"); + delimiter = "}"; } + while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) { + skipField(tokenizer); + } + tokenizer.consume(delimiter); } - if (field.isRepeated()) { - builder.addRepeatedField(field, value); - } else { - builder.setField(field, value); + /** + * Skips a field value. + */ + private void skipFieldValue(Tokenizer tokenizer) throws ParseException { + if (tokenizer.tryConsumeString()) { + while (tokenizer.tryConsumeString()) {} + return; + } + if (!tokenizer.tryConsumeIdentifier() && // includes enum & boolean + !tokenizer.tryConsumeInt64() && // includes int32 + !tokenizer.tryConsumeUInt64() && // includes uint32 + !tokenizer.tryConsumeDouble() && + !tokenizer.tryConsumeFloat()) { + throw tokenizer.parseException( + "Invalid field value: " + tokenizer.currentToken); + } } } @@ -1246,6 +1621,11 @@ public final class TextFormat { // Some of these methods are package-private because Descriptors.java uses // them. + private interface ByteSequence { + int size(); + byte byteAt(int offset); + } + /** * Escapes bytes in the format used in protocol buffer text format, which * is the same as the format used for C string literals. All bytes @@ -1254,7 +1634,7 @@ public final class TextFormat { * which no defined short-hand escape sequence is defined will be escaped * using 3-digit octal sequences. */ - static String escapeBytes(final ByteString input) { + private static String escapeBytes(final ByteSequence input) { final StringBuilder builder = new StringBuilder(input.size()); for (int i = 0; i < input.size(); i++) { final byte b = input.byteAt(i); @@ -1289,6 +1669,39 @@ public final class TextFormat { } /** + * Escapes bytes in the format used in protocol buffer text format, which + * is the same as the format used for C string literals. All bytes + * that are not printable 7-bit ASCII characters are escaped, as well as + * backslash, single-quote, and double-quote characters. Characters for + * which no defined short-hand escape sequence is defined will be escaped + * using 3-digit octal sequences. + */ + static String escapeBytes(final ByteString input) { + return escapeBytes(new ByteSequence() { + public int size() { + return input.size(); + } + public byte byteAt(int offset) { + return input.byteAt(offset); + } + }); + } + + /** + * Like {@link #escapeBytes(ByteString)}, but used for byte array. + */ + static String escapeBytes(final byte[] input) { + return escapeBytes(new ByteSequence() { + public int size() { + return input.length; + } + public byte byteAt(int offset) { + return input[offset]; + } + }); + } + + /** * Un-escape a byte sequence as escaped using * {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with * "\x") are also recognized. @@ -1394,6 +1807,13 @@ public final class TextFormat { } /** + * Escape double quotes and backslashes in a String for unicode output of a message. + */ + public static String escapeDoubleQuotesAndBackslashes(final String input) { + return input.replace("\\", "\\\\").replace("\"", "\\\""); + } + + /** * Un-escape a text string as escaped using {@link #escapeText(String)}. * Two-digit hex escapes (starting with "\x") are also recognized. */ |