aboutsummaryrefslogtreecommitdiffhomepage
path: root/csharp/src/ProtocolBuffers/TextTokenizer.cs
diff options
context:
space:
mode:
Diffstat (limited to 'csharp/src/ProtocolBuffers/TextTokenizer.cs')
-rw-r--r--csharp/src/ProtocolBuffers/TextTokenizer.cs501
1 files changed, 0 insertions, 501 deletions
diff --git a/csharp/src/ProtocolBuffers/TextTokenizer.cs b/csharp/src/ProtocolBuffers/TextTokenizer.cs
deleted file mode 100644
index 5bb27fd0..00000000
--- a/csharp/src/ProtocolBuffers/TextTokenizer.cs
+++ /dev/null
@@ -1,501 +0,0 @@
-#region Copyright notice and license
-
-// Protocol Buffers - Google's data interchange format
-// Copyright 2008 Google Inc. All rights reserved.
-// http://github.com/jskeet/dotnet-protobufs/
-// Original C++/Java/Python code:
-// http://code.google.com/p/protobuf/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#endregion
-
-using System;
-using System.Globalization;
-using System.Text.RegularExpressions;
-
-namespace Google.ProtocolBuffers
-{
- /// <summary>
- /// Represents a stream of tokens parsed from a string.
- /// </summary>
- internal sealed class TextTokenizer
- {
- private readonly string text;
- private string currentToken;
-
- /// <summary>
- /// The character index within the text to perform the next regex match at.
- /// </summary>
- private int matchPos = 0;
-
- /// <summary>
- /// The character index within the text at which the current token begins.
- /// </summary>
- private int pos = 0;
-
- /// <summary>
- /// The line number of the current token.
- /// </summary>
- private int line = 0;
-
- /// <summary>
- /// The column number of the current token.
- /// </summary>
- private int column = 0;
-
- /// <summary>
- /// The line number of the previous token.
- /// </summary>
- private int previousLine = 0;
-
- /// <summary>
- /// The column number of the previous token.
- /// </summary>
- private int previousColumn = 0;
-
- // Note: atomic groups used to mimic possessive quantifiers in Java in both of these regexes
- internal static readonly Regex WhitespaceAndCommentPattern = new Regex("\\G(?>(\\s|(#.*$))+)",
- FrameworkPortability.
- CompiledRegexWhereAvailable |
- RegexOptions.Multiline);
-
- private static readonly Regex TokenPattern = new Regex(
- "\\G[a-zA-Z_](?>[0-9a-zA-Z_+-]*)|" + // an identifier
- "\\G[0-9+-](?>[0-9a-zA-Z_.+-]*)|" + // a number
- "\\G\"(?>([^\"\\\n\\\\]|\\\\.)*)(\"|\\\\?$)|" + // a double-quoted string
- "\\G\'(?>([^\"\\\n\\\\]|\\\\.)*)(\'|\\\\?$)", // a single-quoted string
- FrameworkPortability.CompiledRegexWhereAvailable | RegexOptions.Multiline);
-
- private static readonly Regex DoubleInfinity = new Regex("^-?inf(inity)?$",
- FrameworkPortability.CompiledRegexWhereAvailable |
- RegexOptions.IgnoreCase);
-
- private static readonly Regex FloatInfinity = new Regex("^-?inf(inity)?f?$",
- FrameworkPortability.CompiledRegexWhereAvailable |
- RegexOptions.IgnoreCase);
-
- private static readonly Regex FloatNan = new Regex("^nanf?$",
- FrameworkPortability.CompiledRegexWhereAvailable |
- RegexOptions.IgnoreCase);
-
- /** Construct a tokenizer that parses tokens from the given text. */
-
- public TextTokenizer(string text)
- {
- this.text = text;
- SkipWhitespace();
- NextToken();
- }
-
- /// <summary>
- /// Are we at the end of the input?
- /// </summary>
- public bool AtEnd
- {
- get { return currentToken.Length == 0; }
- }
-
- /// <summary>
- /// Advances to the next token.
- /// </summary>
- public void NextToken()
- {
- previousLine = line;
- previousColumn = column;
-
- // Advance the line counter to the current position.
- while (pos < matchPos)
- {
- if (text[pos] == '\n')
- {
- ++line;
- column = 0;
- }
- else
- {
- ++column;
- }
- ++pos;
- }
-
- // Match the next token.
- if (matchPos == text.Length)
- {
- // EOF
- currentToken = "";
- }
- else
- {
- Match match = TokenPattern.Match(text, matchPos);
- if (match.Success)
- {
- currentToken = match.Value;
- matchPos += match.Length;
- }
- else
- {
- // Take one character.
- currentToken = text[matchPos].ToString();
- matchPos++;
- }
-
- SkipWhitespace();
- }
- }
-
- /// <summary>
- /// Skip over any whitespace so that matchPos starts at the next token.
- /// </summary>
- private void SkipWhitespace()
- {
- Match match = WhitespaceAndCommentPattern.Match(text, matchPos);
- if (match.Success)
- {
- matchPos += match.Length;
- }
- }
-
- /// <summary>
- /// If the next token exactly matches the given token, consume it and return
- /// true. Otherwise, return false without doing anything.
- /// </summary>
- public bool TryConsume(string token)
- {
- if (currentToken == token)
- {
- NextToken();
- return true;
- }
- return false;
- }
-
- /*
- * If the next token exactly matches {@code token}, consume it. Otherwise,
- * throw a {@link ParseException}.
- */
-
- /// <summary>
- /// If the next token exactly matches the specified one, consume it.
- /// Otherwise, throw a FormatException.
- /// </summary>
- /// <param name="token"></param>
- public void Consume(string token)
- {
- if (!TryConsume(token))
- {
- throw CreateFormatException("Expected \"" + token + "\".");
- }
- }
-
- /// <summary>
- /// Returns true if the next token is an integer, but does not consume it.
- /// </summary>
- public bool LookingAtInteger()
- {
- if (currentToken.Length == 0)
- {
- return false;
- }
-
- char c = currentToken[0];
- return ('0' <= c && c <= '9') || c == '-' || c == '+';
- }
-
- /// <summary>
- /// If the next token is an identifier, consume it and return its value.
- /// Otherwise, throw a FormatException.
- /// </summary>
- public string ConsumeIdentifier()
- {
- foreach (char c in currentToken)
- {
- if (('a' <= c && c <= 'z') ||
- ('A' <= c && c <= 'Z') ||
- ('0' <= c && c <= '9') ||
- (c == '_') || (c == '.'))
- {
- // OK
- }
- else
- {
- throw CreateFormatException("Expected identifier.");
- }
- }
-
- string result = currentToken;
- NextToken();
- return result;
- }
-
- /// <summary>
- /// If the next token is a 32-bit signed integer, consume it and return its
- /// value. Otherwise, throw a FormatException.
- /// </summary>
- public int ConsumeInt32()
- {
- try
- {
- int result = TextFormat.ParseInt32(currentToken);
- NextToken();
- return result;
- }
- catch (FormatException e)
- {
- throw CreateIntegerParseException(e);
- }
- }
-
- /// <summary>
- /// If the next token is a 32-bit unsigned integer, consume it and return its
- /// value. Otherwise, throw a FormatException.
- /// </summary>
- public uint ConsumeUInt32()
- {
- try
- {
- uint result = TextFormat.ParseUInt32(currentToken);
- NextToken();
- return result;
- }
- catch (FormatException e)
- {
- throw CreateIntegerParseException(e);
- }
- }
-
- /// <summary>
- /// If the next token is a 64-bit signed integer, consume it and return its
- /// value. Otherwise, throw a FormatException.
- /// </summary>
- public long ConsumeInt64()
- {
- try
- {
- long result = TextFormat.ParseInt64(currentToken);
- NextToken();
- return result;
- }
- catch (FormatException e)
- {
- throw CreateIntegerParseException(e);
- }
- }
-
- /// <summary>
- /// If the next token is a 64-bit unsigned integer, consume it and return its
- /// value. Otherwise, throw a FormatException.
- /// </summary>
- public ulong ConsumeUInt64()
- {
- try
- {
- ulong result = TextFormat.ParseUInt64(currentToken);
- NextToken();
- return result;
- }
- catch (FormatException e)
- {
- throw CreateIntegerParseException(e);
- }
- }
-
- /// <summary>
- /// If the next token is a double, consume it and return its value.
- /// Otherwise, throw a FormatException.
- /// </summary>
- public double ConsumeDouble()
- {
- // We need to parse infinity and nan separately because
- // double.Parse() does not accept "inf", "infinity", or "nan".
- if (DoubleInfinity.IsMatch(currentToken))
- {
- bool negative = currentToken.StartsWith("-");
- NextToken();
- return negative ? double.NegativeInfinity : double.PositiveInfinity;
- }
- if (currentToken.Equals("nan", StringComparison.OrdinalIgnoreCase))
- {
- NextToken();
- return Double.NaN;
- }
-
- try
- {
- double result = double.Parse(currentToken, FrameworkPortability.InvariantCulture);
- NextToken();
- return result;
- }
- catch (FormatException e)
- {
- throw CreateFloatParseException(e);
- }
- catch (OverflowException e)
- {
- throw CreateFloatParseException(e);
- }
- }
-
- /// <summary>
- /// If the next token is a float, consume it and return its value.
- /// Otherwise, throw a FormatException.
- /// </summary>
- public float ConsumeFloat()
- {
- // We need to parse infinity and nan separately because
- // Float.parseFloat() does not accept "inf", "infinity", or "nan".
- if (FloatInfinity.IsMatch(currentToken))
- {
- bool negative = currentToken.StartsWith("-");
- NextToken();
- return negative ? float.NegativeInfinity : float.PositiveInfinity;
- }
- if (FloatNan.IsMatch(currentToken))
- {
- NextToken();
- return float.NaN;
- }
-
- if (currentToken.EndsWith("f"))
- {
- currentToken = currentToken.TrimEnd('f');
- }
-
- try
- {
- float result = float.Parse(currentToken, FrameworkPortability.InvariantCulture);
- NextToken();
- return result;
- }
- catch (FormatException e)
- {
- throw CreateFloatParseException(e);
- }
- catch (OverflowException e)
- {
- throw CreateFloatParseException(e);
- }
- }
-
- /// <summary>
- /// If the next token is a Boolean, consume it and return its value.
- /// Otherwise, throw a FormatException.
- /// </summary>
- public bool ConsumeBoolean()
- {
- if (currentToken == "true")
- {
- NextToken();
- return true;
- }
- if (currentToken == "false")
- {
- NextToken();
- return false;
- }
- throw CreateFormatException("Expected \"true\" or \"false\".");
- }
-
- /// <summary>
- /// If the next token is a string, consume it and return its (unescaped) value.
- /// Otherwise, throw a FormatException.
- /// </summary>
- public string ConsumeString()
- {
- return ConsumeByteString().ToStringUtf8();
- }
-
- /// <summary>
- /// If the next token is a string, consume it, unescape it as a
- /// ByteString and return it. Otherwise, throw a FormatException.
- /// </summary>
- public ByteString ConsumeByteString()
- {
- char quote = currentToken.Length > 0 ? currentToken[0] : '\0';
- if (quote != '\"' && quote != '\'')
- {
- throw CreateFormatException("Expected string.");
- }
-
- if (currentToken.Length < 2 ||
- currentToken[currentToken.Length - 1] != quote)
- {
- throw CreateFormatException("String missing ending quote.");
- }
-
- try
- {
- string escaped = currentToken.Substring(1, currentToken.Length - 2);
- ByteString result = TextFormat.UnescapeBytes(escaped);
- NextToken();
- return result;
- }
- catch (FormatException e)
- {
- throw CreateFormatException(e.Message);
- }
- }
-
- /// <summary>
- /// Returns a format exception with the current line and column numbers
- /// in the description, suitable for throwing.
- /// </summary>
- public FormatException CreateFormatException(string description)
- {
- // Note: People generally prefer one-based line and column numbers.
- return new FormatException((line + 1) + ":" + (column + 1) + ": " + description);
- }
-
- /// <summary>
- /// Returns a format exception with the line and column numbers of the
- /// previous token in the description, suitable for throwing.
- /// </summary>
- public FormatException CreateFormatExceptionPreviousToken(string description)
- {
- // Note: People generally prefer one-based line and column numbers.
- return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
- }
-
- /// <summary>
- /// Constructs an appropriate FormatException for the given existing exception
- /// when trying to parse an integer.
- /// </summary>
- private FormatException CreateIntegerParseException(FormatException e)
- {
- return CreateFormatException("Couldn't parse integer: " + e.Message);
- }
-
- /// <summary>
- /// Constructs an appropriate FormatException for the given existing exception
- /// when trying to parse a float or double.
- /// </summary>
- private FormatException CreateFloatParseException(Exception e)
- {
- return CreateFormatException("Couldn't parse number: " + e.Message);
- }
- }
-} \ No newline at end of file