about | summary | refs | log | tree | commit | diff | homepage
path: root/src/google/protobuf/io/tokenizer.cc
diff options
context:
space:
mode:
authorGravatar jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2014-07-18 00:47:59 +0000
committerGravatar jieluo@google.com <jieluo@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2014-07-18 00:47:59 +0000
commit 4de8f55113007fdc8e34107950e605fc0209d465 (patch)
tree 92b7da8757a7740d9e1f2d3ead233542947d8c8c /src/google/protobuf/io/tokenizer.cc
parent c5553a3d18f80132b9079c5504bc0aa1f7f950a0 (diff)
down integrate to svn
Diffstat (limited to 'src/google/protobuf/io/tokenizer.cc')
-rw-r--r-- src/google/protobuf/io/tokenizer.cc 44
1 files changed, 40 insertions, 4 deletions
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index a022b71d..d149305a 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -91,6 +91,7 @@
#include <google/protobuf/io/tokenizer.h>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/stringprintf.h>
+#include <google/protobuf/io/strtod.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/stubs/stl_util.h>
@@ -195,7 +196,9 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input,
record_target_(NULL),
record_start_(-1),
allow_f_after_float_(false),
- comment_style_(CPP_COMMENT_STYLE) {
+ comment_style_(CPP_COMMENT_STYLE),
+ require_space_after_number_(true),
+ allow_multiline_strings_(false) {
current_.line = 0;
current_.column = 0;
@@ -350,9 +353,16 @@ void Tokenizer::ConsumeString(char delimiter) {
while (true) {
switch (current_char_) {
case '\0':
- case '\n': {
- AddError("String literals cannot cross line boundaries.");
+ AddError("Unexpected end of string.");
return;
+
+ case '\n': {
+ if (!allow_multiline_strings_) {
+ AddError("String literals cannot cross line boundaries.");
+ return;
+ }
+ NextChar();
+ break;
}
case '\\': {
@@ -449,7 +459,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
}
}
- if (LookingAt<Letter>()) {
+ if (LookingAt<Letter>() && require_space_after_number_) {
AddError("Need space between number and identifier.");
} else if (current_char_ == '.') {
if (is_float) {
@@ -618,6 +628,12 @@ bool Tokenizer::Next() {
ConsumeString('\'');
current_.type = TYPE_STRING;
} else {
+ // Check if the high order bit is set.
+ if (current_char_ & 0x80) {
+ error_collector_->AddError(line_, column_,
+ StringPrintf("Interpreting non ascii codepoint %d.",
+ static_cast<unsigned char>(current_char_)));
+ }
NextChar();
current_.type = TYPE_SYMBOL;
}
@@ -1086,6 +1102,26 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) {
}
}
+template<typename CharacterClass>  // CharacterClass must expose a static InClass() predicate callable on s[i].
+static bool AllInClass(const string& s) {  // Returns true iff every character of s is in CharacterClass; true for an empty s.
+ for (int i = 0; i < s.size(); ++i) {  // NOTE(review): int index compared against s.size() is a signed/unsigned comparison.
+ if (!CharacterClass::InClass(s[i]))  // Bail out at the first character outside the class.
+ return false;
+ }
+ return true;
+}
+
+bool Tokenizer::IsIdentifier(const string& text) {  // Returns true iff text is non-empty, starts with a Letter, and every remaining character is Alphanumeric.
+ // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
+ if (text.size() == 0)  // The empty string is never an identifier.
+ return false;
+ if (!Letter::InClass(text.at(0)))  // The first character must be in the Letter class.
+ return false;
+ if (!AllInClass<Alphanumeric>(text.substr(1)))  // NOTE(review): substr(1) copies the tail into a temporary string just to scan it.
+ return false;
+ return true;
+}
+
} // namespace io
} // namespace protobuf
} // namespace google