aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/io
diff options
context:
space:
mode:
authorGravatar Jie Luo <jieluo@google.com>2015-07-15 14:31:19 -0700
committerGravatar Jie Luo <jieluo@google.com>2015-07-16 11:59:21 -0700
commitb2d2cf8b48c2235e048ea76368e0eda75c7c28d5 (patch)
treed173ef4fcc79e96c48e28a1c6c02fbc25ef7b98a /src/google/protobuf/io
parentfde6e89f99eda04a4f1b8677bcea07e6c2040405 (diff)
ignore UTF-8 BOM if it is at the beginning of a proto file
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r--src/google/protobuf/io/tokenizer.cc9
1 files changed, 9 insertions, 0 deletions
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index ef2de300..60bd7957 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -762,6 +762,15 @@ bool Tokenizer::NextWithComments(string* prev_trailing_comments,
next_leading_comments);
if (current_.type == TYPE_START) {
+ // Ignore unicode byte order mark(BOM) if it appears at the file
+ // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
+ if (TryConsume((char)0xEF)) {
+ if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
+ AddError("Proto file starts with 0xEF but not UTF-8 BOM. "
+ "Only UTF-8 is accepted for proto file.");
+ return false;
+ }
+ }
collector.DetachFromPrev();
} else {
// A comment appearing on the same line must be attached to the previous