diff options
author | Jie Luo <jieluo@google.com> | 2015-07-15 14:31:19 -0700 |
---|---|---|
committer | Jie Luo <jieluo@google.com> | 2015-07-16 11:59:21 -0700 |
commit | b2d2cf8b48c2235e048ea76368e0eda75c7c28d5 (patch) | |
tree | d173ef4fcc79e96c48e28a1c6c02fbc25ef7b98a /src/google/protobuf/io/tokenizer.cc | |
parent | fde6e89f99eda04a4f1b8677bcea07e6c2040405 (diff) |
ignore UTF-8 BOM if it is at the beginning of a proto file
Diffstat (limited to 'src/google/protobuf/io/tokenizer.cc')
-rw-r--r-- | src/google/protobuf/io/tokenizer.cc | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index ef2de300..60bd7957 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -762,6 +762,15 @@ bool Tokenizer::NextWithComments(string* prev_trailing_comments, next_leading_comments); if (current_.type == TYPE_START) { + // Ignore unicode byte order mark(BOM) if it appears at the file + // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted. + if (TryConsume((char)0xEF)) { + if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) { + AddError("Proto file starts with 0xEF but not UTF-8 BOM. " + "Only UTF-8 is accepted for proto file."); + return false; + } + } collector.DetachFromPrev(); } else { // A comment appearing on the same line must be attached to the previous |