about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/hlo_lexer.cc
diff options
context:
space:
mode:
author: Mark Heffernan <meheff@google.com> 2018-08-13 11:58:25 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-08-13 12:08:21 -0700
commit: 881f58d20cc159817eadefa6f8e1b7d87c85d01f (patch)
tree: e66a67e635afb9a788336a67f4b97300e468f82b /tensorflow/compiler/xla/service/hlo_lexer.cc
parent: 55b327916aa9b1f558242869412ae411d00d20ee (diff)
Enable arbitrary comments in HLO in both /*...*/ and // forms.
Allow '/*...*/' and '//' comments to appear anywhere in HLO text, including multi-line comments. Previously, only '/*...*/' comments were allowed, and only in certain locations in a serialized Literal. PiperOrigin-RevId: 208519204
Diffstat (limited to 'tensorflow/compiler/xla/service/hlo_lexer.cc')
-rw-r--r-- tensorflow/compiler/xla/service/hlo_lexer.cc 55
1 files changed, 41 insertions, 14 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc
index 71b44507cc..8e0d38b6a6 100644
--- a/tensorflow/compiler/xla/service/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/service/hlo_lexer.cc
@@ -143,8 +143,47 @@ TokKind HloLexer::LexToken() {
return TokKind::kLparen;
case ')':
return TokKind::kRparen;
- case '/':
- return LexComment();
+ case '/': {
+ if (PeekCurrentChar() == '*') {
+ // This is the start of a /*...*/ delimited comment. Save the current
+ // location in case the comment is unterminated so the error message
+ // will point to the beginning of the comment.
+ const char* comment_start = current_ptr_;
+ current_ptr_++;
+ // Advance until '*/' is found.
+ while (true) {
+ int current = GetNextChar();
+ if (current == '*' && PeekCurrentChar() == '/') {
+ // End of comment.
+ current_ptr_++;
+ break;
+ }
+ if (current == kEOF) {
+ // Unterminated comment.
+ current_ptr_ = comment_start;
+ return TokKind::kError;
+ }
+ }
+ // Return no token for the comment. Keep lexing.
+ continue;
+ } else if (PeekCurrentChar() == '/') {
+ // This is the start of a '//' delimited comment. Throw away
+ // everything until end of line or file. The end-of-line character(s)
+ // are left unlexed in the buffer which is harmless because these are
+ // skipped later by the lexer. This approach enables support for
+ // different end-of-line encodings.
+ while (true) {
+ int current = PeekCurrentChar();
+ if (current == kEOF || current == '\n' || current == '\r') {
+ break;
+ }
+ current_ptr_++;
+ }
+ continue;
+ }
+ // A lone '/' is an error.
+ return TokKind::kError;
+ }
case '"':
return LexString();
}
@@ -357,16 +396,6 @@ tensorflow::StringPiece HloLexer::GetLine(LocTy loc) const {
return StringPieceFromPointers(start, end);
}
-TokKind HloLexer::LexComment() {
- auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
- static LazyRE2 comment_pattern = {R"(\/\*.*?\*\/)"};
- if (RE2::Consume(&consumable, *comment_pattern)) {
- current_ptr_ = consumable.begin();
- return TokKind::kComment;
- }
- return TokKind::kError;
-}
-
// Lexes quoted string with escaping characters. If matched, the quoted string
// will be unescaped and stored to str_val_.
TokKind HloLexer::LexString() {
@@ -412,8 +441,6 @@ string TokKindToString(TokKind kind) {
return "kRparen";
case TokKind::kArrow:
return "kArrow";
- case TokKind::kComment:
- return "kComment";
case TokKind::kw_HloModule:
return "kw_HloModule";
case TokKind::kw_ENTRY: