diff options
Diffstat (limited to 'src/sksl/lex/RegexParser.cpp')
-rw-r--r-- | src/sksl/lex/RegexParser.cpp | 176 |
1 files changed, 176 insertions, 0 deletions
diff --git a/src/sksl/lex/RegexParser.cpp b/src/sksl/lex/RegexParser.cpp new file mode 100644 index 0000000000..154df159db --- /dev/null +++ b/src/sksl/lex/RegexParser.cpp @@ -0,0 +1,176 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "RegexParser.h" + +#include "LexUtil.h" + +RegexNode RegexParser::parse(std::string source) { + fSource = source; + fIndex = 0; + ASSERT(fStack.size() == 0); + this->regex(); + ASSERT(fStack.size() == 1); + ASSERT(fIndex == source.size()); + return this->pop(); +} + +char RegexParser::peek() { + if (fIndex >= fSource.size()) { + return END; + } + return fSource[fIndex]; +} + +void RegexParser::expect(char c) { + if (this->peek() != c) { + printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek()); + exit(1); + } + ++fIndex; +} + +RegexNode RegexParser::pop() { + RegexNode result = fStack.top(); + fStack.pop(); + return result; +} + +void RegexParser::term() { + switch (this->peek()) { + case '(': this->group(); break; + case '[': this->set(); break; + case '.': this->dot(); break; + default: this->literal(); + } +} + +void RegexParser::quantifiedTerm() { + this->term(); + switch (this->peek()) { + case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break; + case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break; + case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break; + default: break; + } +} + +void RegexParser::sequence() { + this->quantifiedTerm(); + for (;;) { + switch (this->peek()) { + case END: // fall through + case '|': // fall through + case ')': return; + default: + this->sequence(); + RegexNode right = this->pop(); + RegexNode left = this->pop(); + fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right)); + } + } +} + +RegexNode RegexParser::escapeSequence(char c) { + switch (c) { + case 'n': return RegexNode(RegexNode::kChar_Kind, '\n'); + case 'r': return RegexNode(RegexNode::kChar_Kind, '\r'); + case 't': return RegexNode(RegexNode::kChar_Kind, '\t'); + case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r"); + default: return RegexNode(RegexNode::kChar_Kind, c); + } +} + +void RegexParser::literal() { + char c = this->peek(); + if (c == '\\') { + ++fIndex; + fStack.push(this->escapeSequence(peek())); + ++fIndex; + } + else { + fStack.push(RegexNode(RegexNode::kChar_Kind, c)); + ++fIndex; + } +} + +void RegexParser::dot() { + this->expect('.'); + fStack.push(RegexNode(RegexNode::kDot_Kind)); +} + +void RegexParser::group() { + this->expect('('); + this->regex(); + this->expect(')'); +} + +void RegexParser::setItem() { + this->literal(); + if (this->peek() == '-') { + ++fIndex; + if (peek() == ']') { + fStack.push(RegexNode(RegexNode::kChar_Kind, '-')); + } + else { + literal(); + RegexNode end = this->pop(); + ASSERT(end.fKind == RegexNode::kChar_Kind); + RegexNode start = this->pop(); + ASSERT(start.fKind == RegexNode::kChar_Kind); + fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end))); + } + } +} + +void RegexParser::set() { + expect('['); + size_t depth = fStack.size(); + RegexNode set(RegexNode::kCharset_Kind); + if (this->peek() == '^') { + ++fIndex; + set.fPayload.fBool = true; + } + else { + set.fPayload.fBool = false; + } + for (;;) { + switch (this->peek()) { + case ']': + ++fIndex; + while (fStack.size() > depth) { + set.fChildren.push_back(this->pop()); + } + fStack.push(std::move(set)); + return; + case END: + printf("unterminated character set\n"); + exit(1); + default: + this->setItem(); + } + } +} + +void RegexParser::regex() { + this->sequence(); + switch (this->peek()) { + case '|': { + ++fIndex; + this->regex(); + RegexNode right = this->pop(); + RegexNode left = this->pop(); + fStack.push(RegexNode(RegexNode::kOr_Kind, left, right)); + break; + } + case END: // fall through + case ')': + return; + default: + ASSERT(false); + } +} |