aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sksl/lex/RegexParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/sksl/lex/RegexParser.cpp')
-rw-r--r--src/sksl/lex/RegexParser.cpp176
1 files changed, 176 insertions, 0 deletions
diff --git a/src/sksl/lex/RegexParser.cpp b/src/sksl/lex/RegexParser.cpp
new file mode 100644
index 0000000000..154df159db
--- /dev/null
+++ b/src/sksl/lex/RegexParser.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "RegexParser.h"
+
+#include "LexUtil.h"
+
+RegexNode RegexParser::parse(std::string source) {
+ fSource = source;
+ fIndex = 0;
+ ASSERT(fStack.size() == 0);
+ this->regex();
+ ASSERT(fStack.size() == 1);
+ ASSERT(fIndex == source.size());
+ return this->pop();
+}
+
+char RegexParser::peek() {
+ if (fIndex >= fSource.size()) {
+ return END;
+ }
+ return fSource[fIndex];
+}
+
+void RegexParser::expect(char c) {
+ if (this->peek() != c) {
+ printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
+ exit(1);
+ }
+ ++fIndex;
+}
+
+RegexNode RegexParser::pop() {
+ RegexNode result = fStack.top();
+ fStack.pop();
+ return result;
+}
+
+void RegexParser::term() {
+ switch (this->peek()) {
+ case '(': this->group(); break;
+ case '[': this->set(); break;
+ case '.': this->dot(); break;
+ default: this->literal();
+ }
+}
+
+void RegexParser::quantifiedTerm() {
+ this->term();
+ switch (this->peek()) {
+ case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break;
+ case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break;
+ case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
+ default: break;
+ }
+}
+
+void RegexParser::sequence() {
+ this->quantifiedTerm();
+ for (;;) {
+ switch (this->peek()) {
+ case END: // fall through
+ case '|': // fall through
+ case ')': return;
+ default:
+ this->sequence();
+ RegexNode right = this->pop();
+ RegexNode left = this->pop();
+ fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
+ }
+ }
+}
+
+RegexNode RegexParser::escapeSequence(char c) {
+ switch (c) {
+ case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
+ case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
+ case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
+ case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
+ default: return RegexNode(RegexNode::kChar_Kind, c);
+ }
+}
+
+void RegexParser::literal() {
+ char c = this->peek();
+ if (c == '\\') {
+ ++fIndex;
+ fStack.push(this->escapeSequence(peek()));
+ ++fIndex;
+ }
+ else {
+ fStack.push(RegexNode(RegexNode::kChar_Kind, c));
+ ++fIndex;
+ }
+}
+
+void RegexParser::dot() {
+ this->expect('.');
+ fStack.push(RegexNode(RegexNode::kDot_Kind));
+}
+
+void RegexParser::group() {
+ this->expect('(');
+ this->regex();
+ this->expect(')');
+}
+
+void RegexParser::setItem() {
+ this->literal();
+ if (this->peek() == '-') {
+ ++fIndex;
+ if (peek() == ']') {
+ fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
+ }
+ else {
+ literal();
+ RegexNode end = this->pop();
+ ASSERT(end.fKind == RegexNode::kChar_Kind);
+ RegexNode start = this->pop();
+ ASSERT(start.fKind == RegexNode::kChar_Kind);
+ fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
+ }
+ }
+}
+
+void RegexParser::set() {
+ expect('[');
+ size_t depth = fStack.size();
+ RegexNode set(RegexNode::kCharset_Kind);
+ if (this->peek() == '^') {
+ ++fIndex;
+ set.fPayload.fBool = true;
+ }
+ else {
+ set.fPayload.fBool = false;
+ }
+ for (;;) {
+ switch (this->peek()) {
+ case ']':
+ ++fIndex;
+ while (fStack.size() > depth) {
+ set.fChildren.push_back(this->pop());
+ }
+ fStack.push(std::move(set));
+ return;
+ case END:
+ printf("unterminated character set\n");
+ exit(1);
+ default:
+ this->setItem();
+ }
+ }
+}
+
+void RegexParser::regex() {
+ this->sequence();
+ switch (this->peek()) {
+ case '|': {
+ ++fIndex;
+ this->regex();
+ RegexNode right = this->pop();
+ RegexNode left = this->pop();
+ fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
+ break;
+ }
+ case END: // fall through
+ case ')':
+ return;
+ default:
+ ASSERT(false);
+ }
+}