aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sksl/lex
diff options
context:
space:
mode:
authorGravatar Ethan Nicholas <ethannicholas@google.com>2017-09-07 09:39:50 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-09-07 13:57:51 +0000
commitca82a922e3d081ae83c940f4fd3ccc8cc6f0916f (patch)
tree480d95845bf2ce5e24d26ff30ab96cdeb3ee6315 /src/sksl/lex
parente5756eccc41fabc421a7fe8b00807c3dcd257b9f (diff)
Revert "Revert "Initial checkin of SkSL lexical analyzer generator (not actually in use yet).""
This reverts commit 3ed4781ee1bd5af9b3ee2623e5356e86ce36e812. Bug: skia: Change-Id: If0de7ca17c4da8000d3526a73b71be6ee34ce060 Reviewed-on: https://skia-review.googlesource.com/43561 Reviewed-by: Ethan Nicholas <ethannicholas@google.com> Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
Diffstat (limited to 'src/sksl/lex')
-rw-r--r--src/sksl/lex/DFA.h32
-rw-r--r--src/sksl/lex/DFAState.h68
-rw-r--r--src/sksl/lex/LexUtil.h14
-rw-r--r--src/sksl/lex/Main.cpp197
-rw-r--r--src/sksl/lex/NFA.cpp41
-rw-r--r--src/sksl/lex/NFA.h54
-rw-r--r--src/sksl/lex/NFAState.h150
-rw-r--r--src/sksl/lex/NFAtoDFA.h130
-rw-r--r--src/sksl/lex/RegexNode.cpp117
-rw-r--r--src/sksl/lex/RegexNode.h78
-rw-r--r--src/sksl/lex/RegexParser.cpp176
-rw-r--r--src/sksl/lex/RegexParser.h89
-rw-r--r--src/sksl/lex/sksl.lex95
13 files changed, 1241 insertions, 0 deletions
diff --git a/src/sksl/lex/DFA.h b/src/sksl/lex/DFA.h
new file mode 100644
index 0000000000..51e85ca2f8
--- /dev/null
+++ b/src/sksl/lex/DFA.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKSL_DFA
+#define SKSL_DFA
+
+#include <string>
+#include <vector>
+
+/**
+ * Tables representing a deterministic finite automaton for matching regular expressions.
+ */
+struct DFA {
+ DFA(std::vector<std::vector<int>> transitions, std::vector<int> accepts)
+ : fTransitions(transitions)
+ , fAccepts(accepts) {}
+
+ static constexpr char END_CHAR = 126;
+
+ // one row per character, one column per state
+ std::vector<std::vector<int>> fTransitions;
+
+ // contains, for each state, the token id we should report when matching ends in that state (-1
+ // for no match)
+ std::vector<int> fAccepts;
+};
+
+#endif
diff --git a/src/sksl/lex/DFAState.h b/src/sksl/lex/DFAState.h
new file mode 100644
index 0000000000..b2c89fd4cb
--- /dev/null
+++ b/src/sksl/lex/DFAState.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKSL_DFASTATE
+#define SKSL_DFASTATE
+
+struct DFAState {
+ struct Label {
+ std::vector<int> fStates;
+
+ Label(std::vector<int> states)
+ : fStates(std::move(states)) {}
+
+ bool operator==(const Label& other) const {
+ return fStates == other.fStates;
+ }
+
+ bool operator!=(const Label& other) const {
+ return !(*this == other);
+ }
+
+ std::string description() const {
+ std::string result = "<";
+ const char* separator = "";
+ for (int s : fStates) {
+ result += separator;
+ result += std::to_string(s);
+ separator = ", ";
+ }
+ result += ">";
+ return result;
+ }
+ };
+
+ DFAState()
+ : fId(-1)
+ , fLabel({}) {}
+
+ DFAState(int id, Label label)
+ : fId(id)
+ , fLabel(std::move(label)) {}
+
+ DFAState(const DFAState& other) = delete;
+
+ int fId;
+
+ Label fLabel;
+
+ bool fIsScanned = false;
+};
+
+namespace std {
+ template<> struct hash<DFAState::Label> {
+ size_t operator()(const DFAState::Label& s) const {
+ int result = 0;
+ for (int i : s.fStates) {
+ result = result * 101 + i;
+ }
+ return result;
+ }
+ };
+} // namespace
+
+#endif
diff --git a/src/sksl/lex/LexUtil.h b/src/sksl/lex/LexUtil.h
new file mode 100644
index 0000000000..d2c462b0ed
--- /dev/null
+++ b/src/sksl/lex/LexUtil.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKSL_LEXUTIL
+#define SKSL_LEXUTIL
+
+#define ABORT(...) (fprintf(stderr, __VA_ARGS__), abort())
+#define ASSERT(x) (void)((x) || (ABORT("failed assert(%s): %s:%d\n", #x, __FILE__, __LINE__), 0))
+
+#endif
diff --git a/src/sksl/lex/Main.cpp b/src/sksl/lex/Main.cpp
new file mode 100644
index 0000000000..a5c7320356
--- /dev/null
+++ b/src/sksl/lex/Main.cpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "NFAtoDFA.h"
+#include "RegexParser.h"
+
+#include <fstream>
+#include <sstream>
+#include <string>
+
+/**
+ * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
+ * file is a text file with one token definition per line. Each line is of the form:
+ * <TOKEN_NAME> = <pattern>
+ * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string.
+ */
+
+static constexpr const char* HEADER =
+ "/*\n"
+ " * Copyright 2017 Google Inc.\n"
+ " *\n"
+ " * Use of this source code is governed by a BSD-style license that can be\n"
+ " * found in the LICENSE file.\n"
+ " */\n"
+ "/*****************************************************************************************\n"
+ " ******************** This file was generated by sksllex. Do not edit. *******************\n"
+ " *****************************************************************************************/\n";
+
+void writeH(const DFA& dfa, const char* lexer, const char* token,
+ const std::vector<std::string>& tokens, const char* hPath) {
+ std::ofstream out(hPath);
+ ASSERT(out.good());
+ out << HEADER;
+ out << "#ifndef SKSL_" << lexer << "\n";
+ out << "#define SKSL_" << lexer << "\n";
+ out << "#include <cstddef>\n";
+ out << "#include <cstdint>\n";
+ out << "namespace SkSL {\n";
+ out << "\n";
+ out << "struct " << token << " {\n";
+ out << " enum Kind {\n";
+ for (const std::string& t : tokens) {
+ out << " #undef " << t << "\n";
+ out << " " << t << ",\n";
+ }
+ out << " };\n";
+ out << "\n";
+ out << " " << token << "()\n";
+ out << " : fKind(Kind::INVALID)\n";
+ out << " , fOffset(-1)\n";
+ out << " , fLength(-1) {}\n";
+ out << "\n";
+ out << " " << token << "(Kind kind, int offset, int length)\n";
+ out << " : fKind(kind)\n";
+ out << " , fOffset(offset)\n";
+ out << " , fLength(length) {}\n";
+ out << "\n";
+ out << " Kind fKind;\n";
+ out << " int fOffset;\n";
+ out << " int fLength;\n";
+ out << "};\n";
+ out << "\n";
+ out << "class " << lexer << " {\n";
+ out << "public:\n";
+ out << " void start(const char* text, size_t length) {\n";
+ out << " fText = text;\n";
+ out << " fLength = length;\n";
+ out << " fOffset = 0;\n";
+ out << " }\n";
+ out << "\n";
+ out << " " << token << " next();\n";
+ out << "\n";
+ out << "private:\n";
+ out << " const char* fText;\n";
+ out << " int fLength;\n";
+ out << " int fOffset;\n";
+ out << "};\n";
+ out << "\n";
+ out << "} // namespace\n";
+ out << "#endif\n";
+}
+
+void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
+ const char* cppPath) {
+ std::ofstream out(cppPath);
+ ASSERT(out.good());
+ out << HEADER;
+ out << "#include \"" << include << "\"\n";
+ out << "\n";
+ out << "namespace SkSL {\n";
+ out << "\n";
+
+ size_t states = 0;
+ for (const auto& row : dfa.fTransitions) {
+ states = std::max(states, row.size());
+ }
+ out << "static int16_t transitions[" << DFA::END_CHAR + 1 << "][" << states << "] = {\n";
+ for (char c = 0; c <= DFA::END_CHAR; ++c) {
+ out << " {";
+ for (size_t i = 0; i < states; ++i) {
+ if ((size_t) c < dfa.fTransitions.size() && i < dfa.fTransitions[c].size()) {
+ out << " " << dfa.fTransitions[c][i] << ",";
+ } else {
+ out << " 0,";
+ }
+ }
+ out << " },\n";
+ }
+ out << "};\n";
+ out << "\n";
+
+ out << "static int16_t accepts[" << states << "] = {";
+ for (size_t i = 0; i < states; ++i) {
+ if (i < dfa.fAccepts.size()) {
+ out << " " << dfa.fAccepts[i] << ",";
+ } else {
+ out << " -1,";
+ }
+ }
+ out << " };\n";
+ out << "\n";
+
+ out << token << " " << lexer << "::next() {\n";;
+ out << " int startOffset = fOffset;\n";
+ out << " if (startOffset == fLength) {\n";
+ out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n";
+ out << " }\n";
+ out << " int offset = startOffset;\n";
+ out << " int state = 1;\n";
+ out << " " << token << "::Kind lastAccept = " << token << "::Kind::INVALID;\n";
+ out << " int lastAcceptEnd = startOffset + 1;\n";
+ out << " while (offset < fLength) {\n";
+ out << " state = transitions[(int) fText[offset]][state];\n";
+ out << " ++offset;\n";
+ out << " if (!state) {\n";
+ out << " break;\n";
+ out << " }\n";
+ out << " if (accepts[state]) {\n";
+ out << " lastAccept = (" << token << "::Kind) accepts[state];\n";
+ out << " lastAcceptEnd = offset;\n";
+ out << " }\n";
+ out << " }\n";
+ out << " fOffset = lastAcceptEnd;\n";
+ out << " return " << token << "(lastAccept, startOffset, lastAcceptEnd - startOffset);\n";
+ out << "}\n";
+ out << "\n";
+ out << "} // namespace\n";
+}
+
+void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
+ const char* cppPath) {
+ NFA nfa;
+ std::vector<std::string> tokens;
+ tokens.push_back("END_OF_FILE");
+ std::string line;
+ std::ifstream in(inPath);
+ while (std::getline(in, line)) {
+ std::istringstream split(line);
+ std::string name, delimiter, pattern;
+ if (split >> name >> delimiter >> pattern) {
+ ASSERT(split.eof());
+ ASSERT(name != "");
+ ASSERT(delimiter == "=");
+ ASSERT(pattern != "");
+ tokens.push_back(name);
+ if (pattern[0] == '"') {
+ ASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
+ RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
+ for (size_t i = 2; i < pattern.size() - 1; ++i) {
+ node = RegexNode(RegexNode::kConcat_Kind, node,
+ RegexNode(RegexNode::kChar_Kind, pattern[i]));
+ }
+ nfa.addRegex(node);
+ }
+ else {
+ nfa.addRegex(RegexParser().parse(pattern));
+ }
+ }
+ }
+ NFAtoDFA converter(&nfa);
+ DFA dfa = converter.convert();
+ writeH(dfa, lexer, token, tokens, hPath);
+ writeCPP(dfa, lexer, token, (std::string("SkSL") + lexer + ".h").c_str(), cppPath);
+}
+
+int main(int argc, const char** argv) {
+ if (argc != 6) {
+ printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
+ exit(1);
+ }
+ process(argv[1], argv[2], argv[3], argv[4], argv[5]);
+ return 0;
+}
diff --git a/src/sksl/lex/NFA.cpp b/src/sksl/lex/NFA.cpp
new file mode 100644
index 0000000000..d1eb7497b3
--- /dev/null
+++ b/src/sksl/lex/NFA.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "NFA.h"
+
+int NFA::match(std::string s) const {
+ std::vector<int> states = fStartStates;
+ for (size_t i = 0; i < s.size(); ++i) {
+ std::vector<int> next;
+ for (int id : states) {
+ if (fStates[id].accept(s[i])) {
+ for (int nextId : fStates[id].fNext) {
+ if (fStates[nextId].fKind != NFAState::kRemapped_Kind) {
+ next.push_back(nextId);
+ } else {
+ next.insert(next.end(), fStates[nextId].fData.begin(),
+ fStates[nextId].fData.end());
+ }
+ }
+ }
+ }
+ if (!next.size()) {
+ return -1;
+ }
+ states = next;
+ }
+ int accept = -1;
+ for (int id : states) {
+ if (fStates[id].fKind == NFAState::kAccept_Kind) {
+ int result = fStates[id].fData[0];
+ if (accept == -1 || result < accept) {
+ accept = result;
+ }
+ }
+ }
+ return accept;
+}
diff --git a/src/sksl/lex/NFA.h b/src/sksl/lex/NFA.h
new file mode 100644
index 0000000000..e454d37f2a
--- /dev/null
+++ b/src/sksl/lex/NFA.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKSL_NFA
+#define SKSL_NFA
+
+#include "NFAState.h"
+#include "RegexNode.h"
+
+/**
+ * A nondeterministic finite automaton for matching regular expressions. The NFA is initialized with
+ * a number of regular expressions, and then matches a string against all of them simultaneously.
+ */
+struct NFA {
+ /**
+ * Adds a new regular expression to the set of expressions matched by this automaton, returning
+ * its index.
+ */
+ int addRegex(const RegexNode& regex) {
+ std::vector<int> accept;
+ // we reserve token 0 for END_OF_FILE, so this starts at 1
+ accept.push_back(this->addState(NFAState(++fRegexCount)));
+ std::vector<int> startStates = regex.createStates(this, accept);
+ fStartStates.insert(fStartStates.end(), startStates.begin(), startStates.end());
+ return fStartStates.size() - 1;
+ }
+
+ /**
+ * Adds a new state to the NFA, returning its index.
+ */
+ int addState(NFAState s) {
+ fStates.push_back(std::move(s));
+ return fStates.size() - 1;
+ }
+
+ /**
+ * Matches a string against all of the regexes added to this NFA. Returns the index of the first
+ * (in addRegex order) matching expression, or -1 if no match. This is relatively slow and used
+ * only for debugging purposes; the NFA should be converted to a DFA before actual use.
+ */
+ int match(std::string s) const;
+
+ int fRegexCount = 0;
+
+ std::vector<NFAState> fStates;
+
+ std::vector<int> fStartStates;
+};
+
+#endif
diff --git a/src/sksl/lex/NFAState.h b/src/sksl/lex/NFAState.h
new file mode 100644
index 0000000000..5923507571
--- /dev/null
+++ b/src/sksl/lex/NFAState.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKSL_NFASTATE
+#define SKSL_NFASTATE
+
+#include <string>
+#include <vector>
+
+#include "LexUtil.h"
+
+struct NFAState {
+ enum Kind {
+ // represents an accept state - if the NFA ends up in this state, we have successfully
+ // matched the token indicated by fData[0]
+ kAccept_Kind,
+ // matches the single character fChar
+ kChar_Kind,
+ // the regex '.'; matches any char but '\n'
+ kDot_Kind,
+ // a state which serves as a placeholder for the states indicated in fData. When we
+ // transition to this state, we instead transition to all of the fData states.
+ kRemapped_Kind,
+ // contains a list of true/false values in fData. fData[c] tells us whether we accept the
+ // character c.
+ kTable_Kind
+ };
+
+ NFAState(Kind kind, std::vector<int> next)
+ : fKind(kind)
+ , fNext(std::move(next)) {}
+
+ NFAState(char c, std::vector<int> next)
+ : fKind(kChar_Kind)
+ , fChar(c)
+ , fNext(std::move(next)) {}
+
+ NFAState(std::vector<int> states)
+ : fKind(kRemapped_Kind)
+ , fData(std::move(states)) {}
+
+ NFAState(bool inverse, std::vector<bool> accepts, std::vector<int> next)
+ : fKind(kTable_Kind)
+ , fInverse(inverse)
+ , fNext(std::move(next)) {
+ for (bool b : accepts) {
+ fData.push_back(b);
+ }
+ }
+
+ NFAState(int token)
+ : fKind(kAccept_Kind) {
+ fData.push_back(token);
+ }
+
+ bool accept(char c) const {
+ switch (fKind) {
+ case kAccept_Kind:
+ return false;
+ case kChar_Kind:
+ return c == fChar;
+ case kDot_Kind:
+ return c != '\n';
+ case kTable_Kind: {
+ bool value;
+ if ((size_t) c < fData.size()) {
+ value = fData[c];
+ } else {
+ value = false;
+ }
+ return value != fInverse;
+ }
+ default:
+ ABORT("unreachable");
+ }
+ }
+
+ std::string description() const {
+ switch (fKind) {
+ case kAccept_Kind:
+ return "Accept(" + std::to_string(fData[0]) + ")";
+ case kChar_Kind: {
+ std::string result = "Char('" + std::string(1, fChar) + "'";
+ for (int v : fNext) {
+ result += ", ";
+ result += std::to_string(v);
+ }
+ result += ")";
+ return result;
+ }
+ case kDot_Kind: {
+ std::string result = "Dot(";
+ const char* separator = "";
+ for (int v : fNext) {
+ result += separator;
+ result += std::to_string(v);
+ separator = ", ";
+ }
+ result += ")";
+ return result;
+ }
+ case kRemapped_Kind: {
+ std::string result = "Remapped(";
+ const char* separator = "";
+ for (int v : fData) {
+ result += separator;
+ result += std::to_string(v);
+ separator = ", ";
+ }
+ result += ")";
+ return result;
+ }
+ case kTable_Kind: {
+ std::string result = std::string("Table(") + (fInverse ? "true" : "false") + ", [";
+ const char* separator = "";
+ for (int v : fData) {
+ result += separator;
+ result += v ? "true" : "false";
+ separator = ", ";
+ }
+ result += "]";
+ for (int n : fNext) {
+ result += ", ";
+ result += std::to_string(n);
+ }
+ result += ")";
+ return result;
+ }
+ default:
+ ABORT("unreachable");
+ }
+ }
+
+ Kind fKind;
+
+ char fChar = 0;
+
+ bool fInverse = false;
+
+ std::vector<int> fData;
+
+ // states we transition to upon a succesful match from this state
+ std::vector<int> fNext;
+};
+
+#endif
diff --git a/src/sksl/lex/NFAtoDFA.h b/src/sksl/lex/NFAtoDFA.h
new file mode 100644
index 0000000000..214c6d12d6
--- /dev/null
+++ b/src/sksl/lex/NFAtoDFA.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "DFA.h"
+#include "DFAState.h"
+#include "NFA.h"
+#include "NFAState.h"
+
+#include <algorithm>
+#include <climits>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+/**
+ * Converts a nondeterministic finite automaton to a deterministic finite automaton. Since NFAs and
+ * DFAs differ only in that an NFA allows multiple states at the same time, we can find each
+ * possible combination of simultaneous NFA states and give this combination a label. These labelled
+ * nodes are our DFA nodes, since we can only be in one such unique set of NFA states at a time.
+ *
+ * As an NFA can end up in multiple accept states at the same time (for instance, the token "while"
+ * is valid for both WHILE and IDENTIFIER), we disambiguate by preferring the first matching regex
+ * (in terms of the order in which they were added to the NFA).
+ */
+class NFAtoDFA {
+public:
+ NFAtoDFA(NFA* nfa)
+ : fNFA(*nfa) {}
+
+ /**
+ * Returns a DFA created from the NFA.
+ */
+ DFA convert() {
+ // create state 0, the "reject" state
+ getState(DFAState::Label({}));
+ // create a state representing being in all of the NFA's start states at once
+ std::vector<int> startStates = fNFA.fStartStates;
+ std::sort(startStates.begin(), startStates.end());
+ // this becomes state 1, our start state
+ DFAState* start = getState(DFAState::Label(startStates));
+ this->scanState(start);
+
+ int stateCount = 0;
+ for (const auto& row : fTransitions) {
+ stateCount = std::max(stateCount, (int) row.size());
+ }
+ return DFA(fTransitions, fAccepts);
+ }
+
+private:
+ /**
+ * Returns an existing state with the given label, or creates a new one and returns it.
+ */
+ DFAState* getState(DFAState::Label label) {
+ auto found = fStates.find(label);
+ if (found == fStates.end()) {
+ int id = fStates.size();
+ fStates[label] = std::unique_ptr<DFAState>(new DFAState(id, label));
+ return fStates[label].get();
+ }
+ return found->second.get();
+ }
+
+ void add(int nfaState, std::vector<int>* states) {
+ NFAState state = fNFA.fStates[nfaState];
+ if (state.fKind == NFAState::kRemapped_Kind) {
+ for (int next : state.fData) {
+ this->add(next, states);
+ }
+ } else {
+ for (int state : *states) {
+ if (nfaState == state) {
+ return;
+ }
+ }
+ states->push_back(nfaState);
+ }
+ }
+
+ void addTransition(char c, int start, int next) {
+ while (fTransitions.size() <= (size_t) c) {
+ fTransitions.push_back(std::vector<int>());
+ }
+ std::vector<int>& row = fTransitions[c];
+ while (row.size() <= (size_t) start) {
+ row.push_back(-1);
+ }
+ row[start] = next;
+ }
+
+ void scanState(DFAState* state) {
+ state->fIsScanned = true;
+ for (char c = 9; c <= DFA::END_CHAR; ++c) {
+ std::vector<int> next;
+ int bestAccept = INT_MAX;
+ for (int idx : state->fLabel.fStates) {
+ const NFAState& nfaState = fNFA.fStates[idx];
+ if (nfaState.accept(c)) {
+ for (int nextState : nfaState.fNext) {
+ if (fNFA.fStates[nextState].fKind == NFAState::kAccept_Kind) {
+ bestAccept = std::min(bestAccept, fNFA.fStates[nextState].fData[0]);
+ }
+ this->add(nextState, &next);
+ }
+ }
+ }
+ std::sort(next.begin(), next.end());
+ DFAState* nextState = this->getState(DFAState::Label(next));
+ this->addTransition(c, state->fId, nextState->fId);
+ if (bestAccept != INT_MAX) {
+ while (fAccepts.size() <= (size_t) nextState->fId) {
+ fAccepts.push_back(-1);
+ }
+ fAccepts[nextState->fId] = bestAccept;
+ }
+ if (!nextState->fIsScanned) {
+ this->scanState(nextState);
+ }
+ }
+ }
+
+ const NFA& fNFA;
+ std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates;
+ std::vector<std::vector<int>> fTransitions;
+ std::vector<int> fAccepts;
+};
diff --git a/src/sksl/lex/RegexNode.cpp b/src/sksl/lex/RegexNode.cpp
new file mode 100644
index 0000000000..8915c60c44
--- /dev/null
+++ b/src/sksl/lex/RegexNode.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "RegexNode.h"
+
+#include "NFA.h"
+
+std::vector<int> RegexNode::createStates(NFA* nfa, const std::vector<int>& accept) const {
+ std::vector<int> result;
+ switch (fKind) {
+ case kChar_Kind:
+ result.push_back(nfa->addState(NFAState(fPayload.fChar, accept)));
+ break;
+ case kCharset_Kind: {
+ std::vector<bool> chars;
+ for (const RegexNode& child : fChildren) {
+ if (child.fKind == kChar_Kind) {
+ while (chars.size() <= (size_t) child.fPayload.fChar) {
+ chars.push_back(false);
+ }
+ chars[child.fPayload.fChar] = true;
+ } else {
+ ASSERT(child.fKind == kRange_Kind);
+ while (chars.size() <= (size_t) child.fChildren[1].fPayload.fChar) {
+ chars.push_back(false);
+ }
+ for (char c = child.fChildren[0].fPayload.fChar;
+ c <= child.fChildren[1].fPayload.fChar;
+ ++c) {
+ chars[c] = true;
+ }
+ }
+ }
+ result.push_back(nfa->addState(NFAState(fPayload.fBool, chars, accept)));
+ break;
+ }
+ case kConcat_Kind: {
+ std::vector<int> right = fChildren[1].createStates(nfa, accept);
+ result = fChildren[0].createStates(nfa, right);
+ break;
+ }
+ case kDot_Kind:
+ result.push_back(nfa->addState(NFAState(NFAState::kDot_Kind, accept)));
+ break;
+ case kOr_Kind: {
+ std::vector<int> states = fChildren[0].createStates(nfa, accept);
+ result.insert(result.end(), states.begin(), states.end());
+ states = fChildren[1].createStates(nfa, accept);
+ result.insert(result.end(), states.begin(), states.end());
+ break;
+ }
+ case kPlus_Kind: {
+ std::vector<int> next = accept;
+ std::vector<int> placeholder;
+ int id = nfa->addState(NFAState(placeholder));
+ next.push_back(id);
+ result = fChildren[0].createStates(nfa, next);
+ nfa->fStates[id] = NFAState(result);
+ break;
+ }
+ case kQuestion_Kind:
+ result = fChildren[0].createStates(nfa, accept);
+ result.insert(result.end(), accept.begin(), accept.end());
+ break;
+ case kRange_Kind:
+ ABORT("unreachable");
+ case kStar_Kind: {
+ std::vector<int> next = accept;
+ std::vector<int> placeholder;
+ int id = nfa->addState(NFAState(placeholder));
+ next.push_back(id);
+ result = fChildren[0].createStates(nfa, next);
+ result.insert(result.end(), accept.begin(), accept.end());
+ nfa->fStates[id] = NFAState(result);
+ break;
+ }
+ }
+ return result;
+}
+
+std::string RegexNode::description() const {
+ switch (fKind) {
+ case kChar_Kind:
+ return std::string(1, fPayload.fChar);
+ case kCharset_Kind: {
+ std::string result("[");
+ if (fPayload.fBool) {
+ result += "^";
+ }
+ for (const RegexNode& c : fChildren) {
+ result += c.description();
+ }
+ result += "]";
+ return result;
+ }
+ case kConcat_Kind:
+ return fChildren[0].description() + fChildren[1].description();
+ case kDot_Kind:
+ return ".";
+ case kOr_Kind:
+ return "(" + fChildren[0].description() + "|" + fChildren[1].description() + ")";
+ case kPlus_Kind:
+ return fChildren[0].description() + "+";
+ case kQuestion_Kind:
+ return fChildren[0].description() + "?";
+ case kRange_Kind:
+ return fChildren[0].description() + "-" + fChildren[1].description();
+ case kStar_Kind:
+ return fChildren[0].description() + "*";
+ default:
+ return "<" + std::to_string(fKind) + ">";
+ }
+}
diff --git a/src/sksl/lex/RegexNode.h b/src/sksl/lex/RegexNode.h
new file mode 100644
index 0000000000..e3aa65b3d1
--- /dev/null
+++ b/src/sksl/lex/RegexNode.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKSL_REGEXNODE
+#define SKSL_REGEXNODE
+
+#include <string>
+#include <vector>
+
+struct NFA;
+
+/**
+ * Represents a node in the parse tree of a regular expression.
+ */
+struct RegexNode {
+ enum Kind {
+ kChar_Kind,
+ kCharset_Kind,
+ kConcat_Kind,
+ kDot_Kind,
+ kOr_Kind,
+ kPlus_Kind,
+ kRange_Kind,
+ kQuestion_Kind,
+ kStar_Kind
+ };
+
+ RegexNode(Kind kind)
+ : fKind(kind) {}
+
+ RegexNode(Kind kind, char payload)
+ : fKind(kind) {
+ fPayload.fChar = payload;
+ }
+
+ RegexNode(Kind kind, const char* children)
+ : fKind(kind) {
+ fPayload.fBool = false;
+ while (*children != '\0') {
+ fChildren.emplace_back(kChar_Kind, *children);
+ ++children;
+ }
+ }
+
+ RegexNode(Kind kind, RegexNode child)
+ : fKind(kind) {
+ fChildren.push_back(std::move(child));
+ }
+
+ RegexNode(Kind kind, RegexNode child1, RegexNode child2)
+ : fKind(kind) {
+ fChildren.push_back(std::move(child1));
+ fChildren.push_back(std::move(child2));
+ }
+
+ /**
+ * Creates NFA states for this node, with a successful match against this node resulting in a
+ * transition to all of the states in the accept vector.
+ */
+ std::vector<int> createStates(NFA* nfa, const std::vector<int>& accept) const;
+
+ std::string description() const;
+
+ Kind fKind;
+
+ union Payload {
+ char fChar;
+ bool fBool;
+ } fPayload;
+
+ std::vector<RegexNode> fChildren;
+};
+
+#endif
diff --git a/src/sksl/lex/RegexParser.cpp b/src/sksl/lex/RegexParser.cpp
new file mode 100644
index 0000000000..154df159db
--- /dev/null
+++ b/src/sksl/lex/RegexParser.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "RegexParser.h"
+
+#include "LexUtil.h"
+
+RegexNode RegexParser::parse(std::string source) {
+ fSource = source;
+ fIndex = 0;
+ ASSERT(fStack.size() == 0);
+ this->regex();
+ ASSERT(fStack.size() == 1);
+ ASSERT(fIndex == source.size());
+ return this->pop();
+}
+
+char RegexParser::peek() {
+ if (fIndex >= fSource.size()) {
+ return END;
+ }
+ return fSource[fIndex];
+}
+
+void RegexParser::expect(char c) {
+ if (this->peek() != c) {
+ printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
+ exit(1);
+ }
+ ++fIndex;
+}
+
+RegexNode RegexParser::pop() {
+ RegexNode result = fStack.top();
+ fStack.pop();
+ return result;
+}
+
+void RegexParser::term() {
+ switch (this->peek()) {
+ case '(': this->group(); break;
+ case '[': this->set(); break;
+ case '.': this->dot(); break;
+ default: this->literal();
+ }
+}
+
+void RegexParser::quantifiedTerm() {
+ this->term();
+ switch (this->peek()) {
+ case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break;
+ case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break;
+ case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
+ default: break;
+ }
+}
+
+void RegexParser::sequence() {
+ this->quantifiedTerm();
+ for (;;) {
+ switch (this->peek()) {
+ case END: // fall through
+ case '|': // fall through
+ case ')': return;
+ default:
+ this->sequence();
+ RegexNode right = this->pop();
+ RegexNode left = this->pop();
+ fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
+ }
+ }
+}
+
+RegexNode RegexParser::escapeSequence(char c) {
+ switch (c) {
+ case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
+ case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
+ case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
+ case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
+ default: return RegexNode(RegexNode::kChar_Kind, c);
+ }
+}
+
+void RegexParser::literal() {
+ char c = this->peek();
+ if (c == '\\') {
+ ++fIndex;
+ fStack.push(this->escapeSequence(peek()));
+ ++fIndex;
+ }
+ else {
+ fStack.push(RegexNode(RegexNode::kChar_Kind, c));
+ ++fIndex;
+ }
+}
+
+void RegexParser::dot() {
+ this->expect('.');
+ fStack.push(RegexNode(RegexNode::kDot_Kind));
+}
+
+void RegexParser::group() {
+ this->expect('(');
+ this->regex();
+ this->expect(')');
+}
+
+void RegexParser::setItem() {
+ this->literal();
+ if (this->peek() == '-') {
+ ++fIndex;
+ if (peek() == ']') {
+ fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
+ }
+ else {
+ literal();
+ RegexNode end = this->pop();
+ ASSERT(end.fKind == RegexNode::kChar_Kind);
+ RegexNode start = this->pop();
+ ASSERT(start.fKind == RegexNode::kChar_Kind);
+ fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
+ }
+ }
+}
+
+void RegexParser::set() {
+ expect('[');
+ size_t depth = fStack.size();
+ RegexNode set(RegexNode::kCharset_Kind);
+ if (this->peek() == '^') {
+ ++fIndex;
+ set.fPayload.fBool = true;
+ }
+ else {
+ set.fPayload.fBool = false;
+ }
+ for (;;) {
+ switch (this->peek()) {
+ case ']':
+ ++fIndex;
+ while (fStack.size() > depth) {
+ set.fChildren.push_back(this->pop());
+ }
+ fStack.push(std::move(set));
+ return;
+ case END:
+ printf("unterminated character set\n");
+ exit(1);
+ default:
+ this->setItem();
+ }
+ }
+}
+
+void RegexParser::regex() {
+ this->sequence();
+ switch (this->peek()) {
+ case '|': {
+ ++fIndex;
+ this->regex();
+ RegexNode right = this->pop();
+ RegexNode left = this->pop();
+ fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
+ break;
+ }
+ case END: // fall through
+ case ')':
+ return;
+ default:
+ ASSERT(false);
+ }
+}
diff --git a/src/sksl/lex/RegexParser.h b/src/sksl/lex/RegexParser.h
new file mode 100644
index 0000000000..7de546fc17
--- /dev/null
+++ b/src/sksl/lex/RegexParser.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SKSL_REGEXPARSER
+#define SKSL_REGEXPARSER
+
+#include "RegexNode.h"
+
+#include <stack>
+#include <string>
+
+/**
+ * Turns a simple regular expression into a parse tree. The regular expression syntax supports only
+ * the basic quantifiers ('*', '+', and '?'), alternation ('|'), character sets ('[a-z]'), and
+ * groups ('()').
+ */
+class RegexParser {
+public:
+ RegexNode parse(std::string source);
+
+private:
+ static constexpr char END = '\0';
+
+ char peek();
+
+ void expect(char c);
+
+ RegexNode pop();
+
+ /**
+ * Matches a char literal, parenthesized group, character set, or dot ('.').
+ */
+ void term();
+
+ /**
+ * Matches a term followed by an optional quantifier ('*', '+', or '?').
+ */
+ void quantifiedTerm();
+
+ /**
+ * Matches a sequence of quantifiedTerms.
+ */
+ void sequence();
+
+ /**
+ * Returns a node representing the given escape character (e.g. escapeSequence('n') returns a
+ * node which matches a newline character).
+ */
+ RegexNode escapeSequence(char c);
+
+ /**
+ * Matches a literal character or escape sequence.
+ */
+ void literal();
+
+ /**
+ * Matches a dot ('.').
+ */
+ void dot();
+
+ /**
+ * Matches a parenthesized group.
+ */
+ void group();
+
+ /**
+ * Matches a literal character, escape sequence, or character range from a character set.
+ */
+ void setItem();
+
+ /**
+ * Matches a character set.
+ */
+ void set();
+
+ void regex();
+
+ std::string fSource;
+
+ size_t fIndex;
+
+ std::stack<RegexNode> fStack;
+};
+
+#endif
diff --git a/src/sksl/lex/sksl.lex b/src/sksl/lex/sksl.lex
new file mode 100644
index 0000000000..ada5e248de
--- /dev/null
+++ b/src/sksl/lex/sksl.lex
@@ -0,0 +1,95 @@
+FLOAT_LITERAL = [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?|[0-9]+([eE][+-]?[0-9]+)
+INT_LITERAL = [0-9]+|0x[0-9a-fA-F]+
+TRUE_LITERAL = "true"
+FALSE_LITERAL = "false"
+IF = "if"
+STATIC_IF = "@if"
+ELSE = "else"
+FOR = "for"
+WHILE = "while"
+DO = "do"
+SWITCH = "switch"
+STATIC_SWITCH = "@switch"
+CASE = "case"
+DEFAULT = "default"
+BREAK = "break"
+CONTINUE = "continue"
+DISCARD = "discard"
+RETURN = "return"
+IN = "in"
+OUT = "out"
+INOUT = "inout"
+UNIFORM = "uniform"
+CONST = "const"
+LOWP = "lowp"
+MEDIUMP = "mediump"
+HIGHP = "highp"
+FLAT = "flat"
+NOPERSPECTIVE = "noperspective"
+READONLY = "readonly"
+WRITEONLY = "writeonly"
+COHERENT = "coherent"
+VOLATILE = "volatile"
+RESTRICT = "restrict"
+BUFFER = "buffer"
+HASSIDEEFFECTS = "sk_has_side_effects"
+STRUCT = "struct"
+LAYOUT = "layout"
+PRECISION = "precision"
+IDENTIFIER = [a-zA-Z_$]([0-9]|[a-zA-Z_$])*
+DIRECTIVE = #[a-zA-Z_$]([0-9]|[a-zA-Z_$])*
+SECTION = @[a-zA-Z_$]([0-9]|[a-zA-Z_$])*
+LPAREN = "("
+RPAREN = ")"
+LBRACE = "{"
+RBRACE = "}"
+LBRACKET = "["
+RBRACKET = "]"
+DOT = "."
+COMMA = ","
+PLUSPLUS = "++"
+MINUSMINUS = "--"
+PLUS = "+"
+MINUS = "-"
+STAR = "*"
+SLASH = "/"
+PERCENT = "%"
+SHL = "<<"
+SHR = ">>"
+BITWISEOR = "|"
+BITWISEXOR = "^"
+BITWISEAND = "&"
+BITWISENOT = "~"
+LOGICALOR = "||"
+LOGICALXOR = "^^"
+LOGICALAND = "&&"
+LOGICALNOT = "!"
+QUESTION = "?"
+COLON = ":"
+EQ = "="
+EQEQ = "=="
+NEQ = "!="
+GT = ">"
+LT = "<"
+GTEQ = ">="
+LTEQ = "<="
+PLUSEQ = "+="
+MINUSEQ = "-="
+STAREQ = "*="
+SLASHEQ = "/="
+PERCENTEQ = "%="
+SHLEQ = "<<="
+SHREQ = ">>="
+BITWISEOREQ = "|="
+BITWISEXOREQ = "^="
+BITWISEANDEQ = "&="
+LOGICALOREQ = "||="
+LOGICALXOREQ = "^^="
+LOGICALANDEQ = "&&="
+SEMICOLON = ";"
+ARROW = "->"
+COLONCOLON = "::"
+WHITESPACE = \s+
+LINE_COMMENT = //.*
+BLOCK_COMMENT = /\*([^*]|\*[^/])*\*/
+INVALID = . \ No newline at end of file