diff options
author | Ethan Nicholas <ethannicholas@google.com> | 2017-09-06 23:15:27 +0000 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2017-09-06 23:15:39 +0000 |
commit | 3ed4781ee1bd5af9b3ee2623e5356e86ce36e812 (patch) | |
tree | b2f7fe892b4cc066d3e1fff8ffa87f61a8e4456b /src | |
parent | 276066b517b116afdd23bdbd72902469de2ceb01 (diff) |
Revert "Initial checkin of SkSL lexical analyzer generator (not actually in use yet)."
This reverts commit 3de0e496b5215bcf4b26db2d69dc84376dfd68d2.
Reason for revert: Google3 roll failure
Original change's description:
> Initial checkin of SkSL lexical analyzer generator (not actually in use yet).
>
> Bug: skia:
> Change-Id: I283feb9ac1cef134588204107ea019ce657e37b3
> Reviewed-on: https://skia-review.googlesource.com/42084
> Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>
TBR=bsalomon@google.com,ethannicholas@google.com
Change-Id: Ifa9a054aba9baa1e0d17309a2e1838b5d3b0bfec
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:
Reviewed-on: https://skia-review.googlesource.com/43340
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/sksl/lex/DFA.h | 32 | ||||
-rw-r--r-- | src/sksl/lex/DFAState.h | 68 | ||||
-rw-r--r-- | src/sksl/lex/LexUtil.h | 14 | ||||
-rw-r--r-- | src/sksl/lex/Main.cpp | 197 | ||||
-rw-r--r-- | src/sksl/lex/NFA.cpp | 41 | ||||
-rw-r--r-- | src/sksl/lex/NFA.h | 54 | ||||
-rw-r--r-- | src/sksl/lex/NFAState.h | 150 | ||||
-rw-r--r-- | src/sksl/lex/NFAtoDFA.h | 130 | ||||
-rw-r--r-- | src/sksl/lex/RegexNode.cpp | 117 | ||||
-rw-r--r-- | src/sksl/lex/RegexNode.h | 78 | ||||
-rw-r--r-- | src/sksl/lex/RegexParser.cpp | 176 | ||||
-rw-r--r-- | src/sksl/lex/RegexParser.h | 89 | ||||
-rw-r--r-- | src/sksl/lex/sksl.lex | 95 |
13 files changed, 0 insertions, 1241 deletions
diff --git a/src/sksl/lex/DFA.h b/src/sksl/lex/DFA.h deleted file mode 100644 index 51e85ca2f8..0000000000 --- a/src/sksl/lex/DFA.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_DFA -#define SKSL_DFA - -#include <string> -#include <vector> - -/** - * Tables representing a deterministic finite automaton for matching regular expressions. - */ -struct DFA { - DFA(std::vector<std::vector<int>> transitions, std::vector<int> accepts) - : fTransitions(transitions) - , fAccepts(accepts) {} - - static constexpr char END_CHAR = 126; - - // one row per character, one column per state - std::vector<std::vector<int>> fTransitions; - - // contains, for each state, the token id we should report when matching ends in that state (-1 - // for no match) - std::vector<int> fAccepts; -}; - -#endif diff --git a/src/sksl/lex/DFAState.h b/src/sksl/lex/DFAState.h deleted file mode 100644 index b2c89fd4cb..0000000000 --- a/src/sksl/lex/DFAState.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_DFASTATE -#define SKSL_DFASTATE - -struct DFAState { - struct Label { - std::vector<int> fStates; - - Label(std::vector<int> states) - : fStates(std::move(states)) {} - - bool operator==(const Label& other) const { - return fStates == other.fStates; - } - - bool operator!=(const Label& other) const { - return !(*this == other); - } - - std::string description() const { - std::string result = "<"; - const char* separator = ""; - for (int s : fStates) { - result += separator; - result += std::to_string(s); - separator = ", "; - } - result += ">"; - return result; - } - }; - - DFAState() - : fId(-1) - , fLabel({}) {} - - DFAState(int id, Label label) - : fId(id) - , fLabel(std::move(label)) {} - - DFAState(const DFAState& other) = delete; - - int fId; - - Label fLabel; - - bool fIsScanned = false; -}; - -namespace std { - template<> struct hash<DFAState::Label> { - size_t operator()(const DFAState::Label& s) const { - int result = 0; - for (int i : s.fStates) { - result = result * 101 + i; - } - return result; - } - }; -} // namespace - -#endif diff --git a/src/sksl/lex/LexUtil.h b/src/sksl/lex/LexUtil.h deleted file mode 100644 index d2c462b0ed..0000000000 --- a/src/sksl/lex/LexUtil.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_LEXUTIL -#define SKSL_LEXUTIL - -#define ABORT(...) (fprintf(stderr, __VA_ARGS__), abort()) -#define ASSERT(x) (void)((x) || (ABORT("failed assert(%s): %s:%d\n", #x, __FILE__, __LINE__), 0)) - -#endif diff --git a/src/sksl/lex/Main.cpp b/src/sksl/lex/Main.cpp deleted file mode 100644 index a5c7320356..0000000000 --- a/src/sksl/lex/Main.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "NFAtoDFA.h" -#include "RegexParser.h" - -#include <fstream> -#include <sstream> -#include <string> - -/** - * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex - * file is a text file with one token definition per line. Each line is of the form: - * <TOKEN_NAME> = <pattern> - * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string. - */ - -static constexpr const char* HEADER = - "/*\n" - " * Copyright 2017 Google Inc.\n" - " *\n" - " * Use of this source code is governed by a BSD-style license that can be\n" - " * found in the LICENSE file.\n" - " */\n" - "/*****************************************************************************************\n" - " ******************** This file was generated by sksllex. Do not edit. *******************\n" - " *****************************************************************************************/\n"; - -void writeH(const DFA& dfa, const char* lexer, const char* token, - const std::vector<std::string>& tokens, const char* hPath) { - std::ofstream out(hPath); - ASSERT(out.good()); - out << HEADER; - out << "#ifndef SKSL_" << lexer << "\n"; - out << "#define SKSL_" << lexer << "\n"; - out << "#include <cstddef>\n"; - out << "#include <cstdint>\n"; - out << "namespace SkSL {\n"; - out << "\n"; - out << "struct " << token << " {\n"; - out << " enum Kind {\n"; - for (const std::string& t : tokens) { - out << " #undef " << t << "\n"; - out << " " << t << ",\n"; - } - out << " };\n"; - out << "\n"; - out << " " << token << "()\n"; - out << " : fKind(Kind::INVALID)\n"; - out << " , fOffset(-1)\n"; - out << " , fLength(-1) {}\n"; - out << "\n"; - out << " " << token << "(Kind kind, int offset, int length)\n"; - out << " : fKind(kind)\n"; - out << " , fOffset(offset)\n"; - out << " , fLength(length) {}\n"; - out << "\n"; - out << " Kind fKind;\n"; - out << " int fOffset;\n"; - out << " int fLength;\n"; - out << "};\n"; - out << "\n"; - out << "class " << lexer << " {\n"; - out << "public:\n"; - out << " void start(const char* text, size_t length) {\n"; - out << " fText = text;\n"; - out << " fLength = length;\n"; - out << " fOffset = 0;\n"; - out << " }\n"; - out << "\n"; - out << " " << token << " next();\n"; - out << "\n"; - out << "private:\n"; - out << " const char* fText;\n"; - out << " int fLength;\n"; - out << " int fOffset;\n"; - out << "};\n"; - out << "\n"; - out << "} // namespace\n"; - out << "#endif\n"; -} - -void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include, - const char* cppPath) { - std::ofstream out(cppPath); - ASSERT(out.good()); - out << HEADER; - out << "#include \"" << include << "\"\n"; - out << "\n"; - out << "namespace SkSL {\n"; - out << "\n"; - - size_t states = 0; - for (const auto& row : dfa.fTransitions) { - states = std::max(states, row.size()); - } - out << "static int16_t transitions[" << DFA::END_CHAR + 1 << "][" << states << "] = {\n"; - for (char c = 0; c <= DFA::END_CHAR; ++c) { - out << " {"; - for (size_t i = 0; i < states; ++i) { - if ((size_t) c < dfa.fTransitions.size() && i < dfa.fTransitions[c].size()) { - out << " " << dfa.fTransitions[c][i] << ","; - } else { - out << " 0,"; - } - } - out << " },\n"; - } - out << "};\n"; - out << "\n"; - - out << "static int16_t accepts[" << states << "] = {"; - for (size_t i = 0; i < states; ++i) { - if (i < dfa.fAccepts.size()) { - out << " " << dfa.fAccepts[i] << ","; - } else { - out << " -1,"; - } - } - out << " };\n"; - out << "\n"; - - out << token << " " << lexer << "::next() {\n";; - out << " int startOffset = fOffset;\n"; - out << " if (startOffset == fLength) {\n"; - out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n"; - out << " }\n"; - out << " int offset = startOffset;\n"; - out << " int state = 1;\n"; - out << " " << token << "::Kind lastAccept = " << token << "::Kind::INVALID;\n"; - out << " int lastAcceptEnd = startOffset + 1;\n"; - out << " while (offset < fLength) {\n"; - out << " state = transitions[(int) fText[offset]][state];\n"; - out << " ++offset;\n"; - out << " if (!state) {\n"; - out << " break;\n"; - out << " }\n"; - out << " if (accepts[state]) {\n"; - out << " lastAccept = (" << token << "::Kind) accepts[state];\n"; - out << " lastAcceptEnd = offset;\n"; - out << " }\n"; - out << " }\n"; - out << " fOffset = lastAcceptEnd;\n"; - out << " return " << token << "(lastAccept, startOffset, lastAcceptEnd - startOffset);\n"; - out << "}\n"; - out << "\n"; - out << "} // namespace\n"; -} - -void process(const char* inPath, const char* lexer, const char* token, const char* hPath, - const char* cppPath) { - NFA nfa; - std::vector<std::string> tokens; - tokens.push_back("END_OF_FILE"); - std::string line; - std::ifstream in(inPath); - while (std::getline(in, line)) { - std::istringstream split(line); - std::string name, delimiter, pattern; - if (split >> name >> delimiter >> pattern) { - ASSERT(split.eof()); - ASSERT(name != ""); - ASSERT(delimiter == "="); - ASSERT(pattern != ""); - tokens.push_back(name); - if (pattern[0] == '"') { - ASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"'); - RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]); - for (size_t i = 2; i < pattern.size() - 1; ++i) { - node = RegexNode(RegexNode::kConcat_Kind, node, - RegexNode(RegexNode::kChar_Kind, pattern[i])); - } - nfa.addRegex(node); - } - else { - nfa.addRegex(RegexParser().parse(pattern)); - } - } - } - NFAtoDFA converter(&nfa); - DFA dfa = converter.convert(); - writeH(dfa, lexer, token, tokens, hPath); - writeCPP(dfa, lexer, token, (std::string("SkSL") + lexer + ".h").c_str(), cppPath); -} - -int main(int argc, const char** argv) { - if (argc != 6) { - printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n"); - exit(1); - } - process(argv[1], argv[2], argv[3], argv[4], argv[5]); - return 0; -} diff --git a/src/sksl/lex/NFA.cpp b/src/sksl/lex/NFA.cpp deleted file mode 100644 index d1eb7497b3..0000000000 --- a/src/sksl/lex/NFA.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "NFA.h" - -int NFA::match(std::string s) const { - std::vector<int> states = fStartStates; - for (size_t i = 0; i < s.size(); ++i) { - std::vector<int> next; - for (int id : states) { - if (fStates[id].accept(s[i])) { - for (int nextId : fStates[id].fNext) { - if (fStates[nextId].fKind != NFAState::kRemapped_Kind) { - next.push_back(nextId); - } else { - next.insert(next.end(), fStates[nextId].fData.begin(), - fStates[nextId].fData.end()); - } - } - } - } - if (!next.size()) { - return -1; - } - states = next; - } - int accept = -1; - for (int id : states) { - if (fStates[id].fKind == NFAState::kAccept_Kind) { - int result = fStates[id].fData[0]; - if (accept == -1 || result < accept) { - accept = result; - } - } - } - return accept; -} diff --git a/src/sksl/lex/NFA.h b/src/sksl/lex/NFA.h deleted file mode 100644 index e454d37f2a..0000000000 --- a/src/sksl/lex/NFA.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_NFA -#define SKSL_NFA - -#include "NFAState.h" -#include "RegexNode.h" - -/** - * A nondeterministic finite automaton for matching regular expressions. The NFA is initialized with - * a number of regular expressions, and then matches a string against all of them simultaneously. - */ -struct NFA { - /** - * Adds a new regular expression to the set of expressions matched by this automaton, returning - * its index. - */ - int addRegex(const RegexNode& regex) { - std::vector<int> accept; - // we reserve token 0 for END_OF_FILE, so this starts at 1 - accept.push_back(this->addState(NFAState(++fRegexCount))); - std::vector<int> startStates = regex.createStates(this, accept); - fStartStates.insert(fStartStates.end(), startStates.begin(), startStates.end()); - return fStartStates.size() - 1; - } - - /** - * Adds a new state to the NFA, returning its index. - */ - int addState(NFAState s) { - fStates.push_back(std::move(s)); - return fStates.size() - 1; - } - - /** - * Matches a string against all of the regexes added to this NFA. Returns the index of the first - * (in addRegex order) matching expression, or -1 if no match. This is relatively slow and used - * only for debugging purposes; the NFA should be converted to a DFA before actual use. - */ - int match(std::string s) const; - - int fRegexCount = 0; - - std::vector<NFAState> fStates; - - std::vector<int> fStartStates; -}; - -#endif diff --git a/src/sksl/lex/NFAState.h b/src/sksl/lex/NFAState.h deleted file mode 100644 index 5923507571..0000000000 --- a/src/sksl/lex/NFAState.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_NFASTATE -#define SKSL_NFASTATE - -#include <string> -#include <vector> - -#include "LexUtil.h" - -struct NFAState { - enum Kind { - // represents an accept state - if the NFA ends up in this state, we have successfully - // matched the token indicated by fData[0] - kAccept_Kind, - // matches the single character fChar - kChar_Kind, - // the regex '.'; matches any char but '\n' - kDot_Kind, - // a state which serves as a placeholder for the states indicated in fData. When we - // transition to this state, we instead transition to all of the fData states. - kRemapped_Kind, - // contains a list of true/false values in fData. fData[c] tells us whether we accept the - // character c. - kTable_Kind - }; - - NFAState(Kind kind, std::vector<int> next) - : fKind(kind) - , fNext(std::move(next)) {} - - NFAState(char c, std::vector<int> next) - : fKind(kChar_Kind) - , fChar(c) - , fNext(std::move(next)) {} - - NFAState(std::vector<int> states) - : fKind(kRemapped_Kind) - , fData(std::move(states)) {} - - NFAState(bool inverse, std::vector<bool> accepts, std::vector<int> next) - : fKind(kTable_Kind) - , fInverse(inverse) - , fNext(std::move(next)) { - for (bool b : accepts) { - fData.push_back(b); - } - } - - NFAState(int token) - : fKind(kAccept_Kind) { - fData.push_back(token); - } - - bool accept(char c) const { - switch (fKind) { - case kAccept_Kind: - return false; - case kChar_Kind: - return c == fChar; - case kDot_Kind: - return c != '\n'; - case kTable_Kind: { - bool value; - if ((size_t) c < fData.size()) { - value = fData[c]; - } else { - value = false; - } - return value != fInverse; - } - default: - ABORT("unreachable"); - } - } - - std::string description() const { - switch (fKind) { - case kAccept_Kind: - return "Accept(" + std::to_string(fData[0]) + ")"; - case kChar_Kind: { - std::string result = "Char('" + std::string(1, fChar) + "'"; - for (int v : fNext) { - result += ", "; - result += std::to_string(v); - } - result += ")"; - return result; - } - case kDot_Kind: { - std::string result = "Dot("; - const char* separator = ""; - for (int v : fNext) { - result += separator; - result += std::to_string(v); - separator = ", "; - } - result += ")"; - return result; - } - case kRemapped_Kind: { - std::string result = "Remapped("; - const char* separator = ""; - for (int v : fData) { - result += separator; - result += std::to_string(v); - separator = ", "; - } - result += ")"; - return result; - } - case kTable_Kind: { - std::string result = std::string("Table(") + (fInverse ? "true" : "false") + ", ["; - const char* separator = ""; - for (int v : fData) { - result += separator; - result += v ? "true" : "false"; - separator = ", "; - } - result += "]"; - for (int n : fNext) { - result += ", "; - result += std::to_string(n); - } - result += ")"; - return result; - } - default: - ABORT("unreachable"); - } - } - - Kind fKind; - - char fChar = 0; - - bool fInverse = false; - - std::vector<int> fData; - - // states we transition to upon a succesful match from this state - std::vector<int> fNext; -}; - -#endif diff --git a/src/sksl/lex/NFAtoDFA.h b/src/sksl/lex/NFAtoDFA.h deleted file mode 100644 index 214c6d12d6..0000000000 --- a/src/sksl/lex/NFAtoDFA.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "DFA.h" -#include "DFAState.h" -#include "NFA.h" -#include "NFAState.h" - -#include <algorithm> -#include <climits> -#include <memory> -#include <unordered_map> -#include <vector> - -/** - * Converts a nondeterministic finite automaton to a deterministic finite automaton. Since NFAs and - * DFAs differ only in that an NFA allows multiple states at the same time, we can find each - * possible combination of simultaneous NFA states and give this combination a label. These labelled - * nodes are our DFA nodes, since we can only be in one such unique set of NFA states at a time. - * - * As an NFA can end up in multiple accept states at the same time (for instance, the token "while" - * is valid for both WHILE and IDENTIFIER), we disambiguate by preferring the first matching regex - * (in terms of the order in which they were added to the NFA). - */ -class NFAtoDFA { -public: - NFAtoDFA(NFA* nfa) - : fNFA(*nfa) {} - - /** - * Returns a DFA created from the NFA. - */ - DFA convert() { - // create state 0, the "reject" state - getState(DFAState::Label({})); - // create a state representing being in all of the NFA's start states at once - std::vector<int> startStates = fNFA.fStartStates; - std::sort(startStates.begin(), startStates.end()); - // this becomes state 1, our start state - DFAState* start = getState(DFAState::Label(startStates)); - this->scanState(start); - - int stateCount = 0; - for (const auto& row : fTransitions) { - stateCount = std::max(stateCount, (int) row.size()); - } - return DFA(fTransitions, fAccepts); - } - -private: - /** - * Returns an existing state with the given label, or creates a new one and returns it. - */ - DFAState* getState(DFAState::Label label) { - auto found = fStates.find(label); - if (found == fStates.end()) { - int id = fStates.size(); - fStates[label] = std::unique_ptr<DFAState>(new DFAState(id, label)); - return fStates[label].get(); - } - return found->second.get(); - } - - void add(int nfaState, std::vector<int>* states) { - NFAState state = fNFA.fStates[nfaState]; - if (state.fKind == NFAState::kRemapped_Kind) { - for (int next : state.fData) { - this->add(next, states); - } - } else { - for (int state : *states) { - if (nfaState == state) { - return; - } - } - states->push_back(nfaState); - } - } - - void addTransition(char c, int start, int next) { - while (fTransitions.size() <= (size_t) c) { - fTransitions.push_back(std::vector<int>()); - } - std::vector<int>& row = fTransitions[c]; - while (row.size() <= (size_t) start) { - row.push_back(-1); - } - row[start] = next; - } - - void scanState(DFAState* state) { - state->fIsScanned = true; - for (char c = 9; c <= DFA::END_CHAR; ++c) { - std::vector<int> next; - int bestAccept = INT_MAX; - for (int idx : state->fLabel.fStates) { - const NFAState& nfaState = fNFA.fStates[idx]; - if (nfaState.accept(c)) { - for (int nextState : nfaState.fNext) { - if (fNFA.fStates[nextState].fKind == NFAState::kAccept_Kind) { - bestAccept = std::min(bestAccept, fNFA.fStates[nextState].fData[0]); - } - this->add(nextState, &next); - } - } - } - std::sort(next.begin(), next.end()); - DFAState* nextState = this->getState(DFAState::Label(next)); - this->addTransition(c, state->fId, nextState->fId); - if (bestAccept != INT_MAX) { - while (fAccepts.size() <= (size_t) nextState->fId) { - fAccepts.push_back(-1); - } - fAccepts[nextState->fId] = bestAccept; - } - if (!nextState->fIsScanned) { - this->scanState(nextState); - } - } - } - - const NFA& fNFA; - std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates; - std::vector<std::vector<int>> fTransitions; - std::vector<int> fAccepts; -}; diff --git a/src/sksl/lex/RegexNode.cpp b/src/sksl/lex/RegexNode.cpp deleted file mode 100644 index 8915c60c44..0000000000 --- a/src/sksl/lex/RegexNode.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "RegexNode.h" - -#include "NFA.h" - -std::vector<int> RegexNode::createStates(NFA* nfa, const std::vector<int>& accept) const { - std::vector<int> result; - switch (fKind) { - case kChar_Kind: - result.push_back(nfa->addState(NFAState(fPayload.fChar, accept))); - break; - case kCharset_Kind: { - std::vector<bool> chars; - for (const RegexNode& child : fChildren) { - if (child.fKind == kChar_Kind) { - while (chars.size() <= (size_t) child.fPayload.fChar) { - chars.push_back(false); - } - chars[child.fPayload.fChar] = true; - } else { - ASSERT(child.fKind == kRange_Kind); - while (chars.size() <= (size_t) child.fChildren[1].fPayload.fChar) { - chars.push_back(false); - } - for (char c = child.fChildren[0].fPayload.fChar; - c <= child.fChildren[1].fPayload.fChar; - ++c) { - chars[c] = true; - } - } - } - result.push_back(nfa->addState(NFAState(fPayload.fBool, chars, accept))); - break; - } - case kConcat_Kind: { - std::vector<int> right = fChildren[1].createStates(nfa, accept); - result = fChildren[0].createStates(nfa, right); - break; - } - case kDot_Kind: - result.push_back(nfa->addState(NFAState(NFAState::kDot_Kind, accept))); - break; - case kOr_Kind: { - std::vector<int> states = fChildren[0].createStates(nfa, accept); - result.insert(result.end(), states.begin(), states.end()); - states = fChildren[1].createStates(nfa, accept); - result.insert(result.end(), states.begin(), states.end()); - break; - } - case kPlus_Kind: { - std::vector<int> next = accept; - std::vector<int> placeholder; - int id = nfa->addState(NFAState(placeholder)); - next.push_back(id); - result = fChildren[0].createStates(nfa, next); - nfa->fStates[id] = NFAState(result); - break; - } - case kQuestion_Kind: - result = fChildren[0].createStates(nfa, accept); - result.insert(result.end(), accept.begin(), accept.end()); - break; - case kRange_Kind: - ABORT("unreachable"); - case kStar_Kind: { - std::vector<int> next = accept; - std::vector<int> placeholder; - int id = nfa->addState(NFAState(placeholder)); - next.push_back(id); - result = fChildren[0].createStates(nfa, next); - result.insert(result.end(), accept.begin(), accept.end()); - nfa->fStates[id] = NFAState(result); - break; - } - } - return result; -} - -std::string RegexNode::description() const { - switch (fKind) { - case kChar_Kind: - return std::string(1, fPayload.fChar); - case kCharset_Kind: { - std::string result("["); - if (fPayload.fBool) { - result += "^"; - } - for (const RegexNode& c : fChildren) { - result += c.description(); - } - result += "]"; - return result; - } - case kConcat_Kind: - return fChildren[0].description() + fChildren[1].description(); - case kDot_Kind: - return "."; - case kOr_Kind: - return "(" + fChildren[0].description() + "|" + fChildren[1].description() + ")"; - case kPlus_Kind: - return fChildren[0].description() + "+"; - case kQuestion_Kind: - return fChildren[0].description() + "?"; - case kRange_Kind: - return fChildren[0].description() + "-" + fChildren[1].description(); - case kStar_Kind: - return fChildren[0].description() + "*"; - default: - return "<" + std::to_string(fKind) + ">"; - } -} diff --git a/src/sksl/lex/RegexNode.h b/src/sksl/lex/RegexNode.h deleted file mode 100644 index e3aa65b3d1..0000000000 --- a/src/sksl/lex/RegexNode.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_REGEXNODE -#define SKSL_REGEXNODE - -#include <string> -#include <vector> - -struct NFA; - -/** - * Represents a node in the parse tree of a regular expression. - */ -struct RegexNode { - enum Kind { - kChar_Kind, - kCharset_Kind, - kConcat_Kind, - kDot_Kind, - kOr_Kind, - kPlus_Kind, - kRange_Kind, - kQuestion_Kind, - kStar_Kind - }; - - RegexNode(Kind kind) - : fKind(kind) {} - - RegexNode(Kind kind, char payload) - : fKind(kind) { - fPayload.fChar = payload; - } - - RegexNode(Kind kind, const char* children) - : fKind(kind) { - fPayload.fBool = false; - while (*children != '\0') { - fChildren.emplace_back(kChar_Kind, *children); - ++children; - } - } - - RegexNode(Kind kind, RegexNode child) - : fKind(kind) { - fChildren.push_back(std::move(child)); - } - - RegexNode(Kind kind, RegexNode child1, RegexNode child2) - : fKind(kind) { - fChildren.push_back(std::move(child1)); - fChildren.push_back(std::move(child2)); - } - - /** - * Creates NFA states for this node, with a successful match against this node resulting in a - * transition to all of the states in the accept vector. - */ - std::vector<int> createStates(NFA* nfa, const std::vector<int>& accept) const; - - std::string description() const; - - Kind fKind; - - union Payload { - char fChar; - bool fBool; - } fPayload; - - std::vector<RegexNode> fChildren; -}; - -#endif diff --git a/src/sksl/lex/RegexParser.cpp b/src/sksl/lex/RegexParser.cpp deleted file mode 100644 index 154df159db..0000000000 --- a/src/sksl/lex/RegexParser.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "RegexParser.h" - -#include "LexUtil.h" - -RegexNode RegexParser::parse(std::string source) { - fSource = source; - fIndex = 0; - ASSERT(fStack.size() == 0); - this->regex(); - ASSERT(fStack.size() == 1); - ASSERT(fIndex == source.size()); - return this->pop(); -} - -char RegexParser::peek() { - if (fIndex >= fSource.size()) { - return END; - } - return fSource[fIndex]; -} - -void RegexParser::expect(char c) { - if (this->peek() != c) { - printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek()); - exit(1); - } - ++fIndex; -} - -RegexNode RegexParser::pop() { - RegexNode result = fStack.top(); - fStack.pop(); - return result; -} - -void RegexParser::term() { - switch (this->peek()) { - case '(': this->group(); break; - case '[': this->set(); break; - case '.': this->dot(); break; - default: this->literal(); - } -} - -void RegexParser::quantifiedTerm() { - this->term(); - switch (this->peek()) { - case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break; - case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break; - case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break; - default: break; - } -} - -void RegexParser::sequence() { - this->quantifiedTerm(); - for (;;) { - switch (this->peek()) { - case END: // fall through - case '|': // fall through - case ')': return; - default: - this->sequence(); - RegexNode right = this->pop(); - RegexNode left = this->pop(); - fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right)); - } - } -} - -RegexNode RegexParser::escapeSequence(char c) { - switch (c) { - case 'n': return RegexNode(RegexNode::kChar_Kind, '\n'); - case 'r': return RegexNode(RegexNode::kChar_Kind, '\r'); - case 't': return RegexNode(RegexNode::kChar_Kind, '\t'); - case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r"); - default: return RegexNode(RegexNode::kChar_Kind, c); - } -} - -void RegexParser::literal() { - char c = this->peek(); - if (c == '\\') { - ++fIndex; - fStack.push(this->escapeSequence(peek())); - ++fIndex; - } - else { - fStack.push(RegexNode(RegexNode::kChar_Kind, c)); - ++fIndex; - } -} - -void RegexParser::dot() { - this->expect('.'); - fStack.push(RegexNode(RegexNode::kDot_Kind)); -} - -void RegexParser::group() { - this->expect('('); - this->regex(); - this->expect(')'); -} - -void RegexParser::setItem() { - this->literal(); - if (this->peek() == '-') { - ++fIndex; - if (peek() == ']') { - fStack.push(RegexNode(RegexNode::kChar_Kind, '-')); - } - else { - literal(); - RegexNode end = this->pop(); - ASSERT(end.fKind == RegexNode::kChar_Kind); - RegexNode start = this->pop(); - ASSERT(start.fKind == RegexNode::kChar_Kind); - fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end))); - } - } -} - -void RegexParser::set() { - expect('['); - size_t depth = fStack.size(); - RegexNode set(RegexNode::kCharset_Kind); - if (this->peek() == '^') { - ++fIndex; - set.fPayload.fBool = true; - } - else { - set.fPayload.fBool = false; - } - for (;;) { - switch (this->peek()) { - case ']': - ++fIndex; - while (fStack.size() > depth) { - set.fChildren.push_back(this->pop()); - } - fStack.push(std::move(set)); - return; - case END: - printf("unterminated character set\n"); - exit(1); - default: - this->setItem(); - } - } -} - -void RegexParser::regex() { - this->sequence(); - switch (this->peek()) { - case '|': { - ++fIndex; - this->regex(); - RegexNode right = this->pop(); - RegexNode left = this->pop(); - fStack.push(RegexNode(RegexNode::kOr_Kind, left, right)); - break; - } - case END: // fall through - case ')': - return; - default: - ASSERT(false); - } -} diff --git a/src/sksl/lex/RegexParser.h b/src/sksl/lex/RegexParser.h deleted file mode 100644 index 7de546fc17..0000000000 --- a/src/sksl/lex/RegexParser.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2017 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SKSL_REGEXPARSER -#define SKSL_REGEXPARSER - -#include "RegexNode.h" - -#include <stack> -#include <string> - -/** - * Turns a simple regular expression into a parse tree. The regular expression syntax supports only - * the basic quantifiers ('*', '+', and '?'), alternation ('|'), character sets ('[a-z]'), and - * groups ('()'). - */ -class RegexParser { -public: - RegexNode parse(std::string source); - -private: - static constexpr char END = '\0'; - - char peek(); - - void expect(char c); - - RegexNode pop(); - - /** - * Matches a char literal, parenthesized group, character set, or dot ('.'). - */ - void term(); - - /** - * Matches a term followed by an optional quantifier ('*', '+', or '?'). - */ - void quantifiedTerm(); - - /** - * Matches a sequence of quantifiedTerms. - */ - void sequence(); - - /** - * Returns a node representing the given escape character (e.g. escapeSequence('n') returns a - * node which matches a newline character). - */ - RegexNode escapeSequence(char c); - - /** - * Matches a literal character or escape sequence. - */ - void literal(); - - /** - * Matches a dot ('.'). - */ - void dot(); - - /** - * Matches a parenthesized group. - */ - void group(); - - /** - * Matches a literal character, escape sequence, or character range from a character set. - */ - void setItem(); - - /** - * Matches a character set. - */ - void set(); - - void regex(); - - std::string fSource; - - size_t fIndex; - - std::stack<RegexNode> fStack; -}; - -#endif diff --git a/src/sksl/lex/sksl.lex b/src/sksl/lex/sksl.lex deleted file mode 100644 index ada5e248de..0000000000 --- a/src/sksl/lex/sksl.lex +++ /dev/null @@ -1,95 +0,0 @@ -FLOAT_LITERAL = [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?|[0-9]+\.[0-9]*([eE][+-]?[0-9]+)?|[0-9]+([eE][+-]?[0-9]+) -INT_LITERAL = [0-9]+|0x[0-9a-fA-F]+ -TRUE_LITERAL = "true" -FALSE_LITERAL = "false" -IF = "if" -STATIC_IF = "@if" -ELSE = "else" -FOR = "for" -WHILE = "while" -DO = "do" -SWITCH = "switch" -STATIC_SWITCH = "@switch" -CASE = "case" -DEFAULT = "default" -BREAK = "break" -CONTINUE = "continue" -DISCARD = "discard" -RETURN = "return" -IN = "in" -OUT = "out" -INOUT = "inout" -UNIFORM = "uniform" -CONST = "const" -LOWP = "lowp" -MEDIUMP = "mediump" -HIGHP = "highp" -FLAT = "flat" -NOPERSPECTIVE = "noperspective" -READONLY = "readonly" -WRITEONLY = "writeonly" -COHERENT = "coherent" -VOLATILE = "volatile" -RESTRICT = "restrict" -BUFFER = "buffer" -HASSIDEEFFECTS = "sk_has_side_effects" -STRUCT = "struct" -LAYOUT = "layout" -PRECISION = "precision" -IDENTIFIER = [a-zA-Z_$]([0-9]|[a-zA-Z_$])* -DIRECTIVE = #[a-zA-Z_$]([0-9]|[a-zA-Z_$])* -SECTION = @[a-zA-Z_$]([0-9]|[a-zA-Z_$])* -LPAREN = "(" -RPAREN = ")" -LBRACE = "{" -RBRACE = "}" -LBRACKET = "[" -RBRACKET = "]" -DOT = "." -COMMA = "," -PLUSPLUS = "++" -MINUSMINUS = "--" -PLUS = "+" -MINUS = "-" -STAR = "*" -SLASH = "/" -PERCENT = "%" -SHL = "<<" -SHR = ">>" -BITWISEOR = "|" -BITWISEXOR = "^" -BITWISEAND = "&" -BITWISENOT = "~" -LOGICALOR = "||" -LOGICALXOR = "^^" -LOGICALAND = "&&" -LOGICALNOT = "!" -QUESTION = "?" -COLON = ":" -EQ = "=" -EQEQ = "==" -NEQ = "!=" -GT = ">" -LT = "<" -GTEQ = ">=" -LTEQ = "<=" -PLUSEQ = "+=" -MINUSEQ = "-=" -STAREQ = "*=" -SLASHEQ = "/=" -PERCENTEQ = "%=" -SHLEQ = "<<=" -SHREQ = ">>=" -BITWISEOREQ = "|=" -BITWISEXOREQ = "^=" -BITWISEANDEQ = "&=" -LOGICALOREQ = "||=" -LOGICALXOREQ = "^^=" -LOGICALANDEQ = "&&=" -SEMICOLON = ";" -ARROW = "->" -COLONCOLON = "::" -WHITESPACE = \s+ -LINE_COMMENT = //.* -BLOCK_COMMENT = /\*([^*]|\*[^/])*\*/ -INVALID = .
\ No newline at end of file |