diff options
author | 2017-09-19 14:38:40 -0400 | |
---|---|---|
committer | 2017-09-20 13:52:13 +0000 | |
commit | 906126eedc792f12935145a9a2f13eea1d1cd86d (patch) | |
tree | a04e3df78036d1e0359d769bb47c3ce4377f617c /src/sksl/lex/NFAtoDFA.h | |
parent | 71e37979d5087c06c314989b2a71fae04da4cbea (diff) |
Reduced size of SkSL lexer tables
The SkSL lexer tables contain duplicate rows, as many characters have the same
transitions (for instance, every lexer rule treats all digits [0-9] the
same way, so that means ten rows with identical transitions). This change
collapses such duplicate rows.
Bug: 764430
Change-Id: I83040692f847e3aff8069fddbe975a007f4b383c
Reviewed-on: https://skia-review.googlesource.com/48263
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
Diffstat (limited to 'src/sksl/lex/NFAtoDFA.h')
-rw-r--r-- | src/sksl/lex/NFAtoDFA.h | 43 |
1 files changed, 39 insertions, 4 deletions
diff --git a/src/sksl/lex/NFAtoDFA.h b/src/sksl/lex/NFAtoDFA.h index 214c6d12d6..28ccb547a2 100644 --- a/src/sksl/lex/NFAtoDFA.h +++ b/src/sksl/lex/NFAtoDFA.h @@ -14,6 +14,7 @@ #include <climits> #include <memory> #include <unordered_map> +#include <set> #include <vector> /** @@ -28,6 +29,9 @@ */ class NFAtoDFA { public: + static constexpr char START_CHAR = 9; + static constexpr char END_CHAR = 126; + NFAtoDFA(NFA* nfa) : fNFA(*nfa) {} @@ -44,11 +48,13 @@ public: DFAState* start = getState(DFAState::Label(startStates)); this->scanState(start); + this->computeMappings(); + int stateCount = 0; for (const auto& row : fTransitions) { stateCount = std::max(stateCount, (int) row.size()); } - return DFA(fTransitions, fAccepts); + return DFA(fCharMappings, fTransitions, fAccepts); } private: @@ -87,14 +93,14 @@ private: } std::vector<int>& row = fTransitions[c]; while (row.size() <= (size_t) start) { - row.push_back(-1); + row.push_back(INVALID); } row[start] = next; } void scanState(DFAState* state) { state->fIsScanned = true; - for (char c = 9; c <= DFA::END_CHAR; ++c) { + for (char c = START_CHAR; c <= END_CHAR; ++c) { std::vector<int> next; int bestAccept = INT_MAX; for (int idx : state->fLabel.fStates) { @@ -113,7 +119,7 @@ private: this->addTransition(c, state->fId, nextState->fId); if (bestAccept != INT_MAX) { while (fAccepts.size() <= (size_t) nextState->fId) { - fAccepts.push_back(-1); + fAccepts.push_back(INVALID); } fAccepts[nextState->fId] = bestAccept; } @@ -123,8 +129,37 @@ private: } } + // collapse rows with the same transitions to a single row. This is common, as each row + // represents a character and often there are many characters for which all transitions are + // identical (e.g. [0-9] are treated the same way by all lexer rules) + void computeMappings() { + // mappings[<input row>] = <output row> + std::vector<std::vector<int>*> uniques; + // this could be done more efficiently, but O(n^2) is plenty fast for our purposes + for (size_t i = 0; i < fTransitions.size(); ++i) { + int found = -1; + for (size_t j = 0; j < uniques.size(); ++j) { + if (*uniques[j] == fTransitions[i]) { + found = j; + break; + } + } + if (found == -1) { + found = (int) uniques.size(); + uniques.push_back(&fTransitions[i]); + } + fCharMappings.push_back(found); + } + std::vector<std::vector<int>> newTransitions; + for (std::vector<int>* row : uniques) { + newTransitions.push_back(*row); + } + fTransitions = newTransitions; + } + const NFA& fNFA; std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates; std::vector<std::vector<int>> fTransitions; + std::vector<int> fCharMappings; std::vector<int> fAccepts; }; |