about | summary | refs | log | tree | commit | diff | homepage
path: root/src/sksl/lex/NFAtoDFA.h
diff options
context:
space:
mode:
author: Ethan Nicholas <ethannicholas@google.com> 2017-09-19 14:38:40 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-09-20 13:52:13 +0000
commit: 906126eedc792f12935145a9a2f13eea1d1cd86d (patch)
tree: a04e3df78036d1e0359d769bb47c3ce4377f617c /src/sksl/lex/NFAtoDFA.h
parent: 71e37979d5087c06c314989b2a71fae04da4cbea (diff)
Reduced size of SkSL lexer tables
The SkSL lexer tables contain duplicate rows, as many characters have the same transitions (for instance, every lexer rule treats all digits [0-9] the same way, so that means ten rows with identical transitions). This change collapses such duplicate rows.

Bug: 764430
Change-Id: I83040692f847e3aff8069fddbe975a007f4b383c
Reviewed-on: https://skia-review.googlesource.com/48263
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
Diffstat (limited to 'src/sksl/lex/NFAtoDFA.h')
-rw-r--r-- src/sksl/lex/NFAtoDFA.h | 43
1 file changed, 39 insertions(+), 4 deletions(-)
diff --git a/src/sksl/lex/NFAtoDFA.h b/src/sksl/lex/NFAtoDFA.h
index 214c6d12d6..28ccb547a2 100644
--- a/src/sksl/lex/NFAtoDFA.h
+++ b/src/sksl/lex/NFAtoDFA.h
@@ -14,6 +14,7 @@
#include <climits>
#include <memory>
#include <unordered_map>
+#include <set>
#include <vector>
/**
@@ -28,6 +29,9 @@
*/
class NFAtoDFA {
public:
+ static constexpr char START_CHAR = 9;
+ static constexpr char END_CHAR = 126;
+
NFAtoDFA(NFA* nfa)
: fNFA(*nfa) {}
@@ -44,11 +48,13 @@ public:
DFAState* start = getState(DFAState::Label(startStates));
this->scanState(start);
+ this->computeMappings();
+
int stateCount = 0;
for (const auto& row : fTransitions) {
stateCount = std::max(stateCount, (int) row.size());
}
- return DFA(fTransitions, fAccepts);
+ return DFA(fCharMappings, fTransitions, fAccepts);
}
private:
@@ -87,14 +93,14 @@ private:
}
std::vector<int>& row = fTransitions[c];
while (row.size() <= (size_t) start) {
- row.push_back(-1);
+ row.push_back(INVALID);
}
row[start] = next;
}
void scanState(DFAState* state) {
state->fIsScanned = true;
- for (char c = 9; c <= DFA::END_CHAR; ++c) {
+ for (char c = START_CHAR; c <= END_CHAR; ++c) {
std::vector<int> next;
int bestAccept = INT_MAX;
for (int idx : state->fLabel.fStates) {
@@ -113,7 +119,7 @@ private:
this->addTransition(c, state->fId, nextState->fId);
if (bestAccept != INT_MAX) {
while (fAccepts.size() <= (size_t) nextState->fId) {
- fAccepts.push_back(-1);
+ fAccepts.push_back(INVALID);
}
fAccepts[nextState->fId] = bestAccept;
}
@@ -123,8 +129,37 @@ private:
}
}
+ // collapse rows with the same transitions to a single row. This is common, as each row
+ // represents a character and often there are many characters for which all transitions are
+ // identical (e.g. [0-9] are treated the same way by all lexer rules)
+ void computeMappings() {
+ // mappings[<input row>] = <output row>
+ std::vector<std::vector<int>*> uniques;
+ // this could be done more efficiently, but O(n^2) is plenty fast for our purposes
+ for (size_t i = 0; i < fTransitions.size(); ++i) {
+ int found = -1;
+ for (size_t j = 0; j < uniques.size(); ++j) {
+ if (*uniques[j] == fTransitions[i]) {
+ found = j;
+ break;
+ }
+ }
+ if (found == -1) {
+ found = (int) uniques.size();
+ uniques.push_back(&fTransitions[i]);
+ }
+ fCharMappings.push_back(found);
+ }
+ std::vector<std::vector<int>> newTransitions;
+ for (std::vector<int>* row : uniques) {
+ newTransitions.push_back(*row);
+ }
+ fTransitions = newTransitions;
+ }
+
const NFA& fNFA;
std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates;
std::vector<std::vector<int>> fTransitions;
+ std::vector<int> fCharMappings;
std::vector<int> fAccepts;
};