Reduced size of SkSL lexer tables

The SkSL lexer tables contain duplicate rows, as many characters have the same transitions (for instance, every lexer rule treats all digits [0-9] the same way, so that means ten rows with identical transitions). This change collapses such duplicate rows. Bug: 764430 Change-Id: I83040692f847e3aff8069fddbe975a007f4b383c Reviewed-on: https://skia-review.googlesource.com/48263 Reviewed-by: Brian Salomon <bsalomon@google.com> Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
author: Ethan Nicholas <ethannicholas@google.com> 2017-09-19 14:38:40 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-09-20 13:52:13 +0000
commit: 906126eedc792f12935145a9a2f13eea1d1cd86d (patch)
tree: a04e3df78036d1e0359d769bb47c3ce4377f617c /src/sksl/lex/NFAtoDFA.h
parent: 71e37979d5087c06c314989b2a71fae04da4cbea (diff)
1 files changed, 39 insertions, 4 deletions
diff --git a/src/sksl/lex/NFAtoDFA.h b/src/sksl/lex/NFAtoDFA.h
index 214c6d12d6..28ccb547a2 100644
--- a/src/sksl/lex/NFAtoDFA.h
+++ b/src/sksl/lex/NFAtoDFA.h
@@ -14,6 +14,7 @@
 #include <climits>
 #include <memory>
 #include <unordered_map>
+#include <set>
 #include <vector>
 
 /**
@@ -28,6 +29,9 @@
  */
 class NFAtoDFA {
 public:
+    static constexpr char START_CHAR = 9;
+    static constexpr char END_CHAR = 126;
+
     NFAtoDFA(NFA* nfa)
     : fNFA(*nfa) {}
 
@@ -44,11 +48,13 @@ public:
         DFAState* start = getState(DFAState::Label(startStates));
         this->scanState(start);
 
+        this->computeMappings();
+
         int stateCount = 0;
         for (const auto& row : fTransitions) {
             stateCount = std::max(stateCount, (int) row.size());
         }
-        return DFA(fTransitions, fAccepts);
+        return DFA(fCharMappings, fTransitions, fAccepts);
     }
 
 private:
@@ -87,14 +93,14 @@ private:
         }
         std::vector<int>& row = fTransitions[c];
         while (row.size() <= (size_t) start) {
-            row.push_back(-1);
+            row.push_back(INVALID);
         }
         row[start] = next;
     }
 
     void scanState(DFAState* state) {
         state->fIsScanned = true;
-        for (char c = 9; c <= DFA::END_CHAR; ++c) {
+        for (char c = START_CHAR; c <= END_CHAR; ++c) {
             std::vector<int> next;
             int bestAccept = INT_MAX;
             for (int idx : state->fLabel.fStates) {
@@ -113,7 +119,7 @@ private:
             this->addTransition(c, state->fId, nextState->fId);
             if (bestAccept != INT_MAX) {
                 while (fAccepts.size() <= (size_t) nextState->fId) {
-                    fAccepts.push_back(-1);
+                    fAccepts.push_back(INVALID);
                 }
                 fAccepts[nextState->fId] = bestAccept;
             }
@@ -123,8 +129,37 @@ private:
         }
     }
 
+    // collapse rows with the same transitions to a single row. This is common, as each row
+    // represents a character and often there are many characters for which all transitions are
+    // identical (e.g. [0-9] are treated the same way by all lexer rules)
+    void computeMappings() {
+        // mappings[<input row>] = <output row>
+        std::vector<std::vector<int>*> uniques;
+        // this could be done more efficiently, but O(n^2) is plenty fast for our purposes
+        for (size_t i = 0; i < fTransitions.size(); ++i) {
+            int found = -1;
+            for (size_t j = 0; j < uniques.size(); ++j) {
+                if (*uniques[j] == fTransitions[i]) {
+                    found = j;
+                    break;
+                }
+            }
+            if (found == -1) {
+                found = (int) uniques.size();
+                uniques.push_back(&fTransitions[i]);
+            }
+            fCharMappings.push_back(found);
+        }
+        std::vector<std::vector<int>> newTransitions;
+        for (std::vector<int>* row : uniques) {
+            newTransitions.push_back(*row);
+        }
+        fTransitions = newTransitions;
+    }
+
     const NFA& fNFA;
     std::unordered_map<DFAState::Label, std::unique_ptr<DFAState>> fStates;
     std::vector<std::vector<int>> fTransitions;
+    std::vector<int> fCharMappings;
     std::vector<int> fAccepts;
 };
author	Ethan Nicholas <ethannicholas@google.com>	2017-09-19 14:38:40 -0400
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	2017-09-20 13:52:13 +0000
commit	906126eedc792f12935145a9a2f13eea1d1cd86d (patch)
tree	a04e3df78036d1e0359d769bb47c3ce4377f617c /src/sksl/lex/NFAtoDFA.h
parent	71e37979d5087c06c314989b2a71fae04da4cbea (diff)