/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#include "RegexNode.h"
#include "NFA.h"
/**
 * Adds the NFA states needed to match this regex node to `nfa`.
 *
 * @param nfa    automaton that receives the new states (via addState / fStates).
 * @param accept state ids passed to the newly created states as the states that
 *               follow this node.
 * @return the ids of the states through which this node is entered. For
 *         optional constructs (`?`, `*`) this also contains the ids in `accept`,
 *         since the node may be skipped entirely.
 */
std::vector<int> RegexNode::createStates(NFA* nfa, const std::vector<int>& accept) const {
    std::vector<int> result;
    switch (fKind) {
        case kChar_Kind:
            result.push_back(nfa->addState(NFAState(fPayload.fChar, accept)));
            break;
        case kCharset_Kind: {
            // Bitmap indexed by character value; true means "listed in the set".
            std::vector<bool> chars;
            // Grows the bitmap (padding with false) until `index` is addressable.
            auto ensureSlot = [&chars](size_t index) {
                while (chars.size() <= index) {
                    chars.push_back(false);
                }
            };
            for (const RegexNode& child : fChildren) {
                if (child.fKind == kChar_Kind) {
                    ensureSlot(static_cast<size_t>(child.fPayload.fChar));
                    chars[child.fPayload.fChar] = true;
                } else {
                    ASSERT(child.fKind == kRange_Kind);
                    ensureSlot(static_cast<size_t>(child.fChildren[1].fPayload.fChar));
                    // Count in int rather than char: with a char counter the
                    // condition `c <= last` can never become false when `last`
                    // is the largest char value, so the loop would wrap around
                    // and never terminate.
                    const int first = child.fChildren[0].fPayload.fChar;
                    const int last = child.fChildren[1].fPayload.fChar;
                    for (int c = first; c <= last; ++c) {
                        chars[static_cast<size_t>(c)] = true;
                    }
                }
            }
            // NOTE(review): fPayload.fBool is presumably the "inverted set" flag
            // (description() prints it as '^') - confirm against NFAState.
            result.push_back(nfa->addState(NFAState(fPayload.fBool, chars, accept)));
            break;
        }
        case kConcat_Kind: {
            // Build right-to-left: the left child continues into the entry
            // states of the right child.
            std::vector<int> right = fChildren[1].createStates(nfa, accept);
            result = fChildren[0].createStates(nfa, right);
            break;
        }
        case kDot_Kind:
            result.push_back(nfa->addState(NFAState(NFAState::kDot_Kind, accept)));
            break;
        case kOr_Kind: {
            // Either alternative may be entered; both continue into `accept`.
            std::vector<int> states = fChildren[0].createStates(nfa, accept);
            result.insert(result.end(), states.begin(), states.end());
            states = fChildren[1].createStates(nfa, accept);
            result.insert(result.end(), states.begin(), states.end());
            break;
        }
        case kPlus_Kind: {
            // Reserve a state id first so the child can list it as a successor;
            // its real contents (the child's entry states, i.e. the loop back)
            // are only known after the child has been built.
            std::vector<int> next = accept;
            std::vector<int> placeholder;
            int id = nfa->addState(NFAState(placeholder));
            next.push_back(id);
            result = fChildren[0].createStates(nfa, next);
            nfa->fStates[id] = NFAState(result);
            break;
        }
        case kQuestion_Kind:
            // The child may be matched or skipped, so `accept` is an entry too.
            result = fChildren[0].createStates(nfa, accept);
            result.insert(result.end(), accept.begin(), accept.end());
            break;
        case kRange_Kind:
            // Ranges only occur as children of a charset and are handled there.
            ABORT("unreachable");
        case kStar_Kind: {
            // Same reserved-id scheme as kPlus_Kind, except that the child may
            // be skipped: `accept` is added to the entry states before the
            // reserved state is filled in.
            std::vector<int> next = accept;
            std::vector<int> placeholder;
            int id = nfa->addState(NFAState(placeholder));
            next.push_back(id);
            result = fChildren[0].createStates(nfa, next);
            result.insert(result.end(), accept.begin(), accept.end());
            nfa->fStates[id] = NFAState(result);
            break;
        }
    }
    return result;
}
std::string RegexNode::description() const {
switch (fKind) {
case kChar_Kind:
return std::string(1, fPayload.fChar);
case kCharset_Kind: {
std::string result("[");
if (fPayload.fBool) {
result += "^";
}
for (const RegexNode& c : fChildren) {
result += c.description();
}
result += "]";
return result;
}
case kConcat_Kind:
return fChildren[0].description() + fChildren[1].description();
case kDot_Kind:
return ".";
case kOr_Kind:
return "(" + fChildren[0].description() + "|" + fChildren[1].description() + ")";
case kPlus_Kind:
return fChildren[0].description() + "+";
case kQuestion_Kind:
return fChildren[0].description() + "?";
case kRange_Kind:
return fChildren[0].description() + "-" + fChildren[1].description();
case kStar_Kind:
return fChildren[0].description() + "*";
default:
return "<" + std::to_string(fKind) + ">";
}
}
|