1 | #include "re_nullable.h" |
---|
2 | #include <re/re_alt.h> // for Alt, makeAlt |
---|
3 | #include <re/re_any.h> // for makeAny, Any |
---|
4 | #include <re/re_assertion.h> // for Assertion, Assertion::Sense, Asser... |
---|
5 | #include <re/re_diff.h> // for Diff, makeDiff |
---|
6 | #include <re/re_intersect.h> // for Intersect |
---|
7 | #include <re/re_name.h> // for Name |
---|
8 | #include <re/re_rep.h> // for Rep, makeRep |
---|
9 | #include <re/re_seq.h> // for Seq, makeSeq |
---|
10 | #include <vector> // for vector, allocator |
---|
11 | #include <llvm/Support/Casting.h> // for dyn_cast, isa |
---|
12 | |
---|
13 | /* |
---|
14 | |
---|
15 | A regular expression is nullable if it (a) matches the empty |
---|
16 | string, and (b) applies everywhere. Note that Start (^) and |
---|
17 | End ($) match the empty string, but not everywhere). |
---|
18 | |
---|
19 | */ |
---|
20 | |
---|
21 | using namespace llvm; |
---|
22 | |
---|
23 | namespace re { |
---|
24 | |
---|
25 | RE * RE_Nullable::removeNullablePrefix(RE * re) { |
---|
26 | if (Seq * seq = dyn_cast<Seq>(re)) { |
---|
27 | std::vector<RE*> list; |
---|
28 | for (auto i = seq->begin(); i != seq->end(); ++i) { |
---|
29 | if (!isNullable(*i)) { |
---|
30 | list.push_back(removeNullablePrefix(*i)); |
---|
31 | std::copy(++i, seq->end(), std::back_inserter(list)); |
---|
32 | break; |
---|
33 | } |
---|
34 | } |
---|
35 | re = makeSeq(list.begin(), list.end()); |
---|
36 | } else if (Alt * alt = dyn_cast<Alt>(re)) { |
---|
37 | std::vector<RE*> list; |
---|
38 | for (auto i = alt->begin(); i != alt->end(); ++i) { |
---|
39 | list.push_back(removeNullablePrefix(*i)); |
---|
40 | } |
---|
41 | re = makeAlt(list.begin(), list.end()); |
---|
42 | } else if (Rep * rep = dyn_cast<Rep>(re)) { |
---|
43 | if ((rep->getLB() == 0) || (isNullable(rep->getRE()))) { |
---|
44 | re = makeSeq(); |
---|
45 | } |
---|
46 | else if (hasNullablePrefix(rep->getRE())) { |
---|
47 | re = makeSeq({removeNullablePrefix(rep->getRE()), makeRep(rep->getRE(), rep->getLB() - 1, rep->getLB() - 1)}); |
---|
48 | } |
---|
49 | else { |
---|
50 | re = makeRep(rep->getRE(), rep->getLB(), rep->getLB()); |
---|
51 | } |
---|
52 | } else if (Name * name = dyn_cast<Name>(re)) { |
---|
53 | if (name->getDefinition()) { |
---|
54 | name->setDefinition(removeNullablePrefix(name->getDefinition())); |
---|
55 | } |
---|
56 | } |
---|
57 | return re; |
---|
58 | } |
---|
59 | |
---|
60 | RE * RE_Nullable::removeNullableSuffix(RE * re) { |
---|
61 | if (Seq * seq = dyn_cast<Seq>(re)) { |
---|
62 | std::vector<RE*> list; |
---|
63 | for (auto i = seq->rbegin(); i != seq->rend(); ++i) { |
---|
64 | if (!isNullable(*i)) { |
---|
65 | std::copy(seq->begin(), (i + 1).base(), std::back_inserter(list)); |
---|
66 | list.push_back(removeNullableSuffix(*i)); |
---|
67 | break; |
---|
68 | } |
---|
69 | } |
---|
70 | re = makeSeq(list.begin(), list.end()); |
---|
71 | } else if (Alt* alt = dyn_cast<Alt>(re)) { |
---|
72 | std::vector<RE*> list; |
---|
73 | for (auto i = alt->begin(); i != alt->end(); ++i) { |
---|
74 | list.push_back(removeNullableSuffix(*i)); |
---|
75 | } |
---|
76 | re = makeAlt(list.begin(), list.end()); |
---|
77 | } else if (Rep * rep = dyn_cast<Rep>(re)) { |
---|
78 | if ((rep->getLB() == 0) || (isNullable(rep->getRE()))) { |
---|
79 | re = makeSeq(); |
---|
80 | } |
---|
81 | else if (hasNullableSuffix(rep->getRE())) { |
---|
82 | re = makeSeq({makeRep(rep->getRE(), rep->getLB() - 1, rep->getLB() - 1), removeNullableSuffix(rep->getRE())}); |
---|
83 | } |
---|
84 | else { |
---|
85 | re = makeRep(rep->getRE(), rep->getLB(), rep->getLB()); |
---|
86 | } |
---|
87 | } else if (Name * name = dyn_cast<Name>(re)) { |
---|
88 | if (name->getDefinition()) { |
---|
89 | name->setDefinition(removeNullableSuffix(name->getDefinition())); |
---|
90 | } |
---|
91 | } |
---|
92 | return re; |
---|
93 | } |
---|
94 | |
---|
95 | bool RE_Nullable::isNullable(const RE * re) { |
---|
96 | if (const Seq * re_seq = dyn_cast<const Seq>(re)) { |
---|
97 | for (const RE * re : *re_seq) { |
---|
98 | if (!isNullable(re)) { |
---|
99 | return false; |
---|
100 | } |
---|
101 | } |
---|
102 | return true; |
---|
103 | } else if (const Alt * re_alt = dyn_cast<const Alt>(re)) { |
---|
104 | for (const RE * re : *re_alt) { |
---|
105 | if (isNullable(re)) { |
---|
106 | return true; |
---|
107 | } |
---|
108 | } |
---|
109 | } else if (const Rep* re_rep = dyn_cast<const Rep>(re)) { |
---|
110 | return re_rep->getLB() == 0 ? true : isNullable(re_rep->getRE()); |
---|
111 | } else if (const Diff * diff = dyn_cast<const Diff>(re)) { |
---|
112 | return isNullable(diff->getLH()) && !isNullable(diff->getRH()); |
---|
113 | } else if (const Intersect * e = dyn_cast<const Intersect>(re)) { |
---|
114 | return isNullable(e->getLH()) && isNullable(e->getRH()); |
---|
115 | } |
---|
116 | return false; |
---|
117 | } |
---|
118 | |
---|
119 | bool RE_Nullable::hasNullablePrefix(const RE * re) { |
---|
120 | bool nullable = false; |
---|
121 | if (const Seq * seq = dyn_cast<const Seq>(re)) { |
---|
122 | nullable = isNullable(seq->front()) ? true : hasNullablePrefix(seq->front()); |
---|
123 | } else if (const Alt * alt = dyn_cast<const Alt>(re)) { |
---|
124 | for (const RE * re : *alt) { |
---|
125 | if (hasNullablePrefix(re)) { |
---|
126 | nullable = true; |
---|
127 | break; |
---|
128 | } |
---|
129 | } |
---|
130 | } else if (const Rep * rep = dyn_cast<const Rep>(re)) { |
---|
131 | nullable = true; |
---|
132 | if (rep->getLB() == rep->getUB()) { |
---|
133 | nullable = hasNullablePrefix(rep->getRE()); |
---|
134 | } |
---|
135 | } |
---|
136 | return nullable; |
---|
137 | } |
---|
138 | |
---|
139 | bool RE_Nullable::hasNullableSuffix(const RE * re) { |
---|
140 | bool nullable = false; |
---|
141 | if (const Seq * seq = dyn_cast<const Seq>(re)) { |
---|
142 | nullable = isNullable(seq->back()) ? true : hasNullableSuffix(seq->back()); |
---|
143 | } else if (const Alt * alt = dyn_cast<const Alt>(re)) { |
---|
144 | for (const RE * re : *alt) { |
---|
145 | if (hasNullableSuffix(re)) { |
---|
146 | nullable = true; |
---|
147 | break; |
---|
148 | } |
---|
149 | } |
---|
150 | } else if (const Rep * rep = dyn_cast<const Rep>(re)) { |
---|
151 | nullable = true; |
---|
152 | if (rep->getLB() == rep->getUB()) { |
---|
153 | nullable = hasNullableSuffix(rep->getRE()); |
---|
154 | } |
---|
155 | } |
---|
156 | return nullable; |
---|
157 | } |
---|
158 | |
---|
159 | } |
---|