1 | #include <re/re_re.h> |
---|
2 | #include "re_name_resolve.h" |
---|
3 | #include <re/re_name.h> |
---|
4 | #include <re/re_alt.h> |
---|
5 | #include <re/re_cc.h> |
---|
6 | #include <re/re_seq.h> |
---|
7 | #include <re/re_rep.h> |
---|
8 | #include <re/re_range.h> |
---|
9 | #include <re/re_diff.h> |
---|
10 | #include <re/re_intersect.h> |
---|
11 | #include <re/re_assertion.h> |
---|
12 | #include <re/re_analysis.h> |
---|
13 | #include <re/re_group.h> |
---|
14 | #include <re/re_memoizer.hpp> |
---|
15 | #include <UCD/resolve_properties.h> |
---|
16 | #include <cc/alphabet.h> |
---|
17 | #include <boost/container/flat_set.hpp> |
---|
18 | #include <sstream> |
---|
19 | |
---|
20 | using namespace boost::container; |
---|
21 | using namespace llvm; |
---|
22 | |
---|
23 | namespace re { |
---|
24 | |
---|
25 | static inline CC * extractCC(RE * re) { |
---|
26 | if (isa<CC>(re)) { |
---|
27 | return cast<CC>(re); |
---|
28 | } else if (isa<Name>(re)) { |
---|
29 | return extractCC(cast<Name>(re)->getDefinition()); |
---|
30 | } |
---|
31 | return nullptr; |
---|
32 | } |
---|
33 | |
---|
34 | struct NameResolver { |
---|
35 | RE * resolveUnicodeProperties(RE * re) { |
---|
36 | if (Name * name = dyn_cast<Name>(re)) { |
---|
37 | auto f = mMemoizer.find(name); |
---|
38 | if (f == mMemoizer.end()) { |
---|
39 | if (LLVM_LIKELY(name->getDefinition() != nullptr)) { |
---|
40 | name->setDefinition(resolveUnicodeProperties(name->getDefinition())); |
---|
41 | } else if (LLVM_LIKELY(name->getType() == Name::Type::UnicodeProperty || name->getType() == Name::Type::ZeroWidth)) { |
---|
42 | if (UCD::resolvePropertyDefinition(name)) { |
---|
43 | name->setDefinition(resolveUnicodeProperties(name->getDefinition())); |
---|
44 | } else { |
---|
45 | name->setDefinition(makeCC(UCD::resolveUnicodeSet(name), &cc::Unicode)); |
---|
46 | } |
---|
47 | } else { |
---|
48 | UndefinedNameError(name); |
---|
49 | } |
---|
50 | } else { |
---|
51 | return *f; |
---|
52 | } |
---|
53 | } else if (Vector * vec = dyn_cast<Vector>(re)) { |
---|
54 | for (RE *& re : *vec) { |
---|
55 | re = resolveUnicodeProperties(re); |
---|
56 | } |
---|
57 | } else if (Rep * rep = dyn_cast<Rep>(re)) { |
---|
58 | rep->setRE(resolveUnicodeProperties(rep->getRE())); |
---|
59 | } else if (Assertion * a = dyn_cast<Assertion>(re)) { |
---|
60 | a->setAsserted(resolveUnicodeProperties(a->getAsserted())); |
---|
61 | } else if (Range * rg = dyn_cast<Range>(re)) { |
---|
62 | return makeRange(resolveUnicodeProperties(rg->getLo()), |
---|
63 | resolveUnicodeProperties(rg->getHi())); |
---|
64 | } else if (Diff * diff = dyn_cast<Diff>(re)) { |
---|
65 | diff->setLH(resolveUnicodeProperties(diff->getLH())); |
---|
66 | diff->setRH(resolveUnicodeProperties(diff->getRH())); |
---|
67 | } else if (Intersect * ix = dyn_cast<Intersect>(re)) { |
---|
68 | ix->setLH(resolveUnicodeProperties(ix->getLH())); |
---|
69 | ix->setRH(resolveUnicodeProperties(ix->getRH())); |
---|
70 | } else if (Group * g = dyn_cast<Group>(re)) { |
---|
71 | g->setRE(resolveUnicodeProperties(g->getRE())); |
---|
72 | } |
---|
73 | return re; |
---|
74 | } |
---|
75 | |
---|
76 | RE * resolve(RE * re) { |
---|
77 | if (Name * name = dyn_cast<Name>(re)) { |
---|
78 | auto f = mMemoizer.find(name); |
---|
79 | if (f == mMemoizer.end()) { |
---|
80 | if (LLVM_LIKELY(name->getDefinition() != nullptr)) { |
---|
81 | name->setDefinition(resolve(name->getDefinition())); |
---|
82 | } else { |
---|
83 | UndefinedNameError(name); |
---|
84 | } |
---|
85 | } else { |
---|
86 | return *f; |
---|
87 | } |
---|
88 | } else if (Vector * vec = dyn_cast<Vector>(re)) { |
---|
89 | for (RE *& re : *vec) { |
---|
90 | re = resolve(re); |
---|
91 | } |
---|
92 | } else if (Rep * rep = dyn_cast<Rep>(re)) { |
---|
93 | rep->setRE(resolve(rep->getRE())); |
---|
94 | } else if (Assertion * a = dyn_cast<Assertion>(re)) { |
---|
95 | a->setAsserted(resolve(a->getAsserted())); |
---|
96 | } else if (Range * rg = dyn_cast<Range>(re)) { |
---|
97 | return makeRange(resolve(rg->getLo()), resolve(rg->getHi())); |
---|
98 | } else if (Diff * diff = dyn_cast<Diff>(re)) { |
---|
99 | diff->setLH(resolve(diff->getLH())); |
---|
100 | diff->setRH(resolve(diff->getRH())); |
---|
101 | } else if (Intersect * ix = dyn_cast<Intersect>(re)) { |
---|
102 | ix->setLH(resolve(ix->getLH())); |
---|
103 | ix->setRH(resolve(ix->getRH())); |
---|
104 | } else if (Group * g = dyn_cast<Group>(re)) { |
---|
105 | g->setRE(resolve(g->getRE())); |
---|
106 | } |
---|
107 | return re; |
---|
108 | } |
---|
109 | |
---|
110 | private: |
---|
111 | Memoizer mMemoizer; |
---|
112 | }; |
---|
113 | |
---|
114 | RE * resolveUnicodeProperties(RE * re) { |
---|
115 | NameResolver nameResolver; |
---|
116 | return nameResolver.resolveUnicodeProperties(re); |
---|
117 | } |
---|
118 | |
---|
119 | RE * resolveNames(RE * re) { |
---|
120 | NameResolver nameResolver; |
---|
121 | return nameResolver.resolve(re); |
---|
122 | } |
---|
123 | |
---|
124 | } |
---|