1 | /* |
---|
2 | * Copyright (c) 2014 International Characters. |
---|
3 | * This software is licensed to the public under the Open Software License 3.0. |
---|
4 | * icgrep is a trademark of International Characters. |
---|
5 | */ |
---|
6 | |
---|
7 | #include "printer_re.h" |
---|
8 | |
---|
9 | //Regular Expressions |
---|
10 | #include <re/re_re.h> |
---|
11 | #include <re/re_alt.h> |
---|
12 | #include <re/re_any.h> |
---|
13 | #include <re/re_cc.h> |
---|
14 | #include <re/re_name.h> |
---|
15 | #include <re/re_end.h> |
---|
16 | #include <re/re_rep.h> |
---|
17 | #include <re/re_seq.h> |
---|
18 | #include <re/re_start.h> |
---|
19 | #include <re/re_range.h> |
---|
20 | #include <re/re_diff.h> |
---|
21 | #include <re/re_intersect.h> |
---|
22 | #include <re/re_assertion.h> |
---|
23 | #include <re/re_group.h> |
---|
24 | #include <cc/alphabet.h> |
---|
25 | |
---|
26 | using namespace re; |
---|
27 | using namespace llvm; |
---|
28 | |
---|
29 | const std::string Printer_RE::PrintRE(const RE * re) { |
---|
30 | std::string retVal = ""; |
---|
31 | |
---|
32 | if (re == nullptr) { |
---|
33 | retVal = "<NULL>"; |
---|
34 | } else if (const Alt* re_alt = dyn_cast<const Alt>(re)) { |
---|
35 | retVal += "(Alt["; |
---|
36 | bool comma = false; |
---|
37 | for (const RE * re : *re_alt) { |
---|
38 | if (comma) { |
---|
39 | retVal += ','; |
---|
40 | } |
---|
41 | retVal += PrintRE(re); |
---|
42 | comma = true; |
---|
43 | } |
---|
44 | retVal += "])"; |
---|
45 | } else if (const CC* re_cc = dyn_cast<const CC>(re)) { |
---|
46 | retVal = "CC \""; |
---|
47 | retVal += re_cc->canonicalName(CC_type::UnicodeClass); |
---|
48 | retVal += "\" "; |
---|
49 | |
---|
50 | for (const auto & i : *re_cc) { |
---|
51 | retVal += "["; |
---|
52 | retVal += std::to_string(lo_codepoint(i)); |
---|
53 | if (hi_codepoint(i) != lo_codepoint(i)) |
---|
54 | retVal += "-" + std::to_string(hi_codepoint(i)); |
---|
55 | retVal += "]"; |
---|
56 | } |
---|
57 | retVal += "/" + re_cc->getAlphabet()->getName(); |
---|
58 | } else if (const Name* re_name = dyn_cast<const Name>(re)) { |
---|
59 | retVal = "Name \""; |
---|
60 | if (re_name->hasNamespace()) { |
---|
61 | retVal += re_name->getNamespace(); |
---|
62 | retVal += ":"; |
---|
63 | } |
---|
64 | retVal += re_name->getName(); |
---|
65 | retVal += "\" "; |
---|
66 | //if (re_name->getType() == Name::Type::Capture) { |
---|
67 | retVal += "=(" + PrintRE(re_name->getDefinition()) + ")"; |
---|
68 | //} |
---|
69 | } else if (const Range* rg = dyn_cast<const Range>(re)) { |
---|
70 | retVal = "Range ("; |
---|
71 | retVal += PrintRE(rg->getLo()); |
---|
72 | retVal += " , "; |
---|
73 | retVal += PrintRE(rg->getHi()); |
---|
74 | retVal += ") "; |
---|
75 | } else if (const Assertion * a = dyn_cast<const Assertion>(re)) { |
---|
76 | retVal = (a->getSense() == Assertion::Sense::Positive) ? "" : "Negative"; |
---|
77 | switch (a->getKind()) { |
---|
78 | case Assertion::Kind::Lookahead: |
---|
79 | retVal += "Lookahead"; |
---|
80 | break; |
---|
81 | case Assertion::Kind::Lookbehind: |
---|
82 | retVal += "Lookbehind"; |
---|
83 | break; |
---|
84 | case Assertion::Kind::Boundary: |
---|
85 | retVal += "Boundary"; |
---|
86 | break; |
---|
87 | } |
---|
88 | retVal += "Assertion("; |
---|
89 | retVal += PrintRE(a->getAsserted()); |
---|
90 | retVal += ") "; |
---|
91 | } else if (const Diff* diff = dyn_cast<const Diff>(re)) { |
---|
92 | retVal = "Diff ("; |
---|
93 | retVal += PrintRE(diff->getLH()); |
---|
94 | retVal += " , "; |
---|
95 | retVal += PrintRE(diff->getRH()); |
---|
96 | retVal += ") "; |
---|
97 | } else if (const Intersect* x = dyn_cast<const Intersect>(re)) { |
---|
98 | retVal = "Intersect ("; |
---|
99 | retVal += PrintRE(x->getLH()); |
---|
100 | retVal += " , "; |
---|
101 | retVal += PrintRE(x->getRH()); |
---|
102 | retVal += ") "; |
---|
103 | } else if (isa<const End>(re)) { |
---|
104 | retVal = "End"; |
---|
105 | } else if (const Rep* re_rep = dyn_cast<const Rep>(re)) { |
---|
106 | retVal = "Rep("; |
---|
107 | retVal.append(PrintRE(re_rep->getRE())); |
---|
108 | retVal.append(","); |
---|
109 | retVal.append(std::to_string(re_rep->getLB())); |
---|
110 | retVal.append(","); |
---|
111 | if (re_rep->getUB() == Rep::UNBOUNDED_REP) { |
---|
112 | retVal.append("Unbounded"); |
---|
113 | } |
---|
114 | else { |
---|
115 | retVal.append(std::to_string(re_rep->getUB())); |
---|
116 | } |
---|
117 | retVal.append(")"); |
---|
118 | } else if (const Seq* re_seq = dyn_cast<const Seq>(re)) { |
---|
119 | retVal = "(Seq["; |
---|
120 | bool comma = false; |
---|
121 | for (const RE * re : *re_seq) { |
---|
122 | if (comma) { |
---|
123 | retVal.append(","); |
---|
124 | } |
---|
125 | retVal.append(PrintRE(re)); |
---|
126 | comma = true; |
---|
127 | } |
---|
128 | retVal.append("])"); |
---|
129 | } else if (const Group * g = dyn_cast<const Group>(re)) { |
---|
130 | retVal = "Group("; |
---|
131 | if (g->getMode() == Group::Mode::GraphemeMode) { |
---|
132 | retVal.append((g->getSense() == Group::Sense::On) ? "+g:" : "-g:"); |
---|
133 | } |
---|
134 | else if (g->getMode() == Group::Mode::CaseInsensitiveMode) { |
---|
135 | retVal.append((g->getSense() == Group::Sense::On) ? "+i:" : "-i:"); |
---|
136 | } |
---|
137 | retVal.append(PrintRE(g->getRE())); |
---|
138 | retVal.append(")"); |
---|
139 | } else if (isa<const Start>(re)) { |
---|
140 | retVal = "Start"; |
---|
141 | } else if (isa<const Any>(re)) { |
---|
142 | retVal = "Any"; |
---|
143 | } else { |
---|
144 | retVal = "???"; |
---|
145 | } |
---|
146 | return retVal; |
---|
147 | } |
---|