1 | /* |
---|
2 | * Copyright (c) 2018 International Characters. |
---|
3 | * This software is licensed to the public under the Open Software License 3.0. |
---|
4 | * icgrep is a trademark of International Characters. |
---|
5 | */ |
---|
6 | |
---|
7 | #include "cc_compiler.h" |
---|
8 | #include <cc/alphabet.h> |
---|
9 | #include <pablo/codegenstate.h> |
---|
10 | #include <pablo/boolean.h> |
---|
11 | #include <pablo/pe_zeroes.h> |
---|
12 | #include <pablo/pe_ones.h> |
---|
13 | #include <pablo/pe_infile.h> |
---|
14 | #include <pablo/builder.hpp> |
---|
15 | #include <pablo/pablo_kernel.h> |
---|
16 | #include <llvm/IR/DerivedTypes.h> |
---|
17 | #include <llvm/Support/ErrorHandling.h> |
---|
18 | #include <llvm/Support/raw_ostream.h> |
---|
19 | |
---|
20 | using namespace re; |
---|
21 | using namespace pablo; |
---|
22 | using namespace llvm; |
---|
23 | |
---|
24 | namespace cc { |
---|
25 | CC_Compiler::CC_Compiler(pablo::PabloBlock * scope) |
---|
26 | : mBuilder(scope) { |
---|
27 | } |
---|
28 | |
---|
29 | Parabix_CC_Compiler::Parabix_CC_Compiler(pablo::PabloBlock * scope, std::vector<pablo::PabloAST *> basisBitSet, cc::BitNumbering basisSetNumbering) |
---|
30 | : CC_Compiler(scope) |
---|
31 | , mEncodingBits(basisBitSet.size()) |
---|
32 | , mBasisSetNumbering(basisSetNumbering) |
---|
33 | , mBasisBit(basisBitSet) { |
---|
34 | mEncodingMask = (static_cast<unsigned>(1) << mEncodingBits) - static_cast<unsigned>(1); |
---|
35 | } |
---|
36 | |
---|
37 | PabloAST * Parabix_CC_Compiler::compileCC(const std::string & canonicalName, const CC *cc, PabloBlock & block) { |
---|
38 | PabloAST * const var = charset_expr(cc, block); |
---|
39 | if (LLVM_LIKELY(isa<Statement>(var))) { |
---|
40 | cast<Statement>(var)->setName(block.makeName(canonicalName)); |
---|
41 | } |
---|
42 | return var; |
---|
43 | } |
---|
44 | |
---|
45 | PabloAST * Parabix_CC_Compiler::compileCC(const std::string & canonicalName, const CC *cc, PabloBuilder & builder) { |
---|
46 | PabloAST * const var = charset_expr(cc, builder); |
---|
47 | if (LLVM_LIKELY(isa<Statement>(var))) { |
---|
48 | cast<Statement>(var)->setName(builder.makeName(canonicalName)); |
---|
49 | } |
---|
50 | return var; |
---|
51 | } |
---|
52 | |
---|
53 | template<typename PabloBlockOrBuilder> |
---|
54 | PabloAST * Parabix_CC_Compiler::charset_expr(const CC * cc, PabloBlockOrBuilder & pb) { |
---|
55 | if (cc->empty()) { |
---|
56 | return pb.createZeroes(); |
---|
57 | } |
---|
58 | if (cc->size() > 2) { |
---|
59 | bool combine = true; |
---|
60 | for (const interval_t & i : *cc) { |
---|
61 | if (lo_codepoint(i) != hi_codepoint(i)) { |
---|
62 | combine = false; |
---|
63 | break; |
---|
64 | } |
---|
65 | } |
---|
66 | if (combine) { |
---|
67 | auto i = cc->begin(), e = cc->end(); |
---|
68 | for (auto j = i; ++j != e; i = j) { |
---|
69 | if ((lo_codepoint(i) + 2) != lo_codepoint(j)) { |
---|
70 | combine = false; |
---|
71 | break; |
---|
72 | } |
---|
73 | } |
---|
74 | if (combine) { |
---|
75 | codepoint_t lo = lo_codepoint(cc->front()); |
---|
76 | codepoint_t hi = lo_codepoint(cc->back()); |
---|
77 | PabloAST * even_odd = getBasisVar(0, pb); |
---|
78 | if ((lo & 1) == 0) { |
---|
79 | even_odd = pb.createNot(even_odd); |
---|
80 | } |
---|
81 | lo &= (mEncodingMask - 1); |
---|
82 | hi |= (mEncodingMask ^ (mEncodingMask - 1)); |
---|
83 | PabloAST * expr = make_range(lo, hi, pb); |
---|
84 | return pb.createAnd(expr, even_odd); |
---|
85 | } |
---|
86 | } |
---|
87 | } |
---|
88 | PabloAST * expr = nullptr; |
---|
89 | for (const interval_t & i : *cc) { |
---|
90 | PabloAST * temp = char_or_range_expr(lo_codepoint(i), hi_codepoint(i), pb); |
---|
91 | expr = (expr == nullptr) ? temp : pb.createOr(expr, temp); |
---|
92 | } |
---|
93 | return pb.createInFile(expr); |
---|
94 | |
---|
95 | } |
---|
96 | |
---|
97 | template<typename PabloBlockOrBuilder> |
---|
98 | PabloAST * Parabix_CC_Compiler::bit_pattern_expr(const unsigned pattern, unsigned selected_bits, PabloBlockOrBuilder & pb) { |
---|
99 | if (LLVM_UNLIKELY(selected_bits == 0)) { |
---|
100 | return pb.createOnes(); |
---|
101 | } else { |
---|
102 | std::vector<PabloAST*> terms; |
---|
103 | for (unsigned i = 0; selected_bits; ++i) { |
---|
104 | unsigned test_bit = static_cast<unsigned>(1) << i; |
---|
105 | PabloAST * term = pb.createOnes(); |
---|
106 | if (selected_bits & test_bit) { |
---|
107 | term = getBasisVar(i, pb); |
---|
108 | if ((pattern & test_bit) == 0) { |
---|
109 | term = pb.createNot(term); |
---|
110 | } |
---|
111 | selected_bits ^= test_bit; |
---|
112 | } |
---|
113 | terms.push_back(term); |
---|
114 | } |
---|
115 | if (terms.size() > 1) { |
---|
116 | //Reduce the list so that all of the expressions are contained within a single expression. |
---|
117 | std::vector<PabloAST*> temp; |
---|
118 | temp.reserve(terms.size()); |
---|
119 | do { |
---|
120 | for (unsigned i = 0; i < (terms.size() / 2); i++) { |
---|
121 | temp.push_back(pb.createAnd(terms[2 * i], terms[(2 * i) + 1])); |
---|
122 | } |
---|
123 | if (terms.size() % 2 == 1) { |
---|
124 | temp.push_back(terms.back()); |
---|
125 | } |
---|
126 | terms.swap(temp); |
---|
127 | temp.clear(); |
---|
128 | } |
---|
129 | while (terms.size() > 1); |
---|
130 | } |
---|
131 | assert (terms.size() == 1); |
---|
132 | return terms.front(); |
---|
133 | } |
---|
134 | } |
---|
135 | |
---|
136 | template<typename PabloBlockOrBuilder> |
---|
137 | inline PabloAST * Parabix_CC_Compiler::char_test_expr(const codepoint_t ch, PabloBlockOrBuilder &pb) { |
---|
138 | return bit_pattern_expr(ch, mEncodingMask, pb); |
---|
139 | } |
---|
140 | |
---|
141 | template<typename PabloBlockOrBuilder> |
---|
142 | PabloAST * Parabix_CC_Compiler::make_range(const codepoint_t n1, const codepoint_t n2, PabloBlockOrBuilder & pb) { |
---|
143 | codepoint_t diff_count = 0; |
---|
144 | |
---|
145 | for (codepoint_t diff_bits = n1 ^ n2; diff_bits; diff_count++, diff_bits >>= 1); |
---|
146 | |
---|
147 | if ((n2 < n1) || (diff_count > mEncodingBits)) { |
---|
148 | llvm::report_fatal_error("Bad Range: [" + std::to_string(n1) + "," + |
---|
149 | std::to_string(n2) + "] for " + |
---|
150 | std::to_string(mEncodingBits) + "-bit encoding"); |
---|
151 | } |
---|
152 | |
---|
153 | const codepoint_t mask0 = (static_cast<codepoint_t>(1) << diff_count) - 1; |
---|
154 | |
---|
155 | PabloAST * common = bit_pattern_expr(n1 & ~mask0, mEncodingMask ^ mask0, pb); |
---|
156 | |
---|
157 | if (diff_count == 0) return common; |
---|
158 | |
---|
159 | const codepoint_t mask1 = (static_cast<codepoint_t>(1) << (diff_count - 1)) - 1; |
---|
160 | |
---|
161 | PabloAST* lo_test = GE_Range(diff_count - 1, n1 & mask1, pb); |
---|
162 | PabloAST* hi_test = LE_Range(diff_count - 1, n2 & mask1, pb); |
---|
163 | |
---|
164 | return pb.createAnd(common, pb.createSel(getBasisVar(diff_count - 1, pb), hi_test, lo_test)); |
---|
165 | } |
---|
166 | |
---|
167 | template<typename PabloBlockOrBuilder> |
---|
168 | PabloAST * Parabix_CC_Compiler::GE_Range(const unsigned N, const unsigned n, PabloBlockOrBuilder &pb) { |
---|
169 | if (N == 0) { |
---|
170 | return pb.createOnes(); //Return a true literal. |
---|
171 | } |
---|
172 | else if (((N % 2) == 0) && ((n >> (N - 2)) == 0)) { |
---|
173 | return pb.createOr(pb.createOr(getBasisVar(N - 1, pb), getBasisVar(N - 2, pb)), GE_Range(N - 2, n, pb)); |
---|
174 | } |
---|
175 | else if (((N % 2) == 0) && ((n >> (N - 2)) == 3)) { |
---|
176 | return pb.createAnd(pb.createAnd(getBasisVar(N - 1, pb), getBasisVar(N - 2, pb)), GE_Range(N - 2, n - (3 << (N - 2)), pb)); |
---|
177 | } |
---|
178 | else if (N >= 1) |
---|
179 | { |
---|
180 | int hi_bit = n & (1 << (N - 1)); |
---|
181 | int lo_bits = n - hi_bit; |
---|
182 | PabloAST * lo_range = GE_Range(N - 1, lo_bits, pb); |
---|
183 | if (hi_bit == 0) |
---|
184 | { |
---|
185 | /* |
---|
186 | If the hi_bit of n is not set, then whenever the corresponding bit |
---|
187 | is set in the target, the target will certaily be >=. Otherwise, |
---|
188 | the value of GE_range(N-1), lo_range) is required. |
---|
189 | */ |
---|
190 | return pb.createOr(getBasisVar(N - 1, pb), lo_range); |
---|
191 | } |
---|
192 | else |
---|
193 | { |
---|
194 | /* |
---|
195 | If the hi_bit of n is set, then the corresponding bit must be set |
---|
196 | in the target for >= and GE_range(N-1, lo_bits) must also be true. |
---|
197 | */ |
---|
198 | return pb.createAnd(getBasisVar(N - 1, pb), lo_range); |
---|
199 | } |
---|
200 | } |
---|
201 | throw std::runtime_error("Unexpected input given to ge_range: " + std::to_string(N) + ", " + std::to_string(n)); |
---|
202 | } |
---|
203 | |
---|
204 | template<typename PabloBlockOrBuilder> |
---|
205 | PabloAST * Parabix_CC_Compiler::LE_Range(const unsigned N, const unsigned n, PabloBlockOrBuilder & pb) { |
---|
206 | /* |
---|
207 | If an N-bit pattern is all ones, then it is always true that any n-bit value is LE this pattern. |
---|
208 | Handling this as a special case avoids an overflow issue with n+1 requiring more than N bits. |
---|
209 | */ |
---|
210 | if ((n + 1) == (1UL << N)) { |
---|
211 | return pb.createOnes(); //True. |
---|
212 | } else { |
---|
213 | return pb.createNot(GE_Range(N, n + 1, pb)); |
---|
214 | } |
---|
215 | } |
---|
216 | |
---|
217 | template<typename PabloBlockOrBuilder> |
---|
218 | inline PabloAST * Parabix_CC_Compiler::char_or_range_expr(const codepoint_t lo, const codepoint_t hi, PabloBlockOrBuilder &pb) { |
---|
219 | if (lo == hi) { |
---|
220 | return char_test_expr(lo, pb); |
---|
221 | } else if (lo < hi) { |
---|
222 | return make_range(lo, hi, pb); |
---|
223 | } |
---|
224 | llvm::report_fatal_error(std::string("Invalid Character Set Range: [") + std::to_string(lo) + "," + std::to_string(hi) + "]"); |
---|
225 | } |
---|
226 | |
---|
227 | template<typename PabloBlockOrBuilder> |
---|
228 | inline PabloAST * Parabix_CC_Compiler::getBasisVar(const unsigned i, PabloBlockOrBuilder & pb) const { |
---|
229 | assert (i < mEncodingBits); |
---|
230 | if (mBasisSetNumbering == cc::BitNumbering::BigEndian) |
---|
231 | return mBasisBit[mEncodingBits - i - 1]; |
---|
232 | else return mBasisBit[i]; |
---|
233 | } |
---|
234 | |
---|
235 | |
---|
236 | PabloAST * compileCCfromCodeUnitStream(const CC * cc, PabloAST * codeUnitStream, PabloBuilder & pb) { |
---|
237 | const Alphabet * a = cc->getAlphabet(); |
---|
238 | if (!isa<CodeUnitAlphabet>(a)) { |
---|
239 | llvm::report_fatal_error("compileCCfromCodeUnitStream must be applied to a CC with a CodeUnitAlphabet"); |
---|
240 | } |
---|
241 | unsigned codeUnitWidth = cast<const CodeUnitAlphabet>(a)->getCodeUnitBitWidth(); |
---|
242 | unsigned topBit = 1 << (codeUnitWidth - 1); |
---|
243 | unsigned maxCodeVal = (topBit - 1) | topBit; |
---|
244 | // |
---|
245 | // Optimization if there are isolated codepoints that are not in the set. |
---|
246 | UCD::UnicodeSet negatedIsolates = (~(*cc)).isolates(); |
---|
247 | UCD::UnicodeSet withNegatedIsolates = (*cc + negatedIsolates); |
---|
248 | PabloAST * ccStrm = pb.createZeroes(); |
---|
249 | for (const auto & interval : withNegatedIsolates) { |
---|
250 | unsigned lo = re::lo_codepoint(interval); |
---|
251 | unsigned hi = re::hi_codepoint(interval); |
---|
252 | if (lo == hi) { |
---|
253 | PabloAST * testVal = pb.createRepeat(codeUnitWidth, lo); |
---|
254 | ccStrm = pb.createOr(ccStrm, pb.createEquals(codeUnitStream, testVal)); |
---|
255 | } else if (lo == 0) { |
---|
256 | if (hi == maxCodeVal) { |
---|
257 | // All code units |
---|
258 | ccStrm = pb.createOnes(); |
---|
259 | } else { |
---|
260 | PabloAST * testVal = pb.createRepeat(codeUnitWidth, hi+1); |
---|
261 | ccStrm = pb.createOr(ccStrm, pb.createLessThan(codeUnitStream, testVal)); |
---|
262 | } |
---|
263 | } else if (hi == maxCodeVal) { |
---|
264 | PabloAST * testVal = pb.createRepeat(codeUnitWidth, lo); |
---|
265 | ccStrm = pb.createOr(ccStrm, pb.createNot(pb.createLessThan(codeUnitStream, testVal))); |
---|
266 | } else { |
---|
267 | PabloAST * testVal_lo = pb.createRepeat(codeUnitWidth, lo); |
---|
268 | PabloAST * testVal_hi = pb.createRepeat(codeUnitWidth, hi + 1); |
---|
269 | ccStrm = pb.createOr(ccStrm, pb.createAnd(pb.createNot(pb.createLessThan(codeUnitStream, testVal_lo)), |
---|
270 | pb.createLessThan(codeUnitStream, testVal_hi))); |
---|
271 | } |
---|
272 | } |
---|
273 | if (!negatedIsolates.empty()) { |
---|
274 | PabloAST * toExclude = pb.createZeroes(); |
---|
275 | for (const auto & interval : negatedIsolates) { |
---|
276 | PabloAST * testVal = pb.createRepeat(codeUnitWidth, re::lo_codepoint(interval)); |
---|
277 | toExclude = pb.createOr(toExclude, pb.createEquals(codeUnitStream, testVal)); |
---|
278 | } |
---|
279 | ccStrm = pb.createAnd(ccStrm, pb.createNot(toExclude)); |
---|
280 | } |
---|
281 | return pb.createInFile(ccStrm); |
---|
282 | } |
---|
283 | |
---|
284 | Direct_CC_Compiler::Direct_CC_Compiler(pablo::PabloBlock * scope, pablo::PabloAST * codeUnitStream) |
---|
285 | : CC_Compiler(scope) |
---|
286 | , mCodeUnitStream(codeUnitStream) { |
---|
287 | } |
---|
288 | |
---|
289 | pablo::PabloAST * Direct_CC_Compiler::compileCC(const std::string & name, const re::CC *cc, pablo::PabloBlock & block) { |
---|
290 | PabloBuilder pb(&block); |
---|
291 | return compileCC(name, cc, pb); |
---|
292 | } |
---|
293 | |
---|
294 | pablo::PabloAST * Direct_CC_Compiler::compileCC(const std::string & name, const re::CC *cc, pablo::PabloBuilder & b) { |
---|
295 | return compileCCfromCodeUnitStream(cc, mCodeUnitStream, b); |
---|
296 | } |
---|
297 | |
---|
298 | } // end of namespace cc |
---|