source: icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp @ 5682

Last change on this file since 5682 was 5646, checked in by nmedfort, 22 months ago

Minor clean up. Bug fix for object cache when the same cached kernel is used twice in a single run. Improvement to RE Minimizer.

File size: 11.2 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grep_kernel.h"
7#include <boost/uuid/sha1.hpp>
8#include <re/printer_re.h>
9#include <re/re_toolchain.h>
10#include <pablo/pablo_toolchain.h>
11#include <kernels/kernel_builder.h>
12#include <pablo/builder.hpp>
13#include <pablo/pe_ones.h>          // for Ones
14#include <pablo/pe_var.h>           // for Var
15#include <pablo/pe_zeroes.h>        // for Zeroes
16#include <pablo/boolean.h>
17#include <pablo/pe_count.h>
18#include <pablo/pe_matchstar.h>
19#include "cc/cc_compiler.h"         // for CC_Compiler
20
21#include <llvm/Support/raw_ostream.h>
22
23using namespace kernel;
24using namespace pablo;
25using namespace re;
26using namespace llvm;
27
28inline static std::string sha1sum(const std::string & str) {
29    char buffer[41];    // 40 hex-digits and the terminating null
30    uint32_t digest[5]; // 160 bits in total
31    boost::uuids::detail::sha1 sha1;
32    sha1.process_bytes(str.c_str(), str.size());
33    sha1.get_digest(digest);
34    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
35             digest[0], digest[1], digest[2], digest[3], digest[4]);
36    return std::string(buffer);
37}
38
39RegularExpressionOptimizer::RegularExpressionOptimizer(re::RE * const re_ast)
40: mRE(regular_expression_passes(re_ast))
41, mSignature(Printer_RE::PrintRE(mRE)) {
42
43}
44
45void RequiredStreams_UTF8::generatePabloMethod() {
46   
47    cc::CC_Compiler ccc(this, getInput(0));
48    auto & pb = ccc.getBuilder();
49    Zeroes * const zero = pb.createZeroes();
50    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
51    PabloAST * CR = ccc.compileCC(makeCC(0x0D));
52
53    Var * crlf = pb.createVar("crlf", zero);
54    PabloBuilder crb = PabloBuilder::Create(pb);
55    PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
56    crb.createAssign(crlf, crb.createAnd(cr1, LF));
57    pb.createIf(CR, crb);
58   
59    Var * u8invalid = pb.createVar("u8invalid", zero);
60    Var * valid_pfx = pb.createVar("valid_pfx", zero);
61    Var * nonFinal = pb.createVar("nonfinal", zero);
62    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
63   
64    PabloBuilder it = PabloBuilder::Create(pb);
65
66    pb.createIf(u8pfx, it);
67    PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
68    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
69    PabloAST * u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
70    PabloAST * u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
71   
72    //
73    // Two-byte sequences
74    Var * u8scope22 = it.createVar("u8scope22", zero);
75    PabloBuilder it2 = PabloBuilder::Create(it);
76    it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
77    it.createIf(u8pfx2, it2);
78    //
79    // Three-byte sequences
80   
81    Var * u8scope32 = it.createVar("u8scope32", zero);
82    Var * u8scope3X = it.createVar("u8scope3X", zero);
83    Var * EX_invalid = it.createVar("EX_invalid", zero);
84    PabloBuilder it3 = PabloBuilder::Create(it);
85    it.createIf(u8pfx3, it3);
86    it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
87    PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
88    it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
89    PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
90    PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
91    it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
92   
93    //
94    // Four-byte sequences
95    Var * u8scope4nonfinal = it.createVar("u8scope4nonfinal", zero);
96    Var * u8scope4X = it.createVar("u8scope4X", zero);
97    Var * FX_invalid = it.createVar("FX_invalid", zero);
98    PabloBuilder it4 = PabloBuilder::Create(it);
99    it.createIf(u8pfx4, it4);
100    PabloAST * u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
101    PabloAST * u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
102    PabloAST * u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
103    it4.createAssign(u8scope4nonfinal, it4.createOr(u8scope42, u8scope43));
104    it4.createAssign(u8scope4X, it4.createOr(u8scope4nonfinal, u8scope44));
105    PabloAST * F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
106    PabloAST * F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
107    it4.createAssign(FX_invalid, it4.createOr(F0_invalid, F4_invalid));
108   
109    //
110    // Invalid cases
111    PabloAST * anyscope = it.createOr(u8scope22, it.createOr(u8scope3X, u8scope4X));
112    PabloAST * legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
113    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
114    //  a scope is a mismatch, i.e., invalid UTF-8.
115    PabloAST * mismatch = it.createXor(anyscope, u8suffix);
116    //
117    PabloAST * EF_invalid = it.createOr(EX_invalid, FX_invalid);
118    PabloAST * pfx_invalid = it.createXor(u8pfx, legalpfx);
119    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
120    PabloAST * u8valid = it.createNot(u8invalid, "u8valid");
121    //
122    //
123   
124    it.createAssign(valid_pfx, it.createAnd(u8pfx, u8valid));
125    it.createAssign(nonFinal, it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
126   
127    PabloAST * u8single = pb.createAnd(ccc.compileCC(makeCC(0x00, 0x7F)), pb.createNot(u8invalid));
128   
129    Var * const required = getOutputStreamVar("required");
130    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u8single, valid_pfx, "initial"));
131    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
132    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(nonFinal, u8invalid), "final"));
133    pb.createAssign(pb.createExtract(required, pb.getInteger(3)), crlf);
134}
135
136RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
137: PabloKernel(kb, "RequiredStreams_UTF8",               
138              {Binding{kb->getStreamSetTy(8), "basis"}}, 
139              {Binding{kb->getStreamSetTy(4), "required"}},
140              {},
141              {}) {
142}
143
144void RequiredStreams_UTF16::generatePabloMethod() {
145   
146    cc::CC_Compiler ccc(this, getInput(0));
147    auto & pb = ccc.getBuilder();
148   
149    PabloAST * LF = ccc.compileCC("LF", makeCC(0x000A), pb);
150    PabloAST * CR = ccc.compileCC("CR", makeCC(0x000D), pb);
151    PabloAST * cr1 = pb.createAdvance(CR, 1, "cr1");
152   
153    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF));    //u16hi_hi_surrogate = [\xD8-\xDB]
154    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF));    //u16hi_lo_surrogate = [\xDC-\xDF]
155   
156    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
157    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
158    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
159   
160    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF)), ccc.compileCC(makeCC(0xE000, 0xFFFF)));
161    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
162
163    Var * const required = getOutputStreamVar("required");
164    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u16single, u16hi_hi_surrogate, "initial"));
165    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal"));
166    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(u16hi_hi_surrogate, u16invalid), "final"));
167    pb.createAssign(pb.createExtract(required, pb.getInteger(3)), pb.createAnd(cr1, LF, "crlf"));
168}
169
170RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
171: PabloKernel(kb, "RequiredStreams_UTF16",               
172              {Binding{kb->getStreamSetTy(16), "basis"}}, 
173              {Binding{kb->getStreamSetTy(4), "required"}},
174              {},
175              {}) {
176}
177
178
179ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re, unsigned numOfCharacterClasses)
180: RegularExpressionOptimizer(re)
181, PabloKernel(iBuilder,
182              "ic" + sha1sum(mSignature),
183              {Binding{iBuilder->getStreamSetTy(numOfCharacterClasses), "basis"},
184               Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"},
185               Binding{iBuilder->getStreamSetTy(4, 1), "required"}},
186
187              {Binding{iBuilder->getStreamSetTy(1, 1), "matches"}}) {
188
189}
190
191std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
192    return mSignature;
193}
194
195void ICGrepKernel::generatePabloMethod() {
196    PabloAST * const match_post = re2pablo_compiler(this, mRE);
197    PabloBlock * const pb = getEntryBlock();
198    Var * const output = getOutputStreamVar("matches");
199    pb->createAssign(pb->createExtract(output, pb->getInteger(0)), match_post);
200}
201
202void MatchedLinesKernel::generatePabloMethod() {
203    auto pb = this->getEntryBlock();
204    PabloAST * matchResults = pb->createExtract(getInputStreamVar("matchResults"), pb->getInteger(0));
205    PabloAST * lineBreaks = pb->createExtract(getInputStreamVar("lineBreaks"), pb->getInteger(0));
206    PabloAST * notLB = pb->createNot(lineBreaks);
207    PabloAST * match_follow = pb->createMatchStar(matchResults, notLB);
208    Var * const matchedLines = getOutputStreamVar("matchedLines");
209    pb->createAssign(pb->createExtract(matchedLines, pb->getInteger(0)), pb->createAnd(match_follow, lineBreaks));
210}
211
212MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
213: PabloKernel(iBuilder, "MatchedLines",
214              {Binding{iBuilder->getStreamSetTy(1), "matchResults"}, Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
215              {Binding{iBuilder->getStreamSetTy(1), "matchedLines"}},
216              {},
217              {}) {
218}
219
220
221void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
222    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
223    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
224    Value * inverted = iBuilder->CreateXor(input, lbs);
225    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
226}
227
228InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
229: BlockOrientedKernel("Invert", {Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}}, {Binding{builder->getStreamSetTy(1, 1), "nonMatches"}}, {}, {}, {}) {
230    setNoTerminateAttribute(true);   
231}
232
233
234void PopcountKernel::generatePabloMethod() {
235    auto pb = this->getEntryBlock();
236    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
237    pablo::Var * countResult = getOutputScalarVar("countResult");
238    pb->createAssign(countResult, pb->createCount(toCount));
239}
240
241PopcountKernel::PopcountKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
242: PabloKernel(iBuilder, "Popcount",
243              {Binding{iBuilder->getStreamSetTy(1), "toCount"}},
244              {},
245              {},
246              {Binding{iBuilder->getSizeTy(), "countResult"}}) {
247}
248
Note: See TracBrowser for help on using the repository browser.