source: icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp @ 6173

Last change on this file since 6173 was 6147, checked in by xwa163, 13 months ago

Fix bug of multiplexing grep kernel cause by cacheable (Since two multiplexed RE with them same representation “mpx_1” may have different alphabets, while for now GrepKernel? use the string representation of RE AST as cache key)

File size: 25.5 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grep_kernel.h"
7#include <boost/uuid/sha1.hpp>
8#include <re/printer_re.h>
9#include <re/re_cc.h>
10#include <re/re_name.h>
11#include <re/re_toolchain.h>
12#include <re/re_reverse.h>
13#include <grep/grep_engine.h>
14#include <pablo/codegenstate.h>
15#include <pablo/pablo_toolchain.h>
16#include <kernels/kernel_builder.h>
17#include <pablo/builder.hpp>
18#include <pablo/pe_ones.h>          // for Ones
19#include <pablo/pe_var.h>           // for Var
20#include <pablo/pe_zeroes.h>        // for Zeroes
21#include <pablo/pe_infile.h>
22#include <pablo/boolean.h>
23#include <pablo/pe_count.h>
24#include <pablo/pe_matchstar.h>
25#include <pablo/pe_pack.h>
26#include <cc/cc_compiler.h>         // for CC_Compiler
27#include <cc/alphabet.h>
28#include <cc/multiplex_CCs.h>
29#include <re/re_compiler.h>
30#include <UCD/ucd_compiler.hpp>
31#include <llvm/IR/Module.h>
32#include <llvm/Support/raw_ostream.h>
33
34using namespace kernel;
35using namespace pablo;
36using namespace re;
37using namespace llvm;
38
39inline static std::string sha1sum(const std::string & str) {
40    char buffer[41];    // 40 hex-digits and the terminating null
41    uint32_t digest[5]; // 160 bits in total
42    boost::uuids::detail::sha1 sha1;
43    sha1.process_bytes(str.c_str(), str.size());
44    sha1.get_digest(digest);
45    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
46             digest[0], digest[1], digest[2], digest[3], digest[4]);
47    return std::string(buffer);
48}
49
50
51UnicodeLineBreakKernel::UnicodeLineBreakKernel(const std::unique_ptr<kernel::KernelBuilder> & kb)
52: PabloKernel(kb,
53              "UTF8_LB",
54              {Binding{kb->getStreamSetTy(8), "basis"}, Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
55              {Binding{kb->getStreamSetTy(1, 1), "UTF8_LB", FixedRate()}}) {
56}
57
58void UnicodeLineBreakKernel::generatePabloMethod() {
59        PabloBuilder pb(getEntryScope());
60        cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
61        UCD::UCDCompiler ucdCompiler(ccc);
62   
63    Name * breakChars = re::makeName("breakChars", makeCC(makeCC(makeCC(0x0A, 0x0D), makeCC(0x85)), makeCC(0x2028,0x2029)));
64    UCD::UCDCompiler::NameMap nameMap;
65    nameMap.emplace(breakChars, nullptr);
66    ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
67    auto f = nameMap.find(breakChars);
68    if (f == nameMap.end()) llvm::report_fatal_error("UnicodeLineBreakKernel compilation failure");
69    PabloAST * breakStream = f-> second;
70    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
71    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
72    Var * const CR_before_LF = pb.createVar("CR_before_LFCR_before_LF", pb.createZeroes());
73    auto crb = pb.createScope();
74    pb.createIf(CR, crb);
75    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
76    crb.createAssign(CR_before_LF, crb.createAnd(CR, lookaheadLF));
77    breakStream = pb.createXor(breakStream, CR_before_LF);  // Remove CR_before_LF from breakStream
78    Var * const UTF8_LB = getOutputStreamVar("UTF8_LB");
79    pb.createAssign(pb.createExtract(UTF8_LB, pb.getInteger(0)), breakStream);
80}
81
82void RequiredStreams_UTF8::generatePabloMethod() {
83    PabloBuilder pb(getEntryScope());
84    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
85   
86    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
87    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
88    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
89    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
90   
91    // Remove the CR of any CR+LF
92    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
93    auto crb = pb.createScope();
94    pb.createIf(CR, crb);
95    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
96    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
97    crb.createAssign(CRLF, crlf);
98    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
99    crb.createAssign(LineBreak, removedCRLF);
100
101   
102    Zeroes * const ZEROES = pb.createZeroes();
103    PabloAST * const u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
104
105
106    Var * const nonFinal = pb.createVar("nonFinal", u8pfx);
107    Var * const u8invalid = pb.createVar("u8invalid", ZEROES);
108    Var * const valid_pfx = pb.createVar("valid_pfx", u8pfx);
109
110    auto it = pb.createScope();
111    pb.createIf(u8pfx, it);
112    PabloAST * const u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
113    PabloAST * const u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
114    PabloAST * const u8pfx4 = ccc.compileCC(makeByte(0xF0, 0xF4), it);
115    PabloAST * const u8suffix = ccc.compileCC("u8suffix", makeByte(0x80, 0xBF), it);
116   
117    //
118    // Two-byte sequences
119    Var * const anyscope = it.createVar("anyscope", ZEROES);
120    auto it2 = it.createScope();
121    it.createIf(u8pfx2, it2);
122    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
123    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
124    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
125
126
127    //
128    // Three-byte sequences   
129    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
130    auto it3 = it.createScope();
131    it.createIf(u8pfx3, it3);
132    PabloAST * const u8scope32 = it3.createAdvance(u8pfx3, 1);
133    it3.createAssign(nonFinal, it3.createOr(nonFinal, u8scope32));
134    PabloAST * const u8scope33 = it3.createAdvance(u8pfx3, 2);
135    PabloAST * const u8scope3X = it3.createOr(u8scope32, u8scope33);
136    it3.createAssign(anyscope, it3.createOr(anyscope, u8scope3X));
137    PabloAST * const E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE0), it3), 1), ccc.compileCC(makeByte(0x80, 0x9F), it3));
138    PabloAST * const ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xED), it3), 1), ccc.compileCC(makeByte(0xA0, 0xBF), it3));
139    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
140    it3.createAssign(EF_invalid, EX_invalid);
141    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
142    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
143    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
144
145    //
146    // Four-byte sequences
147    auto it4 = it.createScope();
148    it.createIf(u8pfx4, it4);
149    PabloAST * const u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
150    PabloAST * const u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
151    PabloAST * const u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
152    PabloAST * const u8scope4nonfinal = it4.createOr(u8scope42, u8scope43);
153    it4.createAssign(nonFinal, it4.createOr(nonFinal, u8scope4nonfinal));
154    PabloAST * const u8scope4X = it4.createOr(u8scope4nonfinal, u8scope44);
155    it4.createAssign(anyscope, it4.createOr(anyscope, u8scope4X));
156    PabloAST * const F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF0), it4), 1), ccc.compileCC(makeByte(0x80, 0x8F), it4));
157    PabloAST * const F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF4), it4), 1), ccc.compileCC(makeByte(0x90, 0xBF), it4));
158    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
159    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
160   
161    //
162    // Invalid cases
163    PabloAST * const legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
164    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
165    //  a scope is a mismatch, i.e., invalid UTF-8.
166    PabloAST * const mismatch = it.createXor(anyscope, u8suffix);
167    //
168    PabloAST * const pfx_invalid = it.createXor(valid_pfx, legalpfx);
169    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
170    PabloAST * const u8valid = it.createNot(u8invalid, "u8valid");
171    //
172    //
173    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
174    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
175    //PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
176   
177    Var * const required = getOutputStreamVar("nonFinal");
178    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
179    pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), LineBreak);//pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
180}
181
182RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
183: PabloKernel(kb, "RequiredStreams_UTF8",
184// input
185{Binding{kb->getStreamSetTy(8), "basis"},
186 Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
187// output
188{Binding{kb->getStreamSetTy(1), "nonFinal", FixedRate()},
189 Binding{kb->getStreamSetTy(1), "UnicodeLB", FixedRate()}}) {
190
191}
192
193void RequiredStreams_UTF16::generatePabloMethod() {
194    PabloBuilder pb(getEntryScope());
195    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
196   
197    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF, &cc::UTF16));    //u16hi_hi_surrogate = [\xD8-\xDB]
198    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF, &cc::UTF16));    //u16hi_lo_surrogate = [\xDC-\xDF]
199   
200    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
201    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
202
203    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
204    PabloAST * nonFinal = pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal");
205
206    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF, &cc::UTF16)), ccc.compileCC(makeCC(0xE000, 0xFFFF, &cc::UTF16)));
207    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
208
209    PabloAST * const nonFinalCodeUnits = pb.createExtract(getInput(1), pb.getInteger(0));
210    PabloAST * const initial = pb.createOr(u16single, u16hi_hi_surrogate, "initial");
211    PabloAST * const final = pb.createNot(pb.createOr(pb.createOr(u16hi_hi_surrogate, u16invalid), nonFinalCodeUnits), "final");
212
213    Var * const required = getOutputStreamVar("required");
214    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
215    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
216    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
217
218}
219
220RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
221: PabloKernel(kb, "RequiredStreams_UTF16",               
222// inputs
223{Binding{kb->getStreamSetTy(8), "basis"}},
224// output
225{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
226
227}
228
229ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
230: mRE(re_ast)
231, mSignature(Printer_RE::PrintRE(mRE)) {
232
233}
234
235// Helper to compute stream set inputs to pass into PabloKernel constructor.
236inline std::vector<Binding> icGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
237                                         const std::vector<std::string> & externals,
238                                         const std::vector<cc::Alphabet *> & alphabets) {
239    std::vector<Binding> streamSetInputs = {
240        Binding{b->getStreamSetTy(8), "basis"},
241    };
242    for (auto & e : externals) {
243        streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
244    }
245    for (const auto & alphabet : alphabets) {
246        unsigned basis_size = cast<cc::MultiplexedAlphabet>(alphabet)->getMultiplexedCCs().size();
247        streamSetInputs.push_back(Binding{b->getStreamSetTy(basis_size, 1), alphabet->getName() + "_basis"});
248    }
249    return streamSetInputs;
250}
251
252ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<std::string> externals, std::vector<cc::Alphabet *> alphabets, cc::BitNumbering basisSetNumbering)
253: ICGrepSignature(re)
254, PabloKernel(b, "ic" + sha1sum(mSignature),
255// inputs
256icGrepInputs(b, externals, alphabets),
257// output
258{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
259, mExternals(externals)
260, mAlphabets(alphabets)
261, mBasisSetNumbering(basisSetNumbering)
262, mIsCachable(true) {
263}
264
265std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
266    return mSignature;
267}
268
269void ICGrepKernel::generatePabloMethod() {
270    PabloBuilder pb(getEntryScope());
271    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
272    RE_Compiler re_compiler(getEntryScope(), ccc, mBasisSetNumbering);
273    for (auto & e : mExternals) {
274        re_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
275    }
276    for (auto a : mAlphabets) {
277        auto mpx_basis = getInputStreamSet(a->getName() + "_basis");
278        re_compiler.addAlphabet(a, mpx_basis);
279    }
280    PabloAST * const matches = re_compiler.compile(mRE);
281    Var * const output = getOutputStreamVar("matches");
282    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
283}
284
285
286ByteGrepSignature::ByteGrepSignature(RE * re)
287: mRE(re)
288, mSignature(Printer_RE::PrintRE(re) ) {
289}
290
291ByteGrepKernel::ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<std::string> externals)
292: ByteGrepSignature(re)
293, PabloKernel(b, "byteGrep" + sha1sum(mSignature),
294              // inputs
295{Binding{b->getStreamSetTy(1, 8), "byteData"}},
296              // output
297{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
298, mExternals(externals) {
299    for (auto & e : externals) {
300        mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
301    }
302}
303
304std::string ByteGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
305    return mSignature;
306}
307
308
309void ByteGrepKernel::generatePabloMethod() {
310    PabloBuilder pb(getEntryScope());
311    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
312    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
313    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
314    for (auto & e : mExternals) {
315        re_byte_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
316    }
317    PabloAST * const matches = re_byte_compiler.compile(mRE);
318   
319    Var * const output = getOutputStreamVar("matches");
320    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
321}
322
323// Helper to compute stream set inputs to pass into PabloKernel constructor.
324inline std::vector<Binding> byteBitGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
325                                              const std::vector<std::string> & externals) {
326    std::vector<Binding> streamSetInputs = {
327        Binding{b->getStreamSetTy(1, 8), "bytedata"},
328    };
329    for (auto & e : externals) {
330        streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
331    }
332    return streamSetInputs;
333}
334
335ByteBitGrepSignature::ByteBitGrepSignature(RE * prefix, RE * suffix)
336: mPrefixRE(prefix)
337, mSuffixRE(suffix)
338, mSignature(Printer_RE::PrintRE(mPrefixRE) + Printer_RE::PrintRE(mSuffixRE) ) {
339}
340
341ByteBitGrepKernel::ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const prefixRE, RE * const suffixRE, std::vector<std::string> externals)
342: ByteBitGrepSignature(prefixRE, suffixRE)
343, PabloKernel(b, "bBc" + sha1sum(mSignature),
344              // inputs
345              byteBitGrepInputs(b, externals),
346              // output
347{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
348, mExternals(externals) {
349}
350
351std::string ByteBitGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
352    return mSignature;
353}
354
355
356void ByteBitGrepKernel::generatePabloMethod() {
357    PabloBuilder pb(getEntryScope());
358    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
359    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
360    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
361    for (auto & e : mExternals) {
362        re_byte_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
363    }
364    PabloAST * const prefixMatches = re_byte_compiler.compile(mPrefixRE);
365    Var * const final_matches = pb.createVar("final_matches", pb.createZeroes());
366    PabloBlock * scope1 = getEntryScope()->createScope();
367    pb.createIf(prefixMatches, scope1);
368   
369    PabloAST * nybbles[2];
370    nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes);
371    nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes);
372   
373    PabloAST * bitpairs[4];
374    for (unsigned i = 0; i < 2; i++) {
375        bitpairs[2*i] = scope1->createPackL(scope1->getInteger(4), nybbles[i]);
376        bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]);
377    }
378   
379    std::vector<PabloAST *> basis(8);
380    for (unsigned i = 0; i < 4; i++) {
381        // The subtraction 7-bit is because of the confusion between
382        // little-endian and big-endian bit numbering of bytes.
383        // We should fix this, switching to little-endian numbering throughout.
384        basis[7-2*i] = scope1->createPackL(scope1->getInteger(2), bitpairs[i]);
385        basis[7-(2*i + 1)] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]);
386    }
387   
388    cc::Parabix_CC_Compiler ccc(scope1, basis);
389    RE_Compiler re_compiler(scope1, ccc);
390    scope1->createAssign(final_matches, re_compiler.compile(mSuffixRE, prefixMatches));
391    Var * const output = getOutputStreamVar("matches");
392    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), final_matches);
393}
394
395
396void MatchedLinesKernel::generatePabloMethod() {
397    PabloBuilder pb(getEntryScope());
398    PabloAST * matchResults = pb.createExtract(getInputStreamVar("matchResults"), pb.getInteger(0));
399    PabloAST * lineBreaks = pb.createExtract(getInputStreamVar("lineBreaks"), pb.getInteger(0));
400    PabloAST * notLB = pb.createNot(lineBreaks);
401    PabloAST * match_follow = pb.createMatchStar(matchResults, notLB);
402    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(notLB, 1), "unterminatedLineAtEOF");
403    Var * const matchedLines = getOutputStreamVar("matchedLines");
404    pb.createAssign(pb.createExtract(matchedLines, pb.getInteger(0)), pb.createAnd(match_follow, pb.createOr(lineBreaks, unterminatedLineAtEOF)));
405}
406
407MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
408: PabloKernel(iBuilder, "MatchedLines",
409// inputs
410{Binding{iBuilder->getStreamSetTy(1), "matchResults"}
411,Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
412// output
413{Binding{iBuilder->getStreamSetTy(1), "matchedLines", FixedRate(), Add1()}}) {
414
415}
416
417
418void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
419    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
420    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
421    Value * inverted = iBuilder->CreateXor(input, lbs);
422    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
423}
424
425InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
426: BlockOrientedKernel("Invert",
427// Inputs
428{Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}},
429// Outputs
430{Binding{builder->getStreamSetTy(1, 1), "nonMatches"}},
431// Input/Output Scalars and internal state
432{}, {}, {}) {
433
434}
435
436
437void PopcountKernel::generatePabloMethod() {
438    auto pb = this->getEntryScope();
439    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
440    pablo::Var * countResult = getOutputScalarVar("countResult");
441   
442    pb->createAssign(countResult, pb->createCount(pb->createInFile(toCount)));
443}
444
445PopcountKernel::PopcountKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
446: PabloKernel(iBuilder, "Popcount",
447{Binding{iBuilder->getStreamSetTy(1), "toCount"}},
448{},
449{},
450{Binding{iBuilder->getSizeTy(), "countResult"}}) {
451
452}
453
454
455void AbortOnNull::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) {
456    Module * const m = b->getModule();
457    DataLayout DL(m);
458    IntegerType * const intPtrTy = DL.getIntPtrType(m->getContext());
459    Type * voidPtrTy = b->getVoidPtrTy();
460    const auto blocksPerStride = getStride() / b->getBitBlockWidth();
461    Constant * const BLOCKS_PER_STRIDE = b->getSize(blocksPerStride);
462    BasicBlock * const entry = b->GetInsertBlock();
463    BasicBlock * const strideLoop = b->CreateBasicBlock("strideLoop");
464    BasicBlock * const stridesDone = b->CreateBasicBlock("stridesDone");
465    BasicBlock * const nullByteDetection = b->CreateBasicBlock("nullByteDetection");
466    BasicBlock * const nullByteFound = b->CreateBasicBlock("nullByteFound");
467    BasicBlock * const finalStride = b->CreateBasicBlock("finalStride");
468    BasicBlock * const segmentDone = b->CreateBasicBlock("segmentDone");
469
470    Value * const numOfBlocks = b->CreateMul(numOfStrides, BLOCKS_PER_STRIDE);
471    Value * availItems = b->getAvailableItemCount("bytedata");
472    //
473    // Fast loop to prove that there are no null bytes in a multiblock region.
474    // We repeatedly combine byte packs using a SIMD unsigned min operation
475    // (implemented as a Select/ICmpULT combination).
476    //
477    Value * byteStreamBasePtr = b->getInputStreamBlockPtr("bytedata", b->getSize(0), b->getSize(0));
478    Value * outputStreamBasePtr = b->getOutputStreamBlockPtr("untilNull", b->getSize(0), b->getSize(0));
479
480    //
481    // We set up a a set of eight accumulators to accumulate the minimum byte
482    // values seen at each position in a block.   The initial min value at
483    // each position is 0xFF (all ones).
484    Value * blockMin[8];
485    for (unsigned i = 0; i < 8; i++) {
486        blockMin[i] = b->fwCast(8, b->allOnes());
487    }
488    // If we're in the final block bypass the fast loop.
489    b->CreateCondBr(mIsFinal, finalStride, strideLoop);
490   
491    b->SetInsertPoint(strideLoop);
492    PHINode * const baseBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
493    baseBlockIndex->addIncoming(ConstantInt::get(baseBlockIndex->getType(), 0), entry);
494    PHINode * const blocksRemaining = b->CreatePHI(b->getSizeTy(), 2);
495    blocksRemaining->addIncoming(numOfBlocks, entry);
496    for (unsigned i = 0; i < 8; i++) {
497        Value * next = b->CreateBlockAlignedLoad(b->CreateGEP(byteStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
498        b->CreateBlockAlignedStore(next, b->CreateGEP(outputStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
499        next = b->fwCast(8, next);
500        blockMin[i] = b->CreateSelect(b->CreateICmpULT(next, blockMin[i]), next, blockMin[i]);
501    }
502    Value * nextBlockIndex = b->CreateAdd(baseBlockIndex, ConstantInt::get(baseBlockIndex->getType(), 1));
503    Value * nextRemaining = b->CreateSub(blocksRemaining, ConstantInt::get(blocksRemaining->getType(), 1));
504    baseBlockIndex->addIncoming(nextBlockIndex, strideLoop);
505    blocksRemaining->addIncoming(nextRemaining, strideLoop);
506    b->CreateCondBr(b->CreateICmpUGT(nextRemaining, ConstantInt::getNullValue(blocksRemaining->getType())), strideLoop, stridesDone);
507   
508    b->SetInsertPoint(stridesDone);
509    // Combine the 8 blockMin values.
510    for (unsigned i = 0; i < 4; i++) {
511        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+4]);
512    }
513    for (unsigned i = 0; i < 2; i++) {
514        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+2]);
515    }
516    blockMin[0] = b->CreateSelect(b->CreateICmpULT(blockMin[0], blockMin[1]), blockMin[0], blockMin[1]);
517    Value * anyNull = b->bitblock_any(b->simd_eq(8, blockMin[0], b->allZeroes()));
518   
519    b->CreateCondBr(anyNull, nullByteDetection, segmentDone);
520   
521   
522    b->SetInsertPoint(finalStride);
523    b->CreateMemCpy(b->CreatePointerCast(outputStreamBasePtr, voidPtrTy), b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), availItems, 1);
524    b->CreateBr(nullByteDetection);
525   
526    b->SetInsertPoint(nullByteDetection);
527    //  Find the exact location using memchr, which should be fast enough.
528    //
529    Value * ptrToNull = b->CreateMemChr(b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), b->getInt32(0), availItems);
530    Value * ptrAddr = b->CreatePtrToInt(ptrToNull, intPtrTy);
531    b->CreateCondBr(b->CreateICmpEQ(ptrAddr, ConstantInt::getNullValue(intPtrTy)), segmentDone, nullByteFound);
532   
533    // A null byte has been located; set the termination code and call the signal handler.
534    b->SetInsertPoint(nullByteFound);
535    Value * nullPosn = b->CreateSub(b->CreatePtrToInt(ptrToNull, intPtrTy), b->CreatePtrToInt(byteStreamBasePtr, intPtrTy));
536    b->setTerminationSignal();
537    Function * const dispatcher = m->getFunction("signal_dispatcher"); assert (dispatcher);
538    Value * handler = b->getScalarField("handler_address");
539    b->CreateCall(dispatcher, {handler, ConstantInt::get(b->getInt32Ty(), static_cast<unsigned>(grep::GrepSignal::BinaryFile))});
540    b->CreateBr(segmentDone);
541   
542    b->SetInsertPoint(segmentDone);
543    PHINode * const produced = b->CreatePHI(b->getSizeTy(), 3);
544    produced->addIncoming(nullPosn, nullByteFound);
545    produced->addIncoming(availItems, stridesDone);
546    produced->addIncoming(availItems, nullByteDetection);
547    Value * producedCount = b->getProducedItemCount("untilNull");
548    producedCount = b->CreateAdd(producedCount, produced);
549    b->setProducedItemCount("untilNull", producedCount);
550}
551
552AbortOnNull::AbortOnNull(const std::unique_ptr<kernel::KernelBuilder> & b)
553: MultiBlockKernel("AbortOnNull",
554                   // inputs
555{Binding{b->getStreamSetTy(1, 8), "bytedata"}},
556                   // outputs
557{Binding{b->getStreamSetTy(1, 8), "untilNull", FixedRate(), Deferred()}},
558                   // input scalars
559{Binding{b->getIntAddrTy(), "handler_address"}},
560{}, {}) {
561    addAttribute(CanTerminateEarly());
562}
Note: See TracBrowser for help on using the repository browser.