source: icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp @ 6133

Last change on this file since 6133 was 6133, checked in by xwa163, 11 months ago
  1. Add sourceCC in multiplexed CC
  2. Remove workaround FakeBasisBits? from ICGrep
  3. Implement Swizzled version of LZParabix
  4. Init checkin for SwizzleByGather? Kernel
File size: 25.5 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grep_kernel.h"
7#include <boost/uuid/sha1.hpp>
8#include <re/printer_re.h>
9#include <re/re_cc.h>
10#include <re/re_name.h>
11#include <re/re_toolchain.h>
12#include <re/re_reverse.h>
13#include <grep/grep_engine.h>
14#include <pablo/codegenstate.h>
15#include <pablo/pablo_toolchain.h>
16#include <kernels/kernel_builder.h>
17#include <pablo/builder.hpp>
18#include <pablo/pe_ones.h>          // for Ones
19#include <pablo/pe_var.h>           // for Var
20#include <pablo/pe_zeroes.h>        // for Zeroes
21#include <pablo/pe_infile.h>
22#include <pablo/boolean.h>
23#include <pablo/pe_count.h>
24#include <pablo/pe_matchstar.h>
25#include <pablo/pe_pack.h>
26#include <cc/cc_compiler.h>         // for CC_Compiler
27#include <cc/alphabet.h>
28#include <cc/multiplex_CCs.h>
29#include <re/re_compiler.h>
30#include <UCD/ucd_compiler.hpp>
31#include <llvm/IR/Module.h>
32#include <llvm/Support/raw_ostream.h>
33
34using namespace kernel;
35using namespace pablo;
36using namespace re;
37using namespace llvm;
38
39inline static std::string sha1sum(const std::string & str) {
40    char buffer[41];    // 40 hex-digits and the terminating null
41    uint32_t digest[5]; // 160 bits in total
42    boost::uuids::detail::sha1 sha1;
43    sha1.process_bytes(str.c_str(), str.size());
44    sha1.get_digest(digest);
45    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
46             digest[0], digest[1], digest[2], digest[3], digest[4]);
47    return std::string(buffer);
48}
49
50
51UnicodeLineBreakKernel::UnicodeLineBreakKernel(const std::unique_ptr<kernel::KernelBuilder> & kb)
52: PabloKernel(kb,
53              "UTF8_LB",
54              {Binding{kb->getStreamSetTy(8), "basis"}, Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
55              {Binding{kb->getStreamSetTy(1, 1), "UTF8_LB", FixedRate()}}) {
56}
57
58void UnicodeLineBreakKernel::generatePabloMethod() {
59        PabloBuilder pb(getEntryScope());
60        cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
61        UCD::UCDCompiler ucdCompiler(ccc);
62   
63    Name * breakChars = re::makeName("breakChars", makeCC(makeCC(makeCC(0x0A, 0x0D), makeCC(0x85)), makeCC(0x2028,0x2029)));
64    UCD::UCDCompiler::NameMap nameMap;
65    nameMap.emplace(breakChars, nullptr);
66    ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
67    auto f = nameMap.find(breakChars);
68    if (f == nameMap.end()) llvm::report_fatal_error("UnicodeLineBreakKernel compilation failure");
69    PabloAST * breakStream = f-> second;
70    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
71    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
72    Var * const CR_before_LF = pb.createVar("CR_before_LFCR_before_LF", pb.createZeroes());
73    auto crb = pb.createScope();
74    pb.createIf(CR, crb);
75    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
76    crb.createAssign(CR_before_LF, crb.createAnd(CR, lookaheadLF));
77    breakStream = pb.createXor(breakStream, CR_before_LF);  // Remove CR_before_LF from breakStream
78    Var * const UTF8_LB = getOutputStreamVar("UTF8_LB");
79    pb.createAssign(pb.createExtract(UTF8_LB, pb.getInteger(0)), breakStream);
80}
81
82void RequiredStreams_UTF8::generatePabloMethod() {
83    PabloBuilder pb(getEntryScope());
84    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
85   
86    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
87    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
88    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
89    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
90   
91    // Remove the CR of any CR+LF
92    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
93    auto crb = pb.createScope();
94    pb.createIf(CR, crb);
95    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
96    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
97    crb.createAssign(CRLF, crlf);
98    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
99    crb.createAssign(LineBreak, removedCRLF);
100
101   
102    Zeroes * const ZEROES = pb.createZeroes();
103    PabloAST * const u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
104
105
106    Var * const nonFinal = pb.createVar("nonFinal", u8pfx);
107    Var * const u8invalid = pb.createVar("u8invalid", ZEROES);
108    Var * const valid_pfx = pb.createVar("valid_pfx", u8pfx);
109
110    auto it = pb.createScope();
111    pb.createIf(u8pfx, it);
112    PabloAST * const u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
113    PabloAST * const u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
114    PabloAST * const u8pfx4 = ccc.compileCC(makeByte(0xF0, 0xF4), it);
115    PabloAST * const u8suffix = ccc.compileCC("u8suffix", makeByte(0x80, 0xBF), it);
116   
117    //
118    // Two-byte sequences
119    Var * const anyscope = it.createVar("anyscope", ZEROES);
120    auto it2 = it.createScope();
121    it.createIf(u8pfx2, it2);
122    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
123    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
124    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
125
126
127    //
128    // Three-byte sequences   
129    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
130    auto it3 = it.createScope();
131    it.createIf(u8pfx3, it3);
132    PabloAST * const u8scope32 = it3.createAdvance(u8pfx3, 1);
133    it3.createAssign(nonFinal, it3.createOr(nonFinal, u8scope32));
134    PabloAST * const u8scope33 = it3.createAdvance(u8pfx3, 2);
135    PabloAST * const u8scope3X = it3.createOr(u8scope32, u8scope33);
136    it3.createAssign(anyscope, it3.createOr(anyscope, u8scope3X));
137    PabloAST * const E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE0), it3), 1), ccc.compileCC(makeByte(0x80, 0x9F), it3));
138    PabloAST * const ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xED), it3), 1), ccc.compileCC(makeByte(0xA0, 0xBF), it3));
139    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
140    it3.createAssign(EF_invalid, EX_invalid);
141    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
142    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
143    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
144
145    //
146    // Four-byte sequences
147    auto it4 = it.createScope();
148    it.createIf(u8pfx4, it4);
149    PabloAST * const u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
150    PabloAST * const u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
151    PabloAST * const u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
152    PabloAST * const u8scope4nonfinal = it4.createOr(u8scope42, u8scope43);
153    it4.createAssign(nonFinal, it4.createOr(nonFinal, u8scope4nonfinal));
154    PabloAST * const u8scope4X = it4.createOr(u8scope4nonfinal, u8scope44);
155    it4.createAssign(anyscope, it4.createOr(anyscope, u8scope4X));
156    PabloAST * const F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF0), it4), 1), ccc.compileCC(makeByte(0x80, 0x8F), it4));
157    PabloAST * const F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF4), it4), 1), ccc.compileCC(makeByte(0x90, 0xBF), it4));
158    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
159    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
160   
161    //
162    // Invalid cases
163    PabloAST * const legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
164    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
165    //  a scope is a mismatch, i.e., invalid UTF-8.
166    PabloAST * const mismatch = it.createXor(anyscope, u8suffix);
167    //
168    PabloAST * const pfx_invalid = it.createXor(valid_pfx, legalpfx);
169    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
170    PabloAST * const u8valid = it.createNot(u8invalid, "u8valid");
171    //
172    //
173    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
174    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
175    //PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
176   
177    Var * const required = getOutputStreamVar("nonFinal");
178    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
179    pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), LineBreak);//pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
180}
181
182RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
183: PabloKernel(kb, "RequiredStreams_UTF8",
184// input
185{Binding{kb->getStreamSetTy(8), "basis"},
186 Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
187// output
188{Binding{kb->getStreamSetTy(1), "nonFinal", FixedRate()},
189 Binding{kb->getStreamSetTy(1), "UnicodeLB", FixedRate()}}) {
190
191}
192
193void RequiredStreams_UTF16::generatePabloMethod() {
194    PabloBuilder pb(getEntryScope());
195    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
196   
197    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF, &cc::UTF16));    //u16hi_hi_surrogate = [\xD8-\xDB]
198    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF, &cc::UTF16));    //u16hi_lo_surrogate = [\xDC-\xDF]
199   
200    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
201    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
202
203    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
204    PabloAST * nonFinal = pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal");
205
206    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF, &cc::UTF16)), ccc.compileCC(makeCC(0xE000, 0xFFFF, &cc::UTF16)));
207    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
208
209    PabloAST * const nonFinalCodeUnits = pb.createExtract(getInput(1), pb.getInteger(0));
210    PabloAST * const initial = pb.createOr(u16single, u16hi_hi_surrogate, "initial");
211    PabloAST * const final = pb.createNot(pb.createOr(pb.createOr(u16hi_hi_surrogate, u16invalid), nonFinalCodeUnits), "final");
212
213    Var * const required = getOutputStreamVar("required");
214    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
215    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
216    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
217
218}
219
220RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
221: PabloKernel(kb, "RequiredStreams_UTF16",               
222// inputs
223{Binding{kb->getStreamSetTy(8), "basis"}},
224// output
225{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
226
227}
228
229ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
230: mRE(re_ast)
231, mSignature(Printer_RE::PrintRE(mRE)) {
232   
233}
234
235// Helper to compute stream set inputs to pass into PabloKernel constructor.
236inline std::vector<Binding> icGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
237                                         const std::vector<std::string> & externals,
238                                         const std::vector<cc::Alphabet *> & alphabets) {
239    std::vector<Binding> streamSetInputs = {
240        Binding{b->getStreamSetTy(8), "basis"},
241    };
242    for (auto & e : externals) {
243        streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
244    }
245    for (const auto & alphabet : alphabets) {
246        unsigned basis_size = cast<cc::MultiplexedAlphabet>(alphabet)->getMultiplexedCCs().size();
247        streamSetInputs.push_back(Binding{b->getStreamSetTy(basis_size, 1), alphabet->getName() + "_basis"});
248    }
249    return streamSetInputs;
250}
251
252ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<std::string> externals, std::vector<cc::Alphabet *> alphabets, cc::BitNumbering basisSetNumbering)
253: ICGrepSignature(re)
254, PabloKernel(b, "ic" + sha1sum(mSignature),
255// inputs
256icGrepInputs(b, externals, alphabets),
257// output
258{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
259, mExternals(externals)
260, mAlphabets(alphabets)
261, mBasisSetNumbering(basisSetNumbering) {
262}
263
264std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
265    return mSignature;
266}
267
268void ICGrepKernel::generatePabloMethod() {
269    PabloBuilder pb(getEntryScope());
270    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
271    RE_Compiler re_compiler(getEntryScope(), ccc, mBasisSetNumbering);
272    for (auto & e : mExternals) {
273        re_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
274    }
275    for (auto a : mAlphabets) {
276        auto mpx_basis = getInputStreamSet(a->getName() + "_basis");
277        re_compiler.addAlphabet(a, mpx_basis);
278    }
279    PabloAST * const matches = re_compiler.compile(mRE);
280    Var * const output = getOutputStreamVar("matches");
281    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
282}
283
284
285ByteGrepSignature::ByteGrepSignature(RE * re)
286: mRE(re)
287, mSignature(Printer_RE::PrintRE(re) ) {
288}
289
290ByteGrepKernel::ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<std::string> externals)
291: ByteGrepSignature(re)
292, PabloKernel(b, "byteGrep" + sha1sum(mSignature),
293              // inputs
294{Binding{b->getStreamSetTy(1, 8), "byteData"}},
295              // output
296{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
297, mExternals(externals) {
298    for (auto & e : externals) {
299        mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
300    }
301}
302
303std::string ByteGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
304    return mSignature;
305}
306
307
308void ByteGrepKernel::generatePabloMethod() {
309    PabloBuilder pb(getEntryScope());
310    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
311    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
312    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
313    for (auto & e : mExternals) {
314        re_byte_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
315    }
316    PabloAST * const matches = re_byte_compiler.compile(mRE);
317   
318    Var * const output = getOutputStreamVar("matches");
319    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
320}
321
322// Helper to compute stream set inputs to pass into PabloKernel constructor.
323inline std::vector<Binding> byteBitGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
324                                              const std::vector<std::string> & externals) {
325    std::vector<Binding> streamSetInputs = {
326        Binding{b->getStreamSetTy(1, 8), "bytedata"},
327    };
328    for (auto & e : externals) {
329        streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
330    }
331    return streamSetInputs;
332}
333
334ByteBitGrepSignature::ByteBitGrepSignature(RE * prefix, RE * suffix)
335: mPrefixRE(prefix)
336, mSuffixRE(suffix)
337, mSignature(Printer_RE::PrintRE(mPrefixRE) + Printer_RE::PrintRE(mSuffixRE) ) {
338}
339
340ByteBitGrepKernel::ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const prefixRE, RE * const suffixRE, std::vector<std::string> externals)
341: ByteBitGrepSignature(prefixRE, suffixRE)
342, PabloKernel(b, "bBc" + sha1sum(mSignature),
343              // inputs
344              byteBitGrepInputs(b, externals),
345              // output
346{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
347, mExternals(externals) {
348}
349
350std::string ByteBitGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
351    return mSignature;
352}
353
354
355void ByteBitGrepKernel::generatePabloMethod() {
356    PabloBuilder pb(getEntryScope());
357    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
358    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
359    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
360    for (auto & e : mExternals) {
361        re_byte_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
362    }
363    PabloAST * const prefixMatches = re_byte_compiler.compile(mPrefixRE);
364    Var * const final_matches = pb.createVar("final_matches", pb.createZeroes());
365    PabloBlock * scope1 = getEntryScope()->createScope();
366    pb.createIf(prefixMatches, scope1);
367   
368    PabloAST * nybbles[2];
369    nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes);
370    nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes);
371   
372    PabloAST * bitpairs[4];
373    for (unsigned i = 0; i < 2; i++) {
374        bitpairs[2*i] = scope1->createPackL(scope1->getInteger(4), nybbles[i]);
375        bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]);
376    }
377   
378    std::vector<PabloAST *> basis(8);
379    for (unsigned i = 0; i < 4; i++) {
380        // The subtraction 7-bit is because of the confusion between
381        // little-endian and big-endian bit numbering of bytes.
382        // We should fix this, switching to little-endian numbering throughout.
383        basis[7-2*i] = scope1->createPackL(scope1->getInteger(2), bitpairs[i]);
384        basis[7-(2*i + 1)] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]);
385    }
386   
387    cc::Parabix_CC_Compiler ccc(scope1, basis);
388    RE_Compiler re_compiler(scope1, ccc);
389    scope1->createAssign(final_matches, re_compiler.compile(mSuffixRE, prefixMatches));
390    Var * const output = getOutputStreamVar("matches");
391    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), final_matches);
392}
393
394
395void MatchedLinesKernel::generatePabloMethod() {
396    PabloBuilder pb(getEntryScope());
397    PabloAST * matchResults = pb.createExtract(getInputStreamVar("matchResults"), pb.getInteger(0));
398    PabloAST * lineBreaks = pb.createExtract(getInputStreamVar("lineBreaks"), pb.getInteger(0));
399    PabloAST * notLB = pb.createNot(lineBreaks);
400    PabloAST * match_follow = pb.createMatchStar(matchResults, notLB);
401    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(notLB, 1), "unterminatedLineAtEOF");
402    Var * const matchedLines = getOutputStreamVar("matchedLines");
403    pb.createAssign(pb.createExtract(matchedLines, pb.getInteger(0)), pb.createAnd(match_follow, pb.createOr(lineBreaks, unterminatedLineAtEOF)));
404}
405
406MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
407: PabloKernel(iBuilder, "MatchedLines",
408// inputs
409{Binding{iBuilder->getStreamSetTy(1), "matchResults"}
410,Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
411// output
412{Binding{iBuilder->getStreamSetTy(1), "matchedLines", FixedRate(), Add1()}}) {
413
414}
415
416
417void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
418    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
419    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
420    Value * inverted = iBuilder->CreateXor(input, lbs);
421    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
422}
423
424InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
425: BlockOrientedKernel("Invert",
426// Inputs
427{Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}},
428// Outputs
429{Binding{builder->getStreamSetTy(1, 1), "nonMatches"}},
430// Input/Output Scalars and internal state
431{}, {}, {}) {
432
433}
434
435
436void PopcountKernel::generatePabloMethod() {
437    auto pb = this->getEntryScope();
438    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
439    pablo::Var * countResult = getOutputScalarVar("countResult");
440   
441    pb->createAssign(countResult, pb->createCount(pb->createInFile(toCount)));
442}
443
444PopcountKernel::PopcountKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
445: PabloKernel(iBuilder, "Popcount",
446{Binding{iBuilder->getStreamSetTy(1), "toCount"}},
447{},
448{},
449{Binding{iBuilder->getSizeTy(), "countResult"}}) {
450
451}
452
453
454void AbortOnNull::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) {
455    Module * const m = b->getModule();
456    DataLayout DL(m);
457    IntegerType * const intPtrTy = DL.getIntPtrType(m->getContext());
458    Type * voidPtrTy = b->getVoidPtrTy();
459    const auto blocksPerStride = getStride() / b->getBitBlockWidth();
460    Constant * const BLOCKS_PER_STRIDE = b->getSize(blocksPerStride);
461    BasicBlock * const entry = b->GetInsertBlock();
462    BasicBlock * const strideLoop = b->CreateBasicBlock("strideLoop");
463    BasicBlock * const stridesDone = b->CreateBasicBlock("stridesDone");
464    BasicBlock * const nullByteDetection = b->CreateBasicBlock("nullByteDetection");
465    BasicBlock * const nullByteFound = b->CreateBasicBlock("nullByteFound");
466    BasicBlock * const finalStride = b->CreateBasicBlock("finalStride");
467    BasicBlock * const segmentDone = b->CreateBasicBlock("segmentDone");
468
469    Value * const numOfBlocks = b->CreateMul(numOfStrides, BLOCKS_PER_STRIDE);
470    Value * availItems = b->getAvailableItemCount("bytedata");
471    //
472    // Fast loop to prove that there are no null bytes in a multiblock region.
473    // We repeatedly combine byte packs using a SIMD unsigned min operation
474    // (implemented as a Select/ICmpULT combination).
475    //
476    Value * byteStreamBasePtr = b->getInputStreamBlockPtr("bytedata", b->getSize(0), b->getSize(0));
477    Value * outputStreamBasePtr = b->getOutputStreamBlockPtr("untilNull", b->getSize(0), b->getSize(0));
478
479    //
480    // We set up a a set of eight accumulators to accumulate the minimum byte
481    // values seen at each position in a block.   The initial min value at
482    // each position is 0xFF (all ones).
483    Value * blockMin[8];
484    for (unsigned i = 0; i < 8; i++) {
485        blockMin[i] = b->fwCast(8, b->allOnes());
486    }
487    // If we're in the final block bypass the fast loop.
488    b->CreateCondBr(mIsFinal, finalStride, strideLoop);
489   
490    b->SetInsertPoint(strideLoop);
491    PHINode * const baseBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
492    baseBlockIndex->addIncoming(ConstantInt::get(baseBlockIndex->getType(), 0), entry);
493    PHINode * const blocksRemaining = b->CreatePHI(b->getSizeTy(), 2);
494    blocksRemaining->addIncoming(numOfBlocks, entry);
495    for (unsigned i = 0; i < 8; i++) {
496        Value * next = b->CreateBlockAlignedLoad(b->CreateGEP(byteStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
497        b->CreateBlockAlignedStore(next, b->CreateGEP(outputStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
498        next = b->fwCast(8, next);
499        blockMin[i] = b->CreateSelect(b->CreateICmpULT(next, blockMin[i]), next, blockMin[i]);
500    }
501    Value * nextBlockIndex = b->CreateAdd(baseBlockIndex, ConstantInt::get(baseBlockIndex->getType(), 1));
502    Value * nextRemaining = b->CreateSub(blocksRemaining, ConstantInt::get(blocksRemaining->getType(), 1));
503    baseBlockIndex->addIncoming(nextBlockIndex, strideLoop);
504    blocksRemaining->addIncoming(nextRemaining, strideLoop);
505    b->CreateCondBr(b->CreateICmpUGT(nextRemaining, ConstantInt::getNullValue(blocksRemaining->getType())), strideLoop, stridesDone);
506   
507    b->SetInsertPoint(stridesDone);
508    // Combine the 8 blockMin values.
509    for (unsigned i = 0; i < 4; i++) {
510        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+4]);
511    }
512    for (unsigned i = 0; i < 2; i++) {
513        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+2]);
514    }
515    blockMin[0] = b->CreateSelect(b->CreateICmpULT(blockMin[0], blockMin[1]), blockMin[0], blockMin[1]);
516    Value * anyNull = b->bitblock_any(b->simd_eq(8, blockMin[0], b->allZeroes()));
517   
518    b->CreateCondBr(anyNull, nullByteDetection, segmentDone);
519   
520   
521    b->SetInsertPoint(finalStride);
522    b->CreateMemCpy(b->CreatePointerCast(outputStreamBasePtr, voidPtrTy), b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), availItems, 1);
523    b->CreateBr(nullByteDetection);
524   
525    b->SetInsertPoint(nullByteDetection);
526    //  Find the exact location using memchr, which should be fast enough.
527    //
528    Value * ptrToNull = b->CreateMemChr(b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), b->getInt32(0), availItems);
529    Value * ptrAddr = b->CreatePtrToInt(ptrToNull, intPtrTy);
530    b->CreateCondBr(b->CreateICmpEQ(ptrAddr, ConstantInt::getNullValue(intPtrTy)), segmentDone, nullByteFound);
531   
532    // A null byte has been located; set the termination code and call the signal handler.
533    b->SetInsertPoint(nullByteFound);
534    Value * nullPosn = b->CreateSub(b->CreatePtrToInt(ptrToNull, intPtrTy), b->CreatePtrToInt(byteStreamBasePtr, intPtrTy));
535    b->setTerminationSignal();
536    Function * const dispatcher = m->getFunction("signal_dispatcher"); assert (dispatcher);
537    Value * handler = b->getScalarField("handler_address");
538    b->CreateCall(dispatcher, {handler, ConstantInt::get(b->getInt32Ty(), static_cast<unsigned>(grep::GrepSignal::BinaryFile))});
539    b->CreateBr(segmentDone);
540   
541    b->SetInsertPoint(segmentDone);
542    PHINode * const produced = b->CreatePHI(b->getSizeTy(), 3);
543    produced->addIncoming(nullPosn, nullByteFound);
544    produced->addIncoming(availItems, stridesDone);
545    produced->addIncoming(availItems, nullByteDetection);
546    Value * producedCount = b->getProducedItemCount("untilNull");
547    producedCount = b->CreateAdd(producedCount, produced);
548    b->setProducedItemCount("untilNull", producedCount);
549}
550
551AbortOnNull::AbortOnNull(const std::unique_ptr<kernel::KernelBuilder> & b)
552: MultiBlockKernel("AbortOnNull",
553                   // inputs
554{Binding{b->getStreamSetTy(1, 8), "bytedata"}},
555                   // outputs
556{Binding{b->getStreamSetTy(1, 8), "untilNull", FixedRate(), Deferred()}},
557                   // input scalars
558{Binding{b->getIntAddrTy(), "handler_address"}},
559{}, {}) {
560    addAttribute(CanTerminateEarly());
561}
Note: See TracBrowser for help on using the repository browser.