source: icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp @ 6135

Last change on this file since 6135 was 6133, checked in by xwa163, 9 months ago
  1. Add sourceCC in multiplexed CC
  2. Remove workaround FakeBasisBits? from ICGrep
  3. Implement Swizzled version of LZParabix
  4. Init checkin for SwizzleByGather? Kernel
File size: 25.5 KB
RevLine 
[5404]1/*
[5900]2 *  Copyright (c) 2018 International Characters.
[5404]3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grep_kernel.h"
7#include <boost/uuid/sha1.hpp>
8#include <re/printer_re.h>
[5902]9#include <re/re_cc.h>
10#include <re/re_name.h>
[5404]11#include <re/re_toolchain.h>
[5836]12#include <re/re_reverse.h>
[5989]13#include <grep/grep_engine.h>
[5889]14#include <pablo/codegenstate.h>
[5404]15#include <pablo/pablo_toolchain.h>
[5436]16#include <kernels/kernel_builder.h>
17#include <pablo/builder.hpp>
[5561]18#include <pablo/pe_ones.h>          // for Ones
19#include <pablo/pe_var.h>           // for Var
20#include <pablo/pe_zeroes.h>        // for Zeroes
[5914]21#include <pablo/pe_infile.h>
[5548]22#include <pablo/boolean.h>
[5413]23#include <pablo/pe_count.h>
[5548]24#include <pablo/pe_matchstar.h>
[5889]25#include <pablo/pe_pack.h>
[5797]26#include <cc/cc_compiler.h>         // for CC_Compiler
27#include <cc/alphabet.h>
[5819]28#include <cc/multiplex_CCs.h>
[5816]29#include <re/re_compiler.h>
[5902]30#include <UCD/ucd_compiler.hpp>
[5989]31#include <llvm/IR/Module.h>
[5464]32#include <llvm/Support/raw_ostream.h>
33
[5404]34using namespace kernel;
35using namespace pablo;
36using namespace re;
37using namespace llvm;
38
[5408]39inline static std::string sha1sum(const std::string & str) {
[5404]40    char buffer[41];    // 40 hex-digits and the terminating null
[5408]41    uint32_t digest[5]; // 160 bits in total
[5404]42    boost::uuids::detail::sha1 sha1;
43    sha1.process_bytes(str.c_str(), str.size());
44    sha1.get_digest(digest);
45    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
46             digest[0], digest[1], digest[2], digest[3], digest[4]);
47    return std::string(buffer);
48}
49
[5902]50
51UnicodeLineBreakKernel::UnicodeLineBreakKernel(const std::unique_ptr<kernel::KernelBuilder> & kb)
52: PabloKernel(kb,
53              "UTF8_LB",
54              {Binding{kb->getStreamSetTy(8), "basis"}, Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
55              {Binding{kb->getStreamSetTy(1, 1), "UTF8_LB", FixedRate()}}) {
56}
57
58void UnicodeLineBreakKernel::generatePabloMethod() {
59        PabloBuilder pb(getEntryScope());
60        cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
61        UCD::UCDCompiler ucdCompiler(ccc);
62   
63    Name * breakChars = re::makeName("breakChars", makeCC(makeCC(makeCC(0x0A, 0x0D), makeCC(0x85)), makeCC(0x2028,0x2029)));
64    UCD::UCDCompiler::NameMap nameMap;
65    nameMap.emplace(breakChars, nullptr);
66    ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
67    auto f = nameMap.find(breakChars);
68    if (f == nameMap.end()) llvm::report_fatal_error("UnicodeLineBreakKernel compilation failure");
69    PabloAST * breakStream = f-> second;
70    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
71    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
72    Var * const CR_before_LF = pb.createVar("CR_before_LFCR_before_LF", pb.createZeroes());
73    auto crb = pb.createScope();
74    pb.createIf(CR, crb);
75    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
76    crb.createAssign(CR_before_LF, crb.createAnd(CR, lookaheadLF));
77    breakStream = pb.createXor(breakStream, CR_before_LF);  // Remove CR_before_LF from breakStream
78    Var * const UTF8_LB = getOutputStreamVar("UTF8_LB");
79    pb.createAssign(pb.createExtract(UTF8_LB, pb.getInteger(0)), breakStream);
80}
81
[5561]82void RequiredStreams_UTF8::generatePabloMethod() {
[5842]83    PabloBuilder pb(getEntryScope());
[5872]84    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
[5867]85   
86    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
87    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
88    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
89    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
90   
91    // Remove the CR of any CR+LF
92    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
93    auto crb = pb.createScope();
94    pb.createIf(CR, crb);
95    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
96    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
97    crb.createAssign(CRLF, crlf);
98    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
99    crb.createAssign(LineBreak, removedCRLF);
100
101   
[5782]102    Zeroes * const ZEROES = pb.createZeroes();
[5797]103    PabloAST * const u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
[5561]104
[5782]105
106    Var * const nonFinal = pb.createVar("nonFinal", u8pfx);
107    Var * const u8invalid = pb.createVar("u8invalid", ZEROES);
108    Var * const valid_pfx = pb.createVar("valid_pfx", u8pfx);
109
[5836]110    auto it = pb.createScope();
[5561]111    pb.createIf(u8pfx, it);
[5797]112    PabloAST * const u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
113    PabloAST * const u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
114    PabloAST * const u8pfx4 = ccc.compileCC(makeByte(0xF0, 0xF4), it);
115    PabloAST * const u8suffix = ccc.compileCC("u8suffix", makeByte(0x80, 0xBF), it);
[5561]116   
117    //
118    // Two-byte sequences
[5782]119    Var * const anyscope = it.createVar("anyscope", ZEROES);
[5836]120    auto it2 = it.createScope();
[5561]121    it.createIf(u8pfx2, it2);
[5782]122    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
[5867]123    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
124    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
[5782]125
[5867]126
[5561]127    //
[5782]128    // Three-byte sequences   
129    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
[5836]130    auto it3 = it.createScope();
[5561]131    it.createIf(u8pfx3, it3);
[5782]132    PabloAST * const u8scope32 = it3.createAdvance(u8pfx3, 1);
133    it3.createAssign(nonFinal, it3.createOr(nonFinal, u8scope32));
134    PabloAST * const u8scope33 = it3.createAdvance(u8pfx3, 2);
135    PabloAST * const u8scope3X = it3.createOr(u8scope32, u8scope33);
136    it3.createAssign(anyscope, it3.createOr(anyscope, u8scope3X));
[5797]137    PabloAST * const E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE0), it3), 1), ccc.compileCC(makeByte(0x80, 0x9F), it3));
138    PabloAST * const ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xED), it3), 1), ccc.compileCC(makeByte(0xA0, 0xBF), it3));
[5782]139    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
140    it3.createAssign(EF_invalid, EX_invalid);
[5867]141    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
142    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
143    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
[5782]144
[5561]145    //
146    // Four-byte sequences
[5836]147    auto it4 = it.createScope();
[5561]148    it.createIf(u8pfx4, it4);
[5782]149    PabloAST * const u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
150    PabloAST * const u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
151    PabloAST * const u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
152    PabloAST * const u8scope4nonfinal = it4.createOr(u8scope42, u8scope43);
153    it4.createAssign(nonFinal, it4.createOr(nonFinal, u8scope4nonfinal));
154    PabloAST * const u8scope4X = it4.createOr(u8scope4nonfinal, u8scope44);
155    it4.createAssign(anyscope, it4.createOr(anyscope, u8scope4X));
[5797]156    PabloAST * const F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF0), it4), 1), ccc.compileCC(makeByte(0x80, 0x8F), it4));
157    PabloAST * const F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF4), it4), 1), ccc.compileCC(makeByte(0x90, 0xBF), it4));
[5782]158    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
159    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
[5561]160   
161    //
162    // Invalid cases
[5782]163    PabloAST * const legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
[5561]164    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
165    //  a scope is a mismatch, i.e., invalid UTF-8.
[5782]166    PabloAST * const mismatch = it.createXor(anyscope, u8suffix);
[5561]167    //
[5782]168    PabloAST * const pfx_invalid = it.createXor(valid_pfx, legalpfx);
[5561]169    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
[5782]170    PabloAST * const u8valid = it.createNot(u8invalid, "u8valid");
[5561]171    //
172    //
[5782]173    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
[5867]174    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
[5900]175    //PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
[5867]176   
177    Var * const required = getOutputStreamVar("nonFinal");
[5863]178    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
[5900]179    pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), LineBreak);//pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
[5561]180}
181
182RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
[5782]183: PabloKernel(kb, "RequiredStreams_UTF8",
184// input
[5985]185{Binding{kb->getStreamSetTy(8), "basis"},
186 Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
[5782]187// output
[5867]188{Binding{kb->getStreamSetTy(1), "nonFinal", FixedRate()},
[5890]189 Binding{kb->getStreamSetTy(1), "UnicodeLB", FixedRate()}}) {
[5782]190
[5561]191}
192
193void RequiredStreams_UTF16::generatePabloMethod() {
[5842]194    PabloBuilder pb(getEntryScope());
[5872]195    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
[5561]196   
[5797]197    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF, &cc::UTF16));    //u16hi_hi_surrogate = [\xD8-\xDB]
198    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF, &cc::UTF16));    //u16hi_lo_surrogate = [\xDC-\xDF]
[5561]199   
200    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
201    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
[5782]202
[5561]203    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
[5782]204    PabloAST * nonFinal = pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal");
205
[5797]206    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF, &cc::UTF16)), ccc.compileCC(makeCC(0xE000, 0xFFFF, &cc::UTF16)));
[5561]207    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
208
[5782]209    PabloAST * const nonFinalCodeUnits = pb.createExtract(getInput(1), pb.getInteger(0));
210    PabloAST * const initial = pb.createOr(u16single, u16hi_hi_surrogate, "initial");
211    PabloAST * const final = pb.createNot(pb.createOr(pb.createOr(u16hi_hi_surrogate, u16invalid), nonFinalCodeUnits), "final");
212
[5561]213    Var * const required = getOutputStreamVar("required");
[5782]214    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
215    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
216    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
217
[5561]218}
219
220RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
221: PabloKernel(kb, "RequiredStreams_UTF16",               
[5782]222// inputs
223{Binding{kb->getStreamSetTy(8), "basis"}},
224// output
225{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
226
[5561]227}
228
[5769]229ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
230: mRE(re_ast)
231, mSignature(Printer_RE::PrintRE(mRE)) {
232   
233}
234
[5816]235// Helper to compute stream set inputs to pass into PabloKernel constructor.
[5836]236inline std::vector<Binding> icGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
[5881]237                                         const std::vector<std::string> & externals,
[5836]238                                         const std::vector<cc::Alphabet *> & alphabets) {
239    std::vector<Binding> streamSetInputs = {
240        Binding{b->getStreamSetTy(8), "basis"},
241    };
[5881]242    for (auto & e : externals) {
243        streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
244    }
[5836]245    for (const auto & alphabet : alphabets) {
246        unsigned basis_size = cast<cc::MultiplexedAlphabet>(alphabet)->getMultiplexedCCs().size();
247        streamSetInputs.push_back(Binding{b->getStreamSetTy(basis_size, 1), alphabet->getName() + "_basis"});
[5816]248    }
249    return streamSetInputs;
250}
251
[6133]252ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<std::string> externals, std::vector<cc::Alphabet *> alphabets, cc::BitNumbering basisSetNumbering)
[5769]253: ICGrepSignature(re)
[5836]254, PabloKernel(b, "ic" + sha1sum(mSignature),
[5782]255// inputs
[5881]256icGrepInputs(b, externals, alphabets),
[5782]257// output
[5836]258{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
[5881]259, mExternals(externals)
[6119]260, mAlphabets(alphabets)
[6133]261, mBasisSetNumbering(basisSetNumbering) {
[5404]262}
263
[5454]264std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
[5404]265    return mSignature;
266}
267
[5454]268void ICGrepKernel::generatePabloMethod() {
[5842]269    PabloBuilder pb(getEntryScope());
[6133]270    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
271    RE_Compiler re_compiler(getEntryScope(), ccc, mBasisSetNumbering);
[5881]272    for (auto & e : mExternals) {
273        re_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
274    }
[5836]275    for (auto a : mAlphabets) {
[5843]276        auto mpx_basis = getInputStreamSet(a->getName() + "_basis");
277        re_compiler.addAlphabet(a, mpx_basis);
[5816]278    }
[5836]279    PabloAST * const matches = re_compiler.compile(mRE);
[5646]280    Var * const output = getOutputStreamVar("matches");
[5842]281    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
[5404]282}
[5413]283
[5902]284
285ByteGrepSignature::ByteGrepSignature(RE * re)
286: mRE(re)
287, mSignature(Printer_RE::PrintRE(re) ) {
288}
289
290ByteGrepKernel::ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<std::string> externals)
291: ByteGrepSignature(re)
[5933]292, PabloKernel(b, "byteGrep" + sha1sum(mSignature),
[5902]293              // inputs
294{Binding{b->getStreamSetTy(1, 8), "byteData"}},
295              // output
296{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
297, mExternals(externals) {
298    for (auto & e : externals) {
299        mStreamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
300    }
301}
302
303std::string ByteGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
304    return mSignature;
305}
306
307
308void ByteGrepKernel::generatePabloMethod() {
309    PabloBuilder pb(getEntryScope());
310    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
311    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
312    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
313    for (auto & e : mExternals) {
314        re_byte_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
315    }
316    PabloAST * const matches = re_byte_compiler.compile(mRE);
317   
318    Var * const output = getOutputStreamVar("matches");
319    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
320}
321
[5889]322// Helper to compute stream set inputs to pass into PabloKernel constructor.
323inline std::vector<Binding> byteBitGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
[5902]324                                              const std::vector<std::string> & externals) {
[5889]325    std::vector<Binding> streamSetInputs = {
326        Binding{b->getStreamSetTy(1, 8), "bytedata"},
327    };
328    for (auto & e : externals) {
329        streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
330    }
331    return streamSetInputs;
332}
333
334ByteBitGrepSignature::ByteBitGrepSignature(RE * prefix, RE * suffix)
335: mPrefixRE(prefix)
336, mSuffixRE(suffix)
337, mSignature(Printer_RE::PrintRE(mPrefixRE) + Printer_RE::PrintRE(mSuffixRE) ) {
338}
339
340ByteBitGrepKernel::ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const prefixRE, RE * const suffixRE, std::vector<std::string> externals)
341: ByteBitGrepSignature(prefixRE, suffixRE)
342, PabloKernel(b, "bBc" + sha1sum(mSignature),
343              // inputs
344              byteBitGrepInputs(b, externals),
345              // output
346{Binding{b->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}})
347, mExternals(externals) {
348}
349
350std::string ByteBitGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
351    return mSignature;
352}
353
354
355void ByteBitGrepKernel::generatePabloMethod() {
356    PabloBuilder pb(getEntryScope());
357    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
358    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
359    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
360    for (auto & e : mExternals) {
361        re_byte_compiler.addPrecompiled(e, pb.createExtract(getInputStreamVar(e), pb.getInteger(0)));
362    }
363    PabloAST * const prefixMatches = re_byte_compiler.compile(mPrefixRE);
[5908]364    Var * const final_matches = pb.createVar("final_matches", pb.createZeroes());
[5889]365    PabloBlock * scope1 = getEntryScope()->createScope();
366    pb.createIf(prefixMatches, scope1);
367   
368    PabloAST * nybbles[2];
369    nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes);
370    nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes);
371   
372    PabloAST * bitpairs[4];
373    for (unsigned i = 0; i < 2; i++) {
374        bitpairs[2*i] = scope1->createPackL(scope1->getInteger(4), nybbles[i]);
375        bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]);
376    }
377   
378    std::vector<PabloAST *> basis(8);
379    for (unsigned i = 0; i < 4; i++) {
[5908]380        // The subtraction 7-bit is because of the confusion between
381        // little-endian and big-endian bit numbering of bytes.
382        // We should fix this, switching to little-endian numbering throughout.
[5889]383        basis[7-2*i] = scope1->createPackL(scope1->getInteger(2), bitpairs[i]);
384        basis[7-(2*i + 1)] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]);
385    }
386   
387    cc::Parabix_CC_Compiler ccc(scope1, basis);
388    RE_Compiler re_compiler(scope1, ccc);
[5908]389    scope1->createAssign(final_matches, re_compiler.compile(mSuffixRE, prefixMatches));
[5889]390    Var * const output = getOutputStreamVar("matches");
[5908]391    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), final_matches);
[5889]392}
393
394
[5548]395void MatchedLinesKernel::generatePabloMethod() {
[5842]396    PabloBuilder pb(getEntryScope());
397    PabloAST * matchResults = pb.createExtract(getInputStreamVar("matchResults"), pb.getInteger(0));
398    PabloAST * lineBreaks = pb.createExtract(getInputStreamVar("lineBreaks"), pb.getInteger(0));
399    PabloAST * notLB = pb.createNot(lineBreaks);
400    PabloAST * match_follow = pb.createMatchStar(matchResults, notLB);
[5890]401    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(notLB, 1), "unterminatedLineAtEOF");
[5548]402    Var * const matchedLines = getOutputStreamVar("matchedLines");
[5890]403    pb.createAssign(pb.createExtract(matchedLines, pb.getInteger(0)), pb.createAnd(match_follow, pb.createOr(lineBreaks, unterminatedLineAtEOF)));
[5548]404}
405
406MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
407: PabloKernel(iBuilder, "MatchedLines",
[5782]408// inputs
409{Binding{iBuilder->getStreamSetTy(1), "matchResults"}
410,Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
411// output
[5890]412{Binding{iBuilder->getStreamSetTy(1), "matchedLines", FixedRate(), Add1()}}) {
[5782]413
[5548]414}
415
416
[5440]417void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
418    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
419    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
[5413]420    Value * inverted = iBuilder->CreateXor(input, lbs);
[5440]421    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
[5413]422}
423
[5436]424InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder)
[5706]425: BlockOrientedKernel("Invert",
[5793]426// Inputs
427{Binding{builder->getStreamSetTy(1, 1), "matchedLines"}, Binding{builder->getStreamSetTy(1, 1), "lineBreaks"}},
428// Outputs
429{Binding{builder->getStreamSetTy(1, 1), "nonMatches"}},
430// Input/Output Scalars and internal state
431{}, {}, {}) {
432
[5413]433}
434
435
[5491]436void PopcountKernel::generatePabloMethod() {
[5836]437    auto pb = this->getEntryScope();
[5491]438    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
439    pablo::Var * countResult = getOutputScalarVar("countResult");
[5914]440   
441    pb->createAssign(countResult, pb->createCount(pb->createInFile(toCount)));
[5491]442}
443
[5436]444PopcountKernel::PopcountKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
[5413]445: PabloKernel(iBuilder, "Popcount",
[5836]446{Binding{iBuilder->getStreamSetTy(1), "toCount"}},
447{},
448{},
449{Binding{iBuilder->getSizeTy(), "countResult"}}) {
450
[5413]451}
[5989]452
453
454void AbortOnNull::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) {
455    Module * const m = b->getModule();
456    DataLayout DL(m);
457    IntegerType * const intPtrTy = DL.getIntPtrType(m->getContext());
458    Type * voidPtrTy = b->getVoidPtrTy();
459    const auto blocksPerStride = getStride() / b->getBitBlockWidth();
460    Constant * const BLOCKS_PER_STRIDE = b->getSize(blocksPerStride);
461    BasicBlock * const entry = b->GetInsertBlock();
462    BasicBlock * const strideLoop = b->CreateBasicBlock("strideLoop");
463    BasicBlock * const stridesDone = b->CreateBasicBlock("stridesDone");
464    BasicBlock * const nullByteDetection = b->CreateBasicBlock("nullByteDetection");
465    BasicBlock * const nullByteFound = b->CreateBasicBlock("nullByteFound");
466    BasicBlock * const finalStride = b->CreateBasicBlock("finalStride");
467    BasicBlock * const segmentDone = b->CreateBasicBlock("segmentDone");
468
469    Value * const numOfBlocks = b->CreateMul(numOfStrides, BLOCKS_PER_STRIDE);
470    Value * availItems = b->getAvailableItemCount("bytedata");
471    //
472    // Fast loop to prove that there are no null bytes in a multiblock region.
473    // We repeatedly combine byte packs using a SIMD unsigned min operation
474    // (implemented as a Select/ICmpULT combination).
475    //
476    Value * byteStreamBasePtr = b->getInputStreamBlockPtr("bytedata", b->getSize(0), b->getSize(0));
477    Value * outputStreamBasePtr = b->getOutputStreamBlockPtr("untilNull", b->getSize(0), b->getSize(0));
478
479    //
480    // We set up a a set of eight accumulators to accumulate the minimum byte
481    // values seen at each position in a block.   The initial min value at
482    // each position is 0xFF (all ones).
483    Value * blockMin[8];
484    for (unsigned i = 0; i < 8; i++) {
485        blockMin[i] = b->fwCast(8, b->allOnes());
486    }
487    // If we're in the final block bypass the fast loop.
488    b->CreateCondBr(mIsFinal, finalStride, strideLoop);
489   
490    b->SetInsertPoint(strideLoop);
491    PHINode * const baseBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
492    baseBlockIndex->addIncoming(ConstantInt::get(baseBlockIndex->getType(), 0), entry);
493    PHINode * const blocksRemaining = b->CreatePHI(b->getSizeTy(), 2);
494    blocksRemaining->addIncoming(numOfBlocks, entry);
495    for (unsigned i = 0; i < 8; i++) {
496        Value * next = b->CreateBlockAlignedLoad(b->CreateGEP(byteStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
497        b->CreateBlockAlignedStore(next, b->CreateGEP(outputStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
498        next = b->fwCast(8, next);
499        blockMin[i] = b->CreateSelect(b->CreateICmpULT(next, blockMin[i]), next, blockMin[i]);
500    }
501    Value * nextBlockIndex = b->CreateAdd(baseBlockIndex, ConstantInt::get(baseBlockIndex->getType(), 1));
502    Value * nextRemaining = b->CreateSub(blocksRemaining, ConstantInt::get(blocksRemaining->getType(), 1));
503    baseBlockIndex->addIncoming(nextBlockIndex, strideLoop);
504    blocksRemaining->addIncoming(nextRemaining, strideLoop);
505    b->CreateCondBr(b->CreateICmpUGT(nextRemaining, ConstantInt::getNullValue(blocksRemaining->getType())), strideLoop, stridesDone);
506   
507    b->SetInsertPoint(stridesDone);
508    // Combine the 8 blockMin values.
509    for (unsigned i = 0; i < 4; i++) {
510        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+4]);
511    }
512    for (unsigned i = 0; i < 2; i++) {
513        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+2]);
514    }
515    blockMin[0] = b->CreateSelect(b->CreateICmpULT(blockMin[0], blockMin[1]), blockMin[0], blockMin[1]);
516    Value * anyNull = b->bitblock_any(b->simd_eq(8, blockMin[0], b->allZeroes()));
517   
518    b->CreateCondBr(anyNull, nullByteDetection, segmentDone);
519   
520   
521    b->SetInsertPoint(finalStride);
522    b->CreateMemCpy(b->CreatePointerCast(outputStreamBasePtr, voidPtrTy), b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), availItems, 1);
523    b->CreateBr(nullByteDetection);
524   
525    b->SetInsertPoint(nullByteDetection);
526    //  Find the exact location using memchr, which should be fast enough.
527    //
528    Value * ptrToNull = b->CreateMemChr(b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), b->getInt32(0), availItems);
529    Value * ptrAddr = b->CreatePtrToInt(ptrToNull, intPtrTy);
530    b->CreateCondBr(b->CreateICmpEQ(ptrAddr, ConstantInt::getNullValue(intPtrTy)), segmentDone, nullByteFound);
531   
532    // A null byte has been located; set the termination code and call the signal handler.
533    b->SetInsertPoint(nullByteFound);
534    Value * nullPosn = b->CreateSub(b->CreatePtrToInt(ptrToNull, intPtrTy), b->CreatePtrToInt(byteStreamBasePtr, intPtrTy));
535    b->setTerminationSignal();
536    Function * const dispatcher = m->getFunction("signal_dispatcher"); assert (dispatcher);
537    Value * handler = b->getScalarField("handler_address");
538    b->CreateCall(dispatcher, {handler, ConstantInt::get(b->getInt32Ty(), static_cast<unsigned>(grep::GrepSignal::BinaryFile))});
539    b->CreateBr(segmentDone);
540   
541    b->SetInsertPoint(segmentDone);
542    PHINode * const produced = b->CreatePHI(b->getSizeTy(), 3);
543    produced->addIncoming(nullPosn, nullByteFound);
544    produced->addIncoming(availItems, stridesDone);
545    produced->addIncoming(availItems, nullByteDetection);
546    Value * producedCount = b->getProducedItemCount("untilNull");
547    producedCount = b->CreateAdd(producedCount, produced);
548    b->setProducedItemCount("untilNull", producedCount);
549}
550
551AbortOnNull::AbortOnNull(const std::unique_ptr<kernel::KernelBuilder> & b)
552: MultiBlockKernel("AbortOnNull",
553                   // inputs
554{Binding{b->getStreamSetTy(1, 8), "bytedata"}},
555                   // outputs
556{Binding{b->getStreamSetTy(1, 8), "untilNull", FixedRate(), Deferred()}},
557                   // input scalars
558{Binding{b->getIntAddrTy(), "handler_address"}},
559{}, {}) {
560    addAttribute(CanTerminateEarly());
561}
Note: See TracBrowser for help on using the repository browser.