source: icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp @ 6184

Last change on this file since 6184 was 6184, checked in by nmedfort, 5 months ago

Initial version of PipelineKernel? + revised StreamSet? model.

File size: 25.2 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "grep_kernel.h"
7#include <re/printer_re.h>
8#include <re/re_cc.h>
9#include <re/re_name.h>
10#include <re/re_toolchain.h>
11#include <re/re_reverse.h>
12#include <grep/grep_engine.h>
13#include <pablo/codegenstate.h>
14#include <pablo/pablo_toolchain.h>
15#include <kernels/kernel_builder.h>
16#include <pablo/builder.hpp>
17#include <pablo/pe_ones.h>          // for Ones
18#include <pablo/pe_var.h>           // for Var
19#include <pablo/pe_zeroes.h>        // for Zeroes
20#include <pablo/pe_infile.h>
21#include <pablo/boolean.h>
22#include <pablo/pe_count.h>
23#include <pablo/pe_matchstar.h>
24#include <pablo/pe_pack.h>
25#include <cc/cc_compiler.h>         // for CC_Compiler
26#include <cc/alphabet.h>
27#include <cc/multiplex_CCs.h>
28#include <re/re_compiler.h>
29#include <UCD/ucd_compiler.hpp>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/raw_ostream.h>
32
33using namespace kernel;
34using namespace pablo;
35using namespace re;
36using namespace llvm;
37
38UnicodeLineBreakKernel::UnicodeLineBreakKernel(const std::unique_ptr<kernel::KernelBuilder> & kb)
39: PabloKernel(kb,
40              "UTF8_LB",
41              {Binding{kb->getStreamSetTy(8), "basis"}, Binding{kb->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
42              {Binding{kb->getStreamSetTy(1, 1), "UTF8_LB", FixedRate()}}) {
43}
44
45void UnicodeLineBreakKernel::generatePabloMethod() {
46    PabloBuilder pb(getEntryScope());
47    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
48    UCD::UCDCompiler ucdCompiler(ccc);
49   
50    Name * breakChars = re::makeName("breakChars", makeCC(makeCC(makeCC(0x0A, 0x0D), makeCC(0x85)), makeCC(0x2028,0x2029)));
51    UCD::UCDCompiler::NameMap nameMap;
52    nameMap.emplace(breakChars, nullptr);
53    ucdCompiler.generateWithDefaultIfHierarchy(nameMap, pb);
54    auto f = nameMap.find(breakChars);
55    if (f == nameMap.end()) llvm::report_fatal_error("UnicodeLineBreakKernel compilation failure");
56    PabloAST * breakStream = f-> second;
57    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
58    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
59    Var * const CR_before_LF = pb.createVar("CR_before_LFCR_before_LF", pb.createZeroes());
60    auto crb = pb.createScope();
61    pb.createIf(CR, crb);
62    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
63    crb.createAssign(CR_before_LF, crb.createAnd(CR, lookaheadLF));
64    breakStream = pb.createXor(breakStream, CR_before_LF);  // Remove CR_before_LF from breakStream
65    Var * const UTF8_LB = getOutputStreamVar("UTF8_LB");
66    pb.createAssign(pb.createExtract(UTF8_LB, pb.getInteger(0)), breakStream);
67}
68
69void RequiredStreams_UTF8::generatePabloMethod() {
70    PabloBuilder pb(getEntryScope());
71    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
72   
73    PabloAST * const LF = pb.createExtract(getInput(1), pb.getInteger(0), "LF");
74    PabloAST * const CR = ccc.compileCC(makeByte(0x0D));
75    PabloAST * const LF_VT_FF_CR = ccc.compileCC("LF,VT,FF,CR", makeByte(0x0A, 0x0D), pb);
76    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
77   
78    // Remove the CR of any CR+LF
79    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
80    auto crb = pb.createScope();
81    pb.createIf(CR, crb);
82    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
83    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
84    crb.createAssign(CRLF, crlf);
85    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
86    crb.createAssign(LineBreak, removedCRLF);
87
88   
89    Zeroes * const ZEROES = pb.createZeroes();
90    PabloAST * const u8pfx = ccc.compileCC(makeByte(0xC0, 0xFF));
91
92
93    Var * const nonFinal = pb.createVar("nonFinal", u8pfx);
94    Var * const u8invalid = pb.createVar("u8invalid", ZEROES);
95    Var * const valid_pfx = pb.createVar("valid_pfx", u8pfx);
96
97    auto it = pb.createScope();
98    pb.createIf(u8pfx, it);
99    PabloAST * const u8pfx2 = ccc.compileCC(makeByte(0xC2, 0xDF), it);
100    PabloAST * const u8pfx3 = ccc.compileCC(makeByte(0xE0, 0xEF), it);
101    PabloAST * const u8pfx4 = ccc.compileCC(makeByte(0xF0, 0xF4), it);
102    PabloAST * const u8suffix = ccc.compileCC("u8suffix", makeByte(0x80, 0xBF), it);
103   
104    //
105    // Two-byte sequences
106    Var * const anyscope = it.createVar("anyscope", ZEROES);
107    auto it2 = it.createScope();
108    it.createIf(u8pfx2, it2);
109    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
110    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeByte(0xC2), it2), 1), ccc.compileCC(makeByte(0x85), it2), "NEL");
111    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
112
113
114    //
115    // Three-byte sequences   
116    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
117    auto it3 = it.createScope();
118    it.createIf(u8pfx3, it3);
119    PabloAST * const u8scope32 = it3.createAdvance(u8pfx3, 1);
120    it3.createAssign(nonFinal, it3.createOr(nonFinal, u8scope32));
121    PabloAST * const u8scope33 = it3.createAdvance(u8pfx3, 2);
122    PabloAST * const u8scope3X = it3.createOr(u8scope32, u8scope33);
123    it3.createAssign(anyscope, it3.createOr(anyscope, u8scope3X));
124    PabloAST * const E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE0), it3), 1), ccc.compileCC(makeByte(0x80, 0x9F), it3));
125    PabloAST * const ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xED), it3), 1), ccc.compileCC(makeByte(0xA0, 0xBF), it3));
126    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
127    it3.createAssign(EF_invalid, EX_invalid);
128    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeByte(0xE2), it3), 1), ccc.compileCC(makeByte(0x80), it3));
129    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeByte(0xA8,0xA9), it3), "LS_PS");
130    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
131
132    //
133    // Four-byte sequences
134    auto it4 = it.createScope();
135    it.createIf(u8pfx4, it4);
136    PabloAST * const u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
137    PabloAST * const u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
138    PabloAST * const u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
139    PabloAST * const u8scope4nonfinal = it4.createOr(u8scope42, u8scope43);
140    it4.createAssign(nonFinal, it4.createOr(nonFinal, u8scope4nonfinal));
141    PabloAST * const u8scope4X = it4.createOr(u8scope4nonfinal, u8scope44);
142    it4.createAssign(anyscope, it4.createOr(anyscope, u8scope4X));
143    PabloAST * const F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF0), it4), 1), ccc.compileCC(makeByte(0x80, 0x8F), it4));
144    PabloAST * const F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeByte(0xF4), it4), 1), ccc.compileCC(makeByte(0x90, 0xBF), it4));
145    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
146    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
147   
148    //
149    // Invalid cases
150    PabloAST * const legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
151    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
152    //  a scope is a mismatch, i.e., invalid UTF-8.
153    PabloAST * const mismatch = it.createXor(anyscope, u8suffix);
154    //
155    PabloAST * const pfx_invalid = it.createXor(valid_pfx, legalpfx);
156    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
157    PabloAST * const u8valid = it.createNot(u8invalid, "u8valid");
158    //
159    //
160    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
161    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
162    //PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
163   
164    Var * const required = getOutputStreamVar("nonFinal");
165    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
166    pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), LineBreak);//pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
167}
168
169RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb, StreamSet * BasisBits, StreamSet * LineFeedStream, StreamSet * RequiredStreams, StreamSet * UnicodeLB)
170: PabloKernel(kb, "RequiredStreams_UTF8",
171// input
172{Binding{"basis", BasisBits},
173 Binding{"lf", LineFeedStream, FixedRate(), LookAhead(1)}},
174// output
175{Binding{"nonFinal", RequiredStreams, FixedRate()},
176 Binding{"UnicodeLB", UnicodeLB, FixedRate()}}) {
177
178}
179
180void RequiredStreams_UTF16::generatePabloMethod() {
181    PabloBuilder pb(getEntryScope());
182    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"));
183   
184    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF, &cc::UTF16));    //u16hi_hi_surrogate = [\xD8-\xDB]
185    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF, &cc::UTF16));    //u16hi_lo_surrogate = [\xDC-\xDF]
186   
187    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
188    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
189
190    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
191    PabloAST * nonFinal = pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal");
192
193    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF, &cc::UTF16)), ccc.compileCC(makeCC(0xE000, 0xFFFF, &cc::UTF16)));
194    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
195
196    PabloAST * const nonFinalCodeUnits = pb.createExtract(getInput(1), pb.getInteger(0));
197    PabloAST * const initial = pb.createOr(u16single, u16hi_hi_surrogate, "initial");
198    PabloAST * const final = pb.createNot(pb.createOr(pb.createOr(u16hi_hi_surrogate, u16invalid), nonFinalCodeUnits), "final");
199
200    Var * const required = getOutputStreamVar("required");
201    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
202    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
203    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
204
205}
206
207RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
208: PabloKernel(kb, "RequiredStreams_UTF16",               
209// inputs
210{Binding{kb->getStreamSetTy(8), "basis"}},
211// output
212{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
213
214}
215
216ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
217: mRE(re_ast)
218, mSignature(Printer_RE::PrintRE(mRE)) {
219
220}
221
222// Helper to compute stream set inputs to pass into PabloKernel constructor.
223Bindings ICGrepKernel::makeInputBindings(StreamSet * const basis, const Externals & externals, const Alphabets & alphabets) {
224    Bindings inputs;
225    inputs.emplace_back("basis", basis);
226    for (const auto & e : externals) {
227        inputs.emplace_back(e.first, e.second);
228    }
229    for (const auto & a : alphabets) {
230        inputs.emplace_back(a.first->getName() + "_basis", a.second);
231    }
232    return inputs;
233}
234
235ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b,
236                           RE * const re,
237                           StreamSet * const BasisBits,
238                           StreamSet * const matches,
239                           const Externals externals,
240                           const Alphabets alphabets,
241                           const cc::BitNumbering basisSetNumbering,
242                           const bool cachable)
243: ICGrepSignature(re)
244, PabloKernel(b, "ic" + getStringHash(mSignature),
245// inputs
246std::move(makeInputBindings(BasisBits, externals, alphabets)),
247// output
248{Binding{"matches", matches, FixedRate(), Add1()}})
249, mExternals(std::move(externals))
250, mAlphabets(std::move(alphabets))
251, mBasisSetNumbering(basisSetNumbering)
252, mIsCachable(cachable) {
253}
254
255std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
256    return mSignature;
257}
258
259void ICGrepKernel::generatePabloMethod() {
260    PabloBuilder pb(getEntryScope());
261    cc::Parabix_CC_Compiler ccc(getEntryScope(), getInputStreamSet("basis"), mBasisSetNumbering);
262    RE_Compiler re_compiler(getEntryScope(), ccc, mBasisSetNumbering);
263    for (const auto & e : mExternals) {
264        re_compiler.addPrecompiled(e.first, pb.createExtract(getInputStreamVar(e.first), pb.getInteger(0)));
265    }
266    for (const auto & a : mAlphabets) {
267        auto & alpha = a.first;
268        auto mpx_basis = getInputStreamSet(alpha->getName() + "_basis");
269        re_compiler.addAlphabet(alpha, mpx_basis);
270    }
271    PabloAST * const matches = re_compiler.compile(mRE);
272    Var * const output = getOutputStreamVar("matches");
273    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
274}
275
276
277ByteGrepSignature::ByteGrepSignature(RE * re)
278: mRE(re)
279, mSignature(Printer_RE::PrintRE(re) ) {
280}
281
282ByteGrepKernel::ByteGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const re, std::vector<Binding> inputSets, StreamSet * matches)
283: ByteGrepSignature(re)
284, PabloKernel(b, "byteGrep" + getStringHash(mSignature),
285// inputs
286std::move(inputSets),
287// output
288{Binding{"matches", matches, FixedRate(), Add1()}}) {
289
290}
291
292std::string ByteGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
293    return mSignature;
294}
295
296void ByteGrepKernel::generatePabloMethod() {
297    PabloBuilder pb(getEntryScope());
298    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
299    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
300    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
301    const auto numOfInputs = getNumOfInputs();
302    for (unsigned i = 1; i < numOfInputs; ++i) {
303        const Binding & input = getInputStreamSetBinding(i);
304        re_byte_compiler.addPrecompiled(input.getName(), pb.createExtract(getInputStreamVar(input.getName()), pb.getInteger(0)));
305    }
306    PabloAST * const matches = re_byte_compiler.compile(mRE);   
307    Var * const output = getOutputStreamVar("matches");
308    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), matches);
309}
310
311// Helper to compute stream set inputs to pass into PabloKernel constructor.
312inline std::vector<Binding> byteBitGrepInputs(const std::unique_ptr<kernel::KernelBuilder> & b,
313                                              const std::vector<std::string> & externals) {
314    std::vector<Binding> streamSetInputs = {
315        Binding{b->getStreamSetTy(1, 8), "byteData"},
316    };
317    for (auto & e : externals) {
318        streamSetInputs.push_back(Binding{b->getStreamSetTy(1, 1), e});
319    }
320    return streamSetInputs;
321}
322
323ByteBitGrepSignature::ByteBitGrepSignature(RE * prefix, RE * suffix)
324: mPrefixRE(prefix)
325, mSuffixRE(suffix)
326, mSignature(Printer_RE::PrintRE(mPrefixRE) + Printer_RE::PrintRE(mSuffixRE) ) {
327}
328
329ByteBitGrepKernel::ByteBitGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & b, RE * const prefixRE, RE * const suffixRE, std::vector<Binding> inputSets, StreamSet * matches)
330: ByteBitGrepSignature(prefixRE, suffixRE)
331, PabloKernel(b, "bBc" + getStringHash(mSignature),
332// inputs
333std::move(inputSets),
334// output
335{Binding{"matches", matches, FixedRate(), Add1()}}) {
336
337}
338
339std::string ByteBitGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
340    return mSignature;
341}
342
343
344void ByteBitGrepKernel::generatePabloMethod() {
345    PabloBuilder pb(getEntryScope());
346    PabloAST * u8bytes = pb.createExtract(getInput(0), pb.getInteger(0));
347    cc::Direct_CC_Compiler dcc(getEntryScope(), u8bytes);
348    RE_Compiler re_byte_compiler(getEntryScope(), dcc);
349
350    const auto numOfInputs = getNumOfInputs();
351    for (unsigned i = 1; i < numOfInputs; ++i) {
352        const Binding & input = getInputStreamSetBinding(i);
353        re_byte_compiler.addPrecompiled(input.getName(), pb.createExtract(getInputStreamVar(input.getName()), pb.getInteger(0)));
354    }
355
356    PabloAST * const prefixMatches = re_byte_compiler.compile(mPrefixRE);
357    Var * const final_matches = pb.createVar("final_matches", pb.createZeroes());
358    PabloBlock * scope1 = getEntryScope()->createScope();
359    pb.createIf(prefixMatches, scope1);
360   
361    PabloAST * nybbles[2];
362    nybbles[0] = scope1->createPackL(scope1->getInteger(8), u8bytes);
363    nybbles[1] = scope1->createPackH(scope1->getInteger(8), u8bytes);
364   
365    PabloAST * bitpairs[4];
366    for (unsigned i = 0; i < 2; i++) {
367        bitpairs[2*i] = scope1->createPackL(scope1->getInteger(4), nybbles[i]);
368        bitpairs[2*i + 1] = scope1->createPackH(scope1->getInteger(4), nybbles[i]);
369    }
370   
371    std::vector<PabloAST *> basis(8);
372    for (unsigned i = 0; i < 4; i++) {
373        // The subtraction 7-bit is because of the confusion between
374        // little-endian and big-endian bit numbering of bytes.
375        // We should fix this, switching to little-endian numbering throughout.
376        basis[7-2*i] = scope1->createPackL(scope1->getInteger(2), bitpairs[i]);
377        basis[7-(2*i + 1)] = scope1->createPackH(scope1->getInteger(2), bitpairs[i]);
378    }
379   
380    cc::Parabix_CC_Compiler ccc(scope1, basis);
381    RE_Compiler re_compiler(scope1, ccc);
382    scope1->createAssign(final_matches, re_compiler.compile(mSuffixRE, prefixMatches));
383    Var * const output = getOutputStreamVar("matches");
384    pb.createAssign(pb.createExtract(output, pb.getInteger(0)), final_matches);
385}
386
387
388void MatchedLinesKernel::generatePabloMethod() {
389    PabloBuilder pb(getEntryScope());
390    PabloAST * matchResults = pb.createExtract(getInputStreamVar("matchResults"), pb.getInteger(0));
391    PabloAST * lineBreaks = pb.createExtract(getInputStreamVar("lineBreaks"), pb.getInteger(0));
392    PabloAST * notLB = pb.createNot(lineBreaks);
393    PabloAST * match_follow = pb.createMatchStar(matchResults, notLB);
394    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(notLB, 1), "unterminatedLineAtEOF");
395    Var * const matchedLines = getOutputStreamVar("matchedLines");
396    pb.createAssign(pb.createExtract(matchedLines, pb.getInteger(0)), pb.createAnd(match_follow, pb.createOr(lineBreaks, unterminatedLineAtEOF)));
397}
398
399MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder, StreamSet * OriginalMatches, StreamSet * LineBreakStream, StreamSet * Matches)
400: PabloKernel(iBuilder, "MatchedLines",
401// inputs
402{Binding{"matchResults", OriginalMatches}
403,Binding{"lineBreaks", LineBreakStream}},
404// output
405{Binding{"matchedLines", Matches, FixedRate(), Add1()}}) {
406
407}
408
409
410void InvertMatchesKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
411    Value * input = iBuilder->loadInputStreamBlock("matchedLines", iBuilder->getInt32(0));
412    Value * lbs = iBuilder->loadInputStreamBlock("lineBreaks", iBuilder->getInt32(0));
413    Value * inverted = iBuilder->CreateXor(input, lbs);
414    iBuilder->storeOutputStreamBlock("nonMatches", iBuilder->getInt32(0), inverted);
415}
416
417InvertMatchesKernel::InvertMatchesKernel(const std::unique_ptr<kernel::KernelBuilder> & builder, StreamSet * OriginalMatches, StreamSet * LineBreakStream, StreamSet * Matches)
418: BlockOrientedKernel("Invert",
419// Inputs
420{Binding{"matchedLines", OriginalMatches},
421 Binding{"lineBreaks", LineBreakStream}},
422// Outputs
423{Binding{"nonMatches", Matches}},
424// Input/Output Scalars and internal state
425{}, {}, {}) {
426
427}
428
429
430void PopcountKernel::generatePabloMethod() {
431    auto pb = getEntryScope();
432    const auto toCount = pb->createExtract(getInputStreamVar("toCount"), pb->getInteger(0));
433    pablo::Var * countResult = getOutputScalarVar("countResult");
434   
435    pb->createAssign(countResult, pb->createCount(pb->createInFile(toCount)));
436}
437
438PopcountKernel::PopcountKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder, StreamSet * const toCount, Scalar * countResult)
439: PabloKernel(iBuilder, "Popcount",
440{Binding{"toCount", toCount}},
441{},
442{},
443{Binding{"countResult", countResult}}) {
444
445}
446
447
448void AbortOnNull::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) {
449    Module * const m = b->getModule();
450    DataLayout DL(m);
451    IntegerType * const intPtrTy = DL.getIntPtrType(m->getContext());
452    Type * voidPtrTy = b->getVoidPtrTy();
453    const auto blocksPerStride = getStride() / b->getBitBlockWidth();
454    Constant * const BLOCKS_PER_STRIDE = b->getSize(blocksPerStride);
455    BasicBlock * const entry = b->GetInsertBlock();
456    BasicBlock * const strideLoop = b->CreateBasicBlock("strideLoop");
457    BasicBlock * const stridesDone = b->CreateBasicBlock("stridesDone");
458    BasicBlock * const nullByteDetection = b->CreateBasicBlock("nullByteDetection");
459    BasicBlock * const nullByteFound = b->CreateBasicBlock("nullByteFound");
460    BasicBlock * const finalStride = b->CreateBasicBlock("finalStride");
461    BasicBlock * const segmentDone = b->CreateBasicBlock("segmentDone");
462
463    Value * const numOfBlocks = b->CreateMul(numOfStrides, BLOCKS_PER_STRIDE);
464    Value * availItems = b->getAvailableItemCount("byteData");
465    //
466    // Fast loop to prove that there are no null bytes in a multiblock region.
467    // We repeatedly combine byte packs using a SIMD unsigned min operation
468    // (implemented as a Select/ICmpULT combination).
469    //
470    Value * byteStreamBasePtr = b->getInputStreamBlockPtr("byteData", b->getSize(0), b->getSize(0));
471    Value * outputStreamBasePtr = b->getOutputStreamBlockPtr("untilNull", b->getSize(0), b->getSize(0));
472
473    //
474    // We set up a a set of eight accumulators to accumulate the minimum byte
475    // values seen at each position in a block.   The initial min value at
476    // each position is 0xFF (all ones).
477    Value * blockMin[8];
478    for (unsigned i = 0; i < 8; i++) {
479        blockMin[i] = b->fwCast(8, b->allOnes());
480    }
481    // If we're in the final block bypass the fast loop.
482    b->CreateCondBr(mIsFinal, finalStride, strideLoop);
483   
484    b->SetInsertPoint(strideLoop);
485    PHINode * const baseBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
486    baseBlockIndex->addIncoming(ConstantInt::get(baseBlockIndex->getType(), 0), entry);
487    PHINode * const blocksRemaining = b->CreatePHI(b->getSizeTy(), 2);
488    blocksRemaining->addIncoming(numOfBlocks, entry);
489    for (unsigned i = 0; i < 8; i++) {
490        Value * next = b->CreateBlockAlignedLoad(b->CreateGEP(byteStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
491        b->CreateBlockAlignedStore(next, b->CreateGEP(outputStreamBasePtr, {baseBlockIndex, b->getSize(i)}));
492        next = b->fwCast(8, next);
493        blockMin[i] = b->CreateSelect(b->CreateICmpULT(next, blockMin[i]), next, blockMin[i]);
494    }
495    Value * nextBlockIndex = b->CreateAdd(baseBlockIndex, ConstantInt::get(baseBlockIndex->getType(), 1));
496    Value * nextRemaining = b->CreateSub(blocksRemaining, ConstantInt::get(blocksRemaining->getType(), 1));
497    baseBlockIndex->addIncoming(nextBlockIndex, strideLoop);
498    blocksRemaining->addIncoming(nextRemaining, strideLoop);
499    b->CreateCondBr(b->CreateICmpUGT(nextRemaining, ConstantInt::getNullValue(blocksRemaining->getType())), strideLoop, stridesDone);
500   
501    b->SetInsertPoint(stridesDone);
502    // Combine the 8 blockMin values.
503    for (unsigned i = 0; i < 4; i++) {
504        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+4]);
505    }
506    for (unsigned i = 0; i < 2; i++) {
507        blockMin[i] = b->CreateSelect(b->CreateICmpULT(blockMin[i], blockMin[i+4]), blockMin[i], blockMin[i+2]);
508    }
509    blockMin[0] = b->CreateSelect(b->CreateICmpULT(blockMin[0], blockMin[1]), blockMin[0], blockMin[1]);
510    Value * anyNull = b->bitblock_any(b->simd_eq(8, blockMin[0], b->allZeroes()));
511   
512    b->CreateCondBr(anyNull, nullByteDetection, segmentDone);
513   
514   
515    b->SetInsertPoint(finalStride);
516    b->CreateMemCpy(b->CreatePointerCast(outputStreamBasePtr, voidPtrTy), b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), availItems, 1);
517    b->CreateBr(nullByteDetection);
518   
519    b->SetInsertPoint(nullByteDetection);
520    //  Find the exact location using memchr, which should be fast enough.
521    //
522    Value * ptrToNull = b->CreateMemChr(b->CreatePointerCast(byteStreamBasePtr, voidPtrTy), b->getInt32(0), availItems);
523    Value * ptrAddr = b->CreatePtrToInt(ptrToNull, intPtrTy);
524    b->CreateCondBr(b->CreateICmpEQ(ptrAddr, ConstantInt::getNullValue(intPtrTy)), segmentDone, nullByteFound);
525   
526    // A null byte has been located; set the termination code and call the signal handler.
527    b->SetInsertPoint(nullByteFound);
528    Value * nullPosn = b->CreateSub(b->CreatePtrToInt(ptrToNull, intPtrTy), b->CreatePtrToInt(byteStreamBasePtr, intPtrTy));
529    b->setTerminationSignal();
530    Function * const dispatcher = m->getFunction("signal_dispatcher"); assert (dispatcher);
531    Value * handler = b->getScalarField("handler_address");
532    b->CreateCall(dispatcher, {handler, ConstantInt::get(b->getInt32Ty(), static_cast<unsigned>(grep::GrepSignal::BinaryFile))});
533    b->CreateBr(segmentDone);
534   
535    b->SetInsertPoint(segmentDone);
536    PHINode * const produced = b->CreatePHI(b->getSizeTy(), 3);
537    produced->addIncoming(nullPosn, nullByteFound);
538    produced->addIncoming(availItems, stridesDone);
539    produced->addIncoming(availItems, nullByteDetection);
540    Value * producedCount = b->getProducedItemCount("untilNull");
541    producedCount = b->CreateAdd(producedCount, produced);
542    b->setProducedItemCount("untilNull", producedCount);
543}
544
545AbortOnNull::AbortOnNull(const std::unique_ptr<kernel::KernelBuilder> &, StreamSet * const InputStream, StreamSet * const OutputStream, Scalar * callbackObject)
546: MultiBlockKernel("AbortOnNull",
547// inputs
548{Binding{"byteData", InputStream, FixedRate(), Principal()}},
549// outputs
550{Binding{ "untilNull", OutputStream, FixedRate(), Deferred()}},
551// input scalars
552{Binding{"handler_address", callbackObject}},
553{}, {}) {
554    addAttribute(CanTerminateEarly());
555}
Note: See TracBrowser for help on using the repository browser.