source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6043

Last change on this file since 6043 was 6043, checked in by xwa163, 12 months ago

Init checkin for lz4_grep count-only pipeline with multiplexing

File size: 25.4 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53
54
55
56namespace re { class CC; }
57
58using namespace llvm;
59using namespace parabix;
60using namespace kernel;
61using namespace grep;
62
63LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
64    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
65    mMoveMatchesToEOL = true;
66}
67
68void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
69    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
70        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
71    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
72        mBreakCC = re::makeByte(0);  // Null
73    } else {
74        mBreakCC = re::makeByte(0x0A); // LF
75    }
76    re::RE * anchorRE = mBreakCC;
77    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
78        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
79        anchorName->setDefinition(re::makeUnicodeBreak());
80        anchorRE = anchorName;
81    }
82
83    mREs = REs;
84    bool allAnchored = true;
85    for(unsigned i = 0; i < mREs.size(); ++i) {
86        if (!hasEndAnchor(mREs[i])) allAnchored = false;
87        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
88        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
89        mREs[i] = resolveAnchors(mREs[i], anchorRE);
90        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
91        mREs[i] = regular_expression_passes(mREs[i]);
92    }
93    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
94
95}
96
97
98parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
99//    auto mGrepDriver = &mPxDriver;
100    const unsigned baseBufferSize = this->getInputBufferBlocks();
101    auto & idb = mPxDriver.getBuilder();
102    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
103    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
104    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
105    return LineFeedStream;
106}
107
108parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
109    auto mGrepDriver = &mPxDriver;
110    auto & idb = mGrepDriver->getBuilder();
111
112    // Extract (Deletion)
113    this->generateCompressionMarker(idb);
114
115    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
116    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
117
118    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
119    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
120
121    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
122    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
123    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
124
125    // Deposit
126    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
127    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
128    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
129
130    // Match Copy
131    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
132    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
133    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
134
135    return matchCopiedBits;
136}
137
138parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
139    auto mGrepDriver = &mPxDriver;
140    const unsigned baseBufferSize = this->getInputBufferBlocks();
141    auto & idb = mGrepDriver->getBuilder();
142
143    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
144    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
145    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
146    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
147}
148
149
150std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs) {
151
152    this->initREs(REs);
153    auto mGrepDriver = &mPxDriver;
154
155    auto & idb = mGrepDriver->getBuilder();
156    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
157    const unsigned baseBufferSize = this->getInputBufferBlocks();
158    bool CC_Multiplexing = true;
159    int MaxCountFlag = 0;
160
161    //  Regular Expression Processing and Analysis Phase
162    const auto nREs = mREs.size();
163
164    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
165    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
166
167//    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(matchCopiedBasisBits);
168    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromCompressedBits();
169
170    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
171        LineBreakStream = LineFeedStream;
172    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
173        // TODO fix here
174//        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
175//        mGrepDriver->makeKernelCall(breakK, {matchCopiedBasisBits}, {LineBreakStream});
176    }
177
178    std::map<std::string, StreamSetBuffer *> propertyStream;
179
180    for(unsigned i = 0; i < nREs; ++i) {
181        std::vector<std::string> externalStreamNames;
182
183        std::set<re::Name *> UnicodeProperties;
184
185
186        const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
187        StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
188
189        mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
190        mREs[i] = transformCCs(mpx.get(), mREs[i]);
191        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
192        auto numOfCharacterClasses = mpx_basis.size();
193        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
194        kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
195        mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses}); //TODO get it from compression space
196
197        StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
198        //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
199        //                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {CharClasses});
200
201        StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
202        Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
203        mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
204
205        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
206        mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
207        MatchResultsBufs[i] = MatchResults;
208
209    }
210
211    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
212    if (mREs.size() > 1) {
213        MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
214        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
215        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
216    }
217    StreamSetBuffer * Matches = MergedResults;
218    if (mMoveMatchesToEOL) {
219        StreamSetBuffer * OriginalMatches = Matches;
220        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
221        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
222        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
223    }
224
225    if (MaxCountFlag > 0) {
226        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
227        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
228        StreamSetBuffer * const AllMatches = Matches;
229        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
230        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
231    }
232
233    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
234};
235
236std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
237        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
238
239    this->initREs(REs);
240    auto mGrepDriver = &mPxDriver;
241
242    auto & idb = mGrepDriver->getBuilder();
243    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
244    const unsigned baseBufferSize = this->getInputBufferBlocks();
245    int MaxCountFlag = 0;
246
247    //  Regular Expression Processing and Analysis Phase
248    const auto nREs = mREs.size();
249
250    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
251    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
252
253    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
254    StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
255
256    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
257
258    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
259    mGrepDriver->makeKernelCall(requiredStreamsK, {decompressedBasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
260
261    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
262        LineBreakStream = LineFeedStream;
263    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
264        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
265        mGrepDriver->makeKernelCall(breakK, {decompressedBasisBits}, {LineBreakStream});
266    } else {
267        LineBreakStream = UnicodeLB;
268    }
269
270    std::map<std::string, StreamSetBuffer *> propertyStream;
271
272    for(unsigned i = 0; i < nREs; ++i) {
273        std::vector<std::string> externalStreamNames;
274        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
275        if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
276            externalStreamNames.push_back("UTF8_LB");
277            icgrepInputSets.push_back(LineBreakStream);
278            externalStreamNames.push_back("UTF8_nonfinal");
279            icgrepInputSets.push_back(RequiredStreams);
280        }
281        std::set<re::Name *> UnicodeProperties;
282
283        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
284        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
285        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
286        MatchResultsBufs[i] = MatchResults;
287    }
288
289    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
290    if (mREs.size() > 1) {
291        MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
292        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
293        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
294    }
295    StreamSetBuffer * Matches = MergedResults;
296    if (mMoveMatchesToEOL) {
297        StreamSetBuffer * OriginalMatches = Matches;
298        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
299        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
300        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
301    }
302
303    if (MaxCountFlag > 0) {
304        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
305        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
306        StreamSetBuffer * const AllMatches = Matches;
307        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
308        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
309    }
310
311    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
312
313}
314
315void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
316    auto main = this->getScanMatchGrepMainFunction();
317    std::ostringstream s;
318    EmitMatch accum("", false, false, s);
319
320    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
321    llvm::outs() << s.str();
322}
323
324void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
325    auto & iBuilder = mPxDriver.getBuilder();
326    this->generateScanMatchMainFunc(iBuilder);
327
328    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
329
330    // GeneratePipeline
331    this->generateLoadByteStreamAndBitStream(iBuilder);
332    this->generateExtractAndDepositMarkers(iBuilder);
333
334    auto swizzle = this->generateSwizzleExtractData(iBuilder);
335
336    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
337    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
338
339    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
340    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
341
342    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
343    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
344
345    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
346    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
347
348    // Produce unswizzled bit streams
349    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
350    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
351    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
352
353    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
354    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
355
356    StreamSetBuffer * LineBreakStream;
357    StreamSetBuffer * Matches;
358    std::vector<re::RE*> res = {regex};
359    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
360
361    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
362    scanMatchK->setInitialArguments({match_accumulator});
363    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
364    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
365    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
366
367    mPxDriver.generatePipelineIR();
368    mPxDriver.deallocateBuffers();
369
370    iBuilder->CreateRetVoid();
371
372    mPxDriver.finalizeObject();
373}
374
375void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
376    auto & iBuilder = mPxDriver.getBuilder();
377    this->generateMainFunc(iBuilder);
378
379    // GeneratePipeline
380    this->generateLoadByteStreamAndBitStream(iBuilder);
381    this->generateExtractAndDepositMarkers(iBuilder);
382
383    StreamSetBuffer * LineBreakStream;
384    StreamSetBuffer * Matches;
385    std::vector<re::RE*> res = {regex};
386    if (mEnableMultiplexing) {
387        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
388    } else {
389        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
390        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
391        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
392        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
393
394        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
395        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
396        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
397
398        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
399    };
400
401    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
402    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
403    mPxDriver.generatePipelineIR();
404
405    iBuilder->setKernel(matchCountK);
406    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
407    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
408    iBuilder->CallPrintInt("aaa", matchedLineCount);
409
410    mPxDriver.deallocateBuffers();
411
412    // TODO return matchedLineCount
413//        idb->CreateRet(matchedLineCount);
414
415    iBuilder->CreateRetVoid();
416
417    mPxDriver.finalizeObject();
418}
419
420
421void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
422    auto & iBuilder = mPxDriver.getBuilder();
423    this->generateMainFunc(iBuilder);
424
425
426    // GeneratePipeline
427    this->generateLoadByteStreamAndBitStream(iBuilder);
428    this->generateExtractAndDepositMarkers(iBuilder);
429
430    auto swizzle = this->generateSwizzleExtractData(iBuilder);
431
432    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
433    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
434
435    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
436    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
437
438
439    // split PDEP into 2 kernel will be a little slower in single thread environment
440/*
441    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
442    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
443
444    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
445    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
446*/
447
448    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
449    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
450
451    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
452    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
453
454    // Produce unswizzled bit streams
455    StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
456    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
457    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
458
459    StreamSetBuffer * LineBreakStream;
460    StreamSetBuffer * Matches;
461    std::vector<re::RE*> res = {regex};
462//    if (mEnableMultiplexing) {
463//        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, matchCopiedbits);
464//    } else {
465        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
466//    };
467
468    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
469    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
470    mPxDriver.generatePipelineIR();
471
472    iBuilder->setKernel(matchCountK);
473    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
474    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
475    iBuilder->CallPrintInt("aaa", matchedLineCount);
476
477    mPxDriver.deallocateBuffers();
478
479    // TODO return matchedLineCount
480//        idb->CreateRet(matchedLineCount);
481
482    iBuilder->CreateRetVoid();
483
484    mPxDriver.finalizeObject();
485}
486
487ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
488    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
489}
490
491void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
492    Module * M = iBuilder->getModule();
493    Type * const sizeTy = iBuilder->getSizeTy();
494    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
495    Type * const voidTy = iBuilder->getVoidTy();
496    Type * const inputType = iBuilder->getInt8PtrTy();
497    Type * const intAddrTy = iBuilder->getIntAddrTy();
498
499    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
500    main->setCallingConv(CallingConv::C);
501    Function::arg_iterator args = main->arg_begin();
502    mInputStream = &*(args++);
503    mInputStream->setName("input");
504
505    mHeaderSize = &*(args++);
506    mHeaderSize->setName("mHeaderSize");
507
508    mFileSize = &*(args++);
509    mFileSize->setName("mFileSize");
510
511    mHasBlockChecksum = &*(args++);
512    mHasBlockChecksum->setName("mHasBlockChecksum");
513
514    match_accumulator = &*(args++);
515    match_accumulator->setName("match_accumulator");
516
517    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
518}
Note: See TracBrowser for help on using the repository browser.