source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6064

Last change on this file since 6064 was 6064, checked in by xwa163, 13 months ago

Init checkin for LZ4ParallelByteStreamAioKernel and related pipeline

File size: 42.2 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53#include <kernels/lz4/lz4_block_decoder.h>
54#include <kernels/lz4/lz4_swizzled_aio.h>
55
56
57namespace re { class CC; }
58
59using namespace llvm;
60using namespace parabix;
61using namespace kernel;
62using namespace grep;
63
64LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
65    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
66    mMoveMatchesToEOL = true;
67}
68
69void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
70    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
71        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
72    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
73        mBreakCC = re::makeByte(0);  // Null
74    } else {
75        mBreakCC = re::makeByte(0x0A); // LF
76    }
77    re::RE * anchorRE = mBreakCC;
78    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
79        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
80        anchorName->setDefinition(re::makeUnicodeBreak());
81        anchorRE = anchorName;
82    }
83
84    mREs = REs;
85    bool allAnchored = true;
86    for(unsigned i = 0; i < mREs.size(); ++i) {
87        if (!hasEndAnchor(mREs[i])) allAnchored = false;
88        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
89        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
90        mREs[i] = resolveAnchors(mREs[i], anchorRE);
91        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
92        mREs[i] = regular_expression_passes(mREs[i]);
93    }
94    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
95
96}
97
98
99parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
100//    auto mGrepDriver = &mPxDriver;
101    const unsigned baseBufferSize = this->getInputBufferBlocks();
102    auto & idb = mPxDriver.getBuilder();
103    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
104    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
105    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
106    return LineFeedStream;
107}
108
109StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithAioApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
110    auto mGrepDriver = &mPxDriver;
111    auto & iBuilder = mGrepDriver->getBuilder();
112
113    //// Decode Block Information
114    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
115    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
116    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
117
118    //// Generate Helper Markers Extenders, FX, XF
119    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
120    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
121    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
122    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
123
124
125    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
126    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
127    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
128
129
130    // Produce unswizzled bit streams
131    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
132    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
133    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
134
135    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
136
137
138    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
139    lz4AioK->setInitialArguments({mFileSize});
140    mPxDriver.makeKernelCall(
141            lz4AioK,
142            {
143                    mCompressedByteStream,
144                    Extenders,
145
146                    // Block Data
147                    BlockData_IsCompressed,
148                    BlockData_BlockStart,
149                    BlockData_BlockEnd,
150
151                    u16Swizzle0,
152            }, {
153                    decompressedSwizzled0,
154            });
155
156
157
158    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
159    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
160    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
161
162    return decompressionBitStream;
163
164}
165
166StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
167    auto mGrepDriver = &mPxDriver;
168    auto & idb = mGrepDriver->getBuilder();
169
170    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
171    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
172    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
173
174    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
175    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
176    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
177
178    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
179    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
180    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
181
182    // Produce unswizzled bit streams
183    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
184    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
185    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
186
187    return matchCopiedBits;
188}
189parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
190    if (numberOfStream == 4) {
191        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
192    }
193
194    auto mGrepDriver = &mPxDriver;
195    auto & idb = mGrepDriver->getBuilder();
196
197    // Extract (Deletion)
198    this->generateCompressionMarker(idb);
199
200    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
201    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
202
203    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
204    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
205
206    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
207    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
208    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
209
210    // Deposit
211    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
212    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
213    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
214
215    // Match Copy
216    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
217    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
218    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
219
220    return matchCopiedBits;
221}
222
223parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
224    auto mGrepDriver = &mPxDriver;
225    const unsigned baseBufferSize = this->getInputBufferBlocks();
226    auto & idb = mGrepDriver->getBuilder();
227
228    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
229    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
230    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
231    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
232}
233
234void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
235    this->initREs(REs);
236    auto mGrepDriver = &mPxDriver;
237
238    auto & idb = mGrepDriver->getBuilder();
239    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
240    const unsigned baseBufferSize = this->getInputBufferBlocks();
241    bool CC_Multiplexing = true;
242    int MaxCountFlag = 0;
243
244
245    //  Regular Expression Processing and Analysis Phase
246    const auto nREs = mREs.size();
247
248    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
249
250    std::map<std::string, StreamSetBuffer *> propertyStream;
251
252    std::vector<std::string> externalStreamNames;
253    std::set<re::Name *> UnicodeProperties;
254
255    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
256    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
257
258    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
259    mREs[0] = transformCCs(mpx.get(), mREs[0]);
260    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
261    auto numOfCharacterClasses = mpx_basis.size();
262    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
263
264    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
265    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
266
267    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
268    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
269    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
270
271    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
272    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
273    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
274    StreamSetBuffer * decompressedCombinedStream = nullptr;
275
276    decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
277
278/*
279    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
280    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
281    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
282    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
283
284
285
286    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
287    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
288    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
289
290    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
291    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
292    MatchResultsBufs[0] = MatchResults;
293
294    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
295    if (mREs.size() > 1) {
296        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
297        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
298        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
299    }
300    StreamSetBuffer * Matches = MergedResults;
301    if (mMoveMatchesToEOL) {
302        StreamSetBuffer * OriginalMatches = Matches;
303        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
304        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
305        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
306    }
307
308    if (MaxCountFlag > 0) {
309        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
310        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
311        StreamSetBuffer * const AllMatches = Matches;
312        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
313        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
314    }
315
316    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
317    */
318
319};
320std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio) {
321
322    this->initREs(REs);
323    auto mGrepDriver = &mPxDriver;
324
325    auto & idb = mGrepDriver->getBuilder();
326    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
327    const unsigned baseBufferSize = this->getInputBufferBlocks();
328    bool CC_Multiplexing = true;
329    int MaxCountFlag = 0;
330
331    //  Regular Expression Processing and Analysis Phase
332    const auto nREs = mREs.size();
333
334    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
335
336
337    std::map<std::string, StreamSetBuffer *> propertyStream;
338
339    std::vector<std::string> externalStreamNames;
340    std::set<re::Name *> UnicodeProperties;
341
342    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
343    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
344
345    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
346    mREs[0] = transformCCs(mpx.get(), mREs[0]);
347    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
348    auto numOfCharacterClasses = mpx_basis.size();
349    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
350
351    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
352    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
353
354    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
355    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
356    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
357
358    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
359    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
360    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
361    StreamSetBuffer * decompressedCombinedStream = nullptr;
362
363    if (useAio) {
364        decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
365    } else {
366        decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
367    }
368
369    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
370    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
371    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
372    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
373
374    /*
375    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
376    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
377     */
378
379    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
380    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
381    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
382
383    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
384    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
385    MatchResultsBufs[0] = MatchResults;
386
387    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
388    if (mREs.size() > 1) {
389        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
390        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
391        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
392    }
393    StreamSetBuffer * Matches = MergedResults;
394    if (mMoveMatchesToEOL) {
395        StreamSetBuffer * OriginalMatches = Matches;
396        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
397        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
398        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
399    }
400
401    if (MaxCountFlag > 0) {
402        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
403        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
404        StreamSetBuffer * const AllMatches = Matches;
405        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
406        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
407    }
408
409    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
410};
411
412std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
413        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
414
415    this->initREs(REs);
416    auto mGrepDriver = &mPxDriver;
417
418    auto & idb = mGrepDriver->getBuilder();
419    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
420    const unsigned baseBufferSize = this->getInputBufferBlocks();
421    int MaxCountFlag = 0;
422
423    //  Regular Expression Processing and Analysis Phase
424    const auto nREs = mREs.size();
425
426    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
427
428    StreamSetBuffer * LineBreakStream = LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
429
430
431    std::map<std::string, StreamSetBuffer *> propertyStream;
432
433    for(unsigned i = 0; i < nREs; ++i) {
434        std::vector<std::string> externalStreamNames;
435        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
436
437        std::set<re::Name *> UnicodeProperties;
438
439        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
440        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
441        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
442        MatchResultsBufs[i] = MatchResults;
443    }
444
445    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
446    if (mREs.size() > 1) {
447        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
448        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
449        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
450    }
451    StreamSetBuffer * Matches = MergedResults;
452    if (mMoveMatchesToEOL) {
453        StreamSetBuffer * OriginalMatches = Matches;
454        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
455        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
456        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
457    }
458
459    if (MaxCountFlag > 0) {
460        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
461        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
462        StreamSetBuffer * const AllMatches = Matches;
463        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
464        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
465    }
466
467    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
468
469}
470
471void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
472    auto main = this->getScanMatchGrepMainFunction();
473    std::ostringstream s;
474    EmitMatch accum("", false, false, s);
475
476    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
477    llvm::outs() << s.str();
478}
479
480void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
481    auto & iBuilder = mPxDriver.getBuilder();
482    this->generateScanMatchMainFunc(iBuilder);
483
484    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
485
486    // GeneratePipeline
487    this->generateLoadByteStreamAndBitStream(iBuilder);
488    this->generateExtractAndDepositMarkers(iBuilder);
489
490    auto swizzle = this->generateSwizzleExtractData(iBuilder);
491
492    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
493    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
494
495    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
496    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
497
498    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
499    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
500
501    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
502    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
503
504    // Produce unswizzled bit streams
505    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
506    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
507    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
508
509    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
510    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
511
512    StreamSetBuffer * LineBreakStream;
513    StreamSetBuffer * Matches;
514    std::vector<re::RE*> res = {regex};
515    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
516
517    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
518    scanMatchK->setInitialArguments({match_accumulator});
519    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
520    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
521    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
522
523    mPxDriver.generatePipelineIR();
524    mPxDriver.deallocateBuffers();
525
526    iBuilder->CreateRetVoid();
527
528    mPxDriver.finalizeObject();
529}
530
531void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE* regex) {
532    auto & iBuilder = mPxDriver.getBuilder();
533    this->generateMainFunc(iBuilder);
534
535    // GeneratePipeline
536    this->generateLoadByteStreamAndBitStream(iBuilder);
537
538    std::vector<re::RE*> res = {regex};
539    this->generateMultiplexingCompressedBitStream(res);
540
541    mPxDriver.generatePipelineIR();
542    mPxDriver.deallocateBuffers();
543
544    iBuilder->CreateRetVoid();
545
546    mPxDriver.finalizeObject();
547}
548
549void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline2(re::RE* regex) {
550    auto & iBuilder = mPxDriver.getBuilder();
551    this->generateCountOnlyMainFunc(iBuilder);
552
553    // GeneratePipeline
554    this->generateLoadByteStreamAndBitStream(iBuilder);
555//    this->generateExtractAndDepositMarkers(iBuilder);
556
557    StreamSetBuffer * LineBreakStream;
558    StreamSetBuffer * Matches;
559    std::vector<re::RE*> res = {regex};
560    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
561
562    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
563    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
564    mPxDriver.generatePipelineIR();
565
566    iBuilder->setKernel(matchCountK);
567    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
568    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
569
570    mPxDriver.deallocateBuffers();
571
572    iBuilder->CreateRet(matchedLineCount);
573
574
575    mPxDriver.finalizeObject();
576}
577
578void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
579    auto & iBuilder = mPxDriver.getBuilder();
580    this->generateCountOnlyMainFunc(iBuilder);
581
582    // GeneratePipeline
583    this->generateLoadByteStreamAndBitStream(iBuilder);
584
585    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
586
587    StreamSetBuffer * LineBreakStream;
588    StreamSetBuffer * Matches;
589    std::vector<re::RE*> res = {regex};
590    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
591/*
592    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
593    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
594    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
595
596    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
597    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
598    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
599*/
600    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
601
602    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
603    mPxDriver.generatePipelineIR();
604
605    iBuilder->setKernel(matchCountK);
606    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
607    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
608    mPxDriver.deallocateBuffers();
609
610    iBuilder->CreateRet(matchedLineCount);
611
612    mPxDriver.finalizeObject();
613}
614
615void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex) {
616    auto & iBuilder = mPxDriver.getBuilder();
617    this->generateCountOnlyMainFunc(iBuilder);
618
619    this->generateLoadByteStreamAndBitStream(iBuilder);
620    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder);
621
622
623    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
624    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
625//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
626    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
627
628
629    StreamSetBuffer * LineBreakStream;
630    StreamSetBuffer * Matches;
631    std::vector<re::RE*> res = {regex};
632    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
633
634
635//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
636//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
637//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
638
639    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
640    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
641    mPxDriver.generatePipelineIR();
642
643    iBuilder->setKernel(matchCountK);
644    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
645    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
646
647    mPxDriver.deallocateBuffers();
648
649    iBuilder->CreateRet(matchedLineCount);
650
651    mPxDriver.finalizeObject();
652}
653
654void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
655    auto & iBuilder = mPxDriver.getBuilder();
656    this->generateCountOnlyMainFunc(iBuilder);
657
658    // GeneratePipeline
659//    this->generateLoadByteStreamAndBitStream(iBuilder);
660    this->generateLoadByteStream(iBuilder);
661    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
662
663
664    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
665    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
666//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
667    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
668
669
670    StreamSetBuffer * LineBreakStream;
671    StreamSetBuffer * Matches;
672    std::vector<re::RE*> res = {regex};
673    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
674
675
676//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
677//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
678//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
679
680    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
681    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
682    mPxDriver.generatePipelineIR();
683
684    iBuilder->setKernel(matchCountK);
685    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
686    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
687
688    mPxDriver.deallocateBuffers();
689
690    iBuilder->CreateRet(matchedLineCount);
691
692    mPxDriver.finalizeObject();
693
694}
695
696
697void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
698    auto & iBuilder = mPxDriver.getBuilder();
699    this->generateCountOnlyMainFunc(iBuilder);
700
701    // GeneratePipeline
702    this->generateLoadByteStreamAndBitStream(iBuilder);
703    this->generateExtractAndDepositMarkers(iBuilder);
704
705    StreamSetBuffer * LineBreakStream;
706    StreamSetBuffer * Matches;
707    std::vector<re::RE*> res = {regex};
708    if (mEnableMultiplexing) {
709        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
710    } else {
711        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
712        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
713        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
714        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
715
716        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
717        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
718        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
719
720        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
721    };
722
723    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
724    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
725    mPxDriver.generatePipelineIR();
726
727    iBuilder->setKernel(matchCountK);
728    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
729    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
730
731    mPxDriver.deallocateBuffers();
732
733    iBuilder->CreateRet(matchedLineCount);
734
735    mPxDriver.finalizeObject();
736}
737
738
739void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
740    auto & iBuilder = mPxDriver.getBuilder();
741    this->generateCountOnlyMainFunc(iBuilder);
742
743
744    // GeneratePipeline
745    this->generateLoadByteStreamAndBitStream(iBuilder);
746    this->generateExtractAndDepositMarkers(iBuilder);
747
748    auto swizzle = this->generateSwizzleExtractData(iBuilder);
749
750    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
751    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
752
753    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
754    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
755
756
757    // split PDEP into 2 kernel will be a little slower in single thread environment
758/*
759    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
760    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
761
762    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
763    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
764*/
765
766    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
767    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
768
769    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
770    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
771
772    // Produce unswizzled bit streams
773    StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
774    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
775    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
776
777    StreamSetBuffer * LineBreakStream;
778    StreamSetBuffer * Matches;
779    std::vector<re::RE*> res = {regex};
780//    if (mEnableMultiplexing) {
781//        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, matchCopiedbits);
782//    } else {
783        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
784//    };
785
786    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
787    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
788    mPxDriver.generatePipelineIR();
789
790    iBuilder->setKernel(matchCountK);
791    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
792    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
793
794    mPxDriver.deallocateBuffers();
795
796    iBuilder->CreateRet(matchedLineCount);
797
798    mPxDriver.finalizeObject();
799}
800
801ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
802    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
803}
804CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
805    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
806}
807
808void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
809    Module * M = iBuilder->getModule();
810    Type * const int64Ty = iBuilder->getInt64Ty();
811    Type * const sizeTy = iBuilder->getSizeTy();
812    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
813    Type * const voidTy = iBuilder->getVoidTy();
814    Type * const inputType = iBuilder->getInt8PtrTy();
815
816    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
817    main->setCallingConv(CallingConv::C);
818    Function::arg_iterator args = main->arg_begin();
819    mInputStream = &*(args++);
820    mInputStream->setName("input");
821
822    mHeaderSize = &*(args++);
823    mHeaderSize->setName("mHeaderSize");
824
825    mFileSize = &*(args++);
826    mFileSize->setName("mFileSize");
827
828    mHasBlockChecksum = &*(args++);
829    mHasBlockChecksum->setName("mHasBlockChecksum");
830    // TODO for now, we do not handle blockCheckSum
831    mHasBlockChecksum = iBuilder->getInt1(false);
832
833    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
834}
835
836void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
837    Module * M = iBuilder->getModule();
838    Type * const sizeTy = iBuilder->getSizeTy();
839    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
840    Type * const voidTy = iBuilder->getVoidTy();
841    Type * const inputType = iBuilder->getInt8PtrTy();
842    Type * const intAddrTy = iBuilder->getIntAddrTy();
843
844    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
845    main->setCallingConv(CallingConv::C);
846    Function::arg_iterator args = main->arg_begin();
847    mInputStream = &*(args++);
848    mInputStream->setName("input");
849
850    mHeaderSize = &*(args++);
851    mHeaderSize->setName("mHeaderSize");
852
853    mFileSize = &*(args++);
854    mFileSize->setName("mFileSize");
855
856    mHasBlockChecksum = &*(args++);
857    mHasBlockChecksum->setName("mHasBlockChecksum");
858
859    match_accumulator = &*(args++);
860    match_accumulator->setName("match_accumulator");
861
862    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
863}
Note: See TracBrowser for help on using the repository browser.