source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6065

Last change on this file since 6065 was 6065, checked in by xwa163, 11 months ago
  1. Fix some typo in LZ4 Grep Extract and Deposit pipeline
  2. Small fix for LZ4ParallelByteStreamAIOKernel
File size: 42.3 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53#include <kernels/lz4/lz4_block_decoder.h>
54#include <kernels/lz4/lz4_swizzled_aio.h>
55
56
57namespace re { class CC; }
58
59using namespace llvm;
60using namespace parabix;
61using namespace kernel;
62using namespace grep;
63
64LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
65    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
66    mMoveMatchesToEOL = true;
67}
68
69void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
70    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
71        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
72    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
73        mBreakCC = re::makeByte(0);  // Null
74    } else {
75        mBreakCC = re::makeByte(0x0A); // LF
76    }
77    re::RE * anchorRE = mBreakCC;
78    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
79        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
80        anchorName->setDefinition(re::makeUnicodeBreak());
81        anchorRE = anchorName;
82    }
83
84    mREs = REs;
85    bool allAnchored = true;
86    for(unsigned i = 0; i < mREs.size(); ++i) {
87        if (!hasEndAnchor(mREs[i])) allAnchored = false;
88        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
89        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
90        mREs[i] = resolveAnchors(mREs[i], anchorRE);
91        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
92        mREs[i] = regular_expression_passes(mREs[i]);
93    }
94    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
95
96}
97
98
99parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
100//    auto mGrepDriver = &mPxDriver;
101    const unsigned baseBufferSize = this->getInputBufferBlocks();
102    auto & idb = mPxDriver.getBuilder();
103    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
104    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
105    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
106    return LineFeedStream;
107}
108
109StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithAioApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
110    auto mGrepDriver = &mPxDriver;
111    auto & iBuilder = mGrepDriver->getBuilder();
112
113    //// Decode Block Information
114    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
115    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
116    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
117
118    //// Generate Helper Markers Extenders, FX, XF
119    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
120    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
121    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
122    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
123
124
125    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
126    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
127    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
128
129
130    // Produce unswizzled bit streams
131    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
132    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
133    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
134
135    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
136
137
138    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
139    lz4AioK->setInitialArguments({mFileSize});
140    mPxDriver.makeKernelCall(
141            lz4AioK,
142            {
143                    mCompressedByteStream,
144                    Extenders,
145
146                    // Block Data
147                    BlockData_IsCompressed,
148                    BlockData_BlockStart,
149                    BlockData_BlockEnd,
150
151                    u16Swizzle0,
152            }, {
153                    decompressedSwizzled0,
154            });
155
156
157
158    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
159    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
160    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
161
162    return decompressionBitStream;
163
164}
165
166StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
167    auto mGrepDriver = &mPxDriver;
168    auto & idb = mGrepDriver->getBuilder();
169
170    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
171    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
172    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
173
174    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
175    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
176    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
177
178    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
179    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
180    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
181
182    // Produce unswizzled bit streams
183    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
184    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
185    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
186
187    return matchCopiedBits;
188}
189parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
190    if (numberOfStream == 4) {
191        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
192    }
193
194    auto mGrepDriver = &mPxDriver;
195    auto & idb = mGrepDriver->getBuilder();
196
197    // Extract (Deletion)
198    this->generateCompressionMarker(idb);
199
200    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
201    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
202
203    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
204    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
205
206    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
207    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
208    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
209
210    // Deposit
211    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
212    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
213    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
214
215    // Match Copy
216    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
217    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
218    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
219
220    return matchCopiedBits;
221}
222
223parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
224    auto mGrepDriver = &mPxDriver;
225    const unsigned baseBufferSize = this->getInputBufferBlocks();
226    auto & idb = mGrepDriver->getBuilder();
227
228    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
229    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
230    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
231    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
232}
233
234void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
235    this->initREs(REs);
236    auto mGrepDriver = &mPxDriver;
237
238    auto & idb = mGrepDriver->getBuilder();
239    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
240    const unsigned baseBufferSize = this->getInputBufferBlocks();
241    bool CC_Multiplexing = true;
242    int MaxCountFlag = 0;
243
244
245    //  Regular Expression Processing and Analysis Phase
246    const auto nREs = mREs.size();
247
248    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
249
250    std::map<std::string, StreamSetBuffer *> propertyStream;
251
252    std::vector<std::string> externalStreamNames;
253    std::set<re::Name *> UnicodeProperties;
254
255    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
256    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
257
258    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
259    mREs[0] = transformCCs(mpx.get(), mREs[0]);
260    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
261    auto numOfCharacterClasses = mpx_basis.size();
262    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
263
264    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
265    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
266
267    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
268    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
269    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
270
271    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
272    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
273    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
274    StreamSetBuffer * decompressedCombinedStream = nullptr;
275
276    decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
277
278/*
279    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
280    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
281    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
282    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
283
284
285
286    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
287    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
288    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
289
290    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
291    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
292    MatchResultsBufs[0] = MatchResults;
293
294    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
295    if (mREs.size() > 1) {
296        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
297        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
298        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
299    }
300    StreamSetBuffer * Matches = MergedResults;
301    if (mMoveMatchesToEOL) {
302        StreamSetBuffer * OriginalMatches = Matches;
303        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
304        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
305        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
306    }
307
308    if (MaxCountFlag > 0) {
309        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
310        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
311        StreamSetBuffer * const AllMatches = Matches;
312        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
313        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
314    }
315
316    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
317    */
318
319};
320std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio) {
321
322    this->initREs(REs);
323    auto mGrepDriver = &mPxDriver;
324
325    auto & idb = mGrepDriver->getBuilder();
326    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
327    const unsigned baseBufferSize = this->getInputBufferBlocks();
328    bool CC_Multiplexing = true;
329    int MaxCountFlag = 0;
330
331    //  Regular Expression Processing and Analysis Phase
332    const auto nREs = mREs.size();
333
334    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
335
336
337    std::map<std::string, StreamSetBuffer *> propertyStream;
338
339    std::vector<std::string> externalStreamNames;
340    std::set<re::Name *> UnicodeProperties;
341
342    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
343    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
344
345    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
346    mREs[0] = transformCCs(mpx.get(), mREs[0]);
347    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
348    auto numOfCharacterClasses = mpx_basis.size();
349    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
350
351    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
352    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
353
354    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
355    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
356    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
357
358    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
359    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
360    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
361    StreamSetBuffer * decompressedCombinedStream = nullptr;
362
363    if (useAio) {
364        decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
365    } else {
366        decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
367    }
368
369    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
370    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
371    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
372    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
373
374    /*
375    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
376    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
377     */
378
379    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
380    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
381    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
382
383    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
384    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
385    MatchResultsBufs[0] = MatchResults;
386
387    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
388    if (mREs.size() > 1) {
389        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
390        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
391        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
392    }
393    StreamSetBuffer * Matches = MergedResults;
394    if (mMoveMatchesToEOL) {
395        StreamSetBuffer * OriginalMatches = Matches;
396        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
397        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
398        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
399    }
400
401    if (MaxCountFlag > 0) {
402        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
403        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
404        StreamSetBuffer * const AllMatches = Matches;
405        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
406        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
407    }
408
409    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
410};
411
412std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
413        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
414
415    this->initREs(REs);
416    auto mGrepDriver = &mPxDriver;
417
418    auto & idb = mGrepDriver->getBuilder();
419    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
420    const unsigned baseBufferSize = this->getInputBufferBlocks();
421    int MaxCountFlag = 0;
422
423    //  Regular Expression Processing and Analysis Phase
424    const auto nREs = mREs.size();
425
426    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
427
428    StreamSetBuffer * LineBreakStream = LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
429
430
431    std::map<std::string, StreamSetBuffer *> propertyStream;
432
433    for(unsigned i = 0; i < nREs; ++i) {
434        std::vector<std::string> externalStreamNames;
435        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
436
437        std::set<re::Name *> UnicodeProperties;
438
439        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
440        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
441        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
442        MatchResultsBufs[i] = MatchResults;
443    }
444
445    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
446    if (mREs.size() > 1) {
447        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
448        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
449        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
450    }
451    StreamSetBuffer * Matches = MergedResults;
452    if (mMoveMatchesToEOL) {
453        StreamSetBuffer * OriginalMatches = Matches;
454        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
455        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
456        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
457    }
458
459    if (MaxCountFlag > 0) {
460        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
461        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
462        StreamSetBuffer * const AllMatches = Matches;
463        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
464        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
465    }
466
467    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
468
469}
470
471void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
472    auto main = this->getScanMatchGrepMainFunction();
473    std::ostringstream s;
474    EmitMatch accum("", false, false, s);
475
476    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
477    llvm::outs() << s.str();
478}
479
480void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
481    auto & iBuilder = mPxDriver.getBuilder();
482    this->generateScanMatchMainFunc(iBuilder);
483
484    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
485
486    // GeneratePipeline
487    this->generateLoadByteStreamAndBitStream(iBuilder);
488    this->generateExtractAndDepositMarkers(iBuilder);
489
490    auto swizzle = this->generateSwizzleExtractData(iBuilder);
491
492    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
493    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
494
495    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
496    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
497
498    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
499    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
500
501    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
502    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
503
504    // Produce unswizzled bit streams
505    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
506    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
507    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
508
509    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
510    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
511
512    StreamSetBuffer * LineBreakStream;
513    StreamSetBuffer * Matches;
514    std::vector<re::RE*> res = {regex};
515    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
516
517    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
518    scanMatchK->setInitialArguments({match_accumulator});
519    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
520    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
521    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
522
523    mPxDriver.generatePipelineIR();
524    mPxDriver.deallocateBuffers();
525
526    iBuilder->CreateRetVoid();
527
528    mPxDriver.finalizeObject();
529}
530
531void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE* regex) {
532    auto & iBuilder = mPxDriver.getBuilder();
533    this->generateMainFunc(iBuilder);
534
535    // GeneratePipeline
536    this->generateLoadByteStreamAndBitStream(iBuilder);
537
538    std::vector<re::RE*> res = {regex};
539    this->generateMultiplexingCompressedBitStream(res);
540
541    mPxDriver.generatePipelineIR();
542    mPxDriver.deallocateBuffers();
543
544    iBuilder->CreateRetVoid();
545
546    mPxDriver.finalizeObject();
547}
548
549void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline2(re::RE* regex) {
550    auto & iBuilder = mPxDriver.getBuilder();
551    this->generateCountOnlyMainFunc(iBuilder);
552
553    // GeneratePipeline
554    this->generateLoadByteStreamAndBitStream(iBuilder);
555//    this->generateExtractAndDepositMarkers(iBuilder);
556
557    StreamSetBuffer * LineBreakStream;
558    StreamSetBuffer * Matches;
559    std::vector<re::RE*> res = {regex};
560    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
561
562    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
563    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
564    mPxDriver.generatePipelineIR();
565
566    iBuilder->setKernel(matchCountK);
567    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
568    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
569
570    mPxDriver.deallocateBuffers();
571
572    iBuilder->CreateRet(matchedLineCount);
573
574
575    mPxDriver.finalizeObject();
576}
577
578void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
579    auto & iBuilder = mPxDriver.getBuilder();
580    this->generateCountOnlyMainFunc(iBuilder);
581
582    // GeneratePipeline
583    this->generateLoadByteStreamAndBitStream(iBuilder);
584
585    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
586
587    StreamSetBuffer * LineBreakStream;
588    StreamSetBuffer * Matches;
589    std::vector<re::RE*> res = {regex};
590    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
591/*
592    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
593    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
594    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
595
596    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
597    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
598    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
599*/
600    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
601
602    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
603    mPxDriver.generatePipelineIR();
604
605    iBuilder->setKernel(matchCountK);
606    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
607    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
608    mPxDriver.deallocateBuffers();
609
610    iBuilder->CreateRet(matchedLineCount);
611
612    mPxDriver.finalizeObject();
613}
614
615void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex) {
616    auto & iBuilder = mPxDriver.getBuilder();
617    this->generateCountOnlyMainFunc(iBuilder);
618
619    this->generateLoadByteStreamAndBitStream(iBuilder);
620    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder);
621
622
623    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
624    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
625//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
626    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
627
628
629    StreamSetBuffer * LineBreakStream;
630    StreamSetBuffer * Matches;
631    std::vector<re::RE*> res = {regex};
632    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
633
634
635//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
636//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
637//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
638
639    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
640    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
641    mPxDriver.generatePipelineIR();
642
643    iBuilder->setKernel(matchCountK);
644    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
645    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
646
647    mPxDriver.deallocateBuffers();
648
649    iBuilder->CreateRet(matchedLineCount);
650
651    mPxDriver.finalizeObject();
652}
653
654void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
655    auto & iBuilder = mPxDriver.getBuilder();
656    this->generateCountOnlyMainFunc(iBuilder);
657
658    // GeneratePipeline
659//    this->generateLoadByteStreamAndBitStream(iBuilder);
660    this->generateLoadByteStream(iBuilder);
661    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
662
663
664    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
665    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
666//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
667    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
668
669
670    StreamSetBuffer * LineBreakStream;
671    StreamSetBuffer * Matches;
672    std::vector<re::RE*> res = {regex};
673    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
674
675
676//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
677//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
678//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
679
680    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
681    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
682    mPxDriver.generatePipelineIR();
683
684    iBuilder->setKernel(matchCountK);
685    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
686    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
687
688    mPxDriver.deallocateBuffers();
689
690    iBuilder->CreateRet(matchedLineCount);
691
692    mPxDriver.finalizeObject();
693
694}
695
696
697void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
698    auto & iBuilder = mPxDriver.getBuilder();
699    this->generateCountOnlyMainFunc(iBuilder);
700
701    // GeneratePipeline
702    this->generateLoadByteStreamAndBitStream(iBuilder);
703    this->generateExtractAndDepositMarkers(iBuilder);
704
705    StreamSetBuffer * LineBreakStream;
706    StreamSetBuffer * Matches;
707    std::vector<re::RE*> res = {regex};
708    if (mEnableMultiplexing) {
709        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
710    } else {
711        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
712        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
713        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
714        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
715
716        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
717        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
718        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
719
720        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
721    };
722
723    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
724    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
725    mPxDriver.generatePipelineIR();
726
727    iBuilder->setKernel(matchCountK);
728    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
729    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
730
731    mPxDriver.deallocateBuffers();
732
733    iBuilder->CreateRet(matchedLineCount);
734
735    mPxDriver.finalizeObject();
736}
737
738
739void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
740    auto & iBuilder = mPxDriver.getBuilder();
741    this->generateCountOnlyMainFunc(iBuilder);
742
743
744    // GeneratePipeline
745    this->generateLoadByteStreamAndBitStream(iBuilder);
746    this->generateExtractAndDepositMarkers(iBuilder);
747
748
749    StreamSetBuffer * LineBreakStream;
750    StreamSetBuffer * Matches;
751    std::vector<re::RE*> res = {regex};
752    if (mEnableMultiplexing) {
753        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
754    } else {
755        auto swizzle = this->generateSwizzleExtractData(iBuilder);
756
757        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
758        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
759
760        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
761        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
762
763
764        // split PDEP into 2 kernel will be a little slower in single thread environment
765/*
766    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
767    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
768
769    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
770    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
771*/
772
773        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
774        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
775
776        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
777        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
778
779        // Produce unswizzled bit streams
780        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
781        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
782        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
783
784
785        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
786    };
787
788    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
789    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
790    mPxDriver.generatePipelineIR();
791
792    iBuilder->setKernel(matchCountK);
793    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
794    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
795
796    mPxDriver.deallocateBuffers();
797
798    iBuilder->CreateRet(matchedLineCount);
799
800    mPxDriver.finalizeObject();
801}
802
803ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
804    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
805}
806CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
807    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
808}
809
810void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
811    Module * M = iBuilder->getModule();
812    Type * const int64Ty = iBuilder->getInt64Ty();
813    Type * const sizeTy = iBuilder->getSizeTy();
814    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
815    Type * const voidTy = iBuilder->getVoidTy();
816    Type * const inputType = iBuilder->getInt8PtrTy();
817
818    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
819    main->setCallingConv(CallingConv::C);
820    Function::arg_iterator args = main->arg_begin();
821    mInputStream = &*(args++);
822    mInputStream->setName("input");
823
824    mHeaderSize = &*(args++);
825    mHeaderSize->setName("mHeaderSize");
826
827    mFileSize = &*(args++);
828    mFileSize->setName("mFileSize");
829
830    mHasBlockChecksum = &*(args++);
831    mHasBlockChecksum->setName("mHasBlockChecksum");
832    // TODO for now, we do not handle blockCheckSum
833    mHasBlockChecksum = iBuilder->getInt1(false);
834
835    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
836}
837
838void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
839    Module * M = iBuilder->getModule();
840    Type * const sizeTy = iBuilder->getSizeTy();
841    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
842    Type * const voidTy = iBuilder->getVoidTy();
843    Type * const inputType = iBuilder->getInt8PtrTy();
844    Type * const intAddrTy = iBuilder->getIntAddrTy();
845
846    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
847    main->setCallingConv(CallingConv::C);
848    Function::arg_iterator args = main->arg_begin();
849    mInputStream = &*(args++);
850    mInputStream->setName("input");
851
852    mHeaderSize = &*(args++);
853    mHeaderSize->setName("mHeaderSize");
854
855    mFileSize = &*(args++);
856    mFileSize->setName("mFileSize");
857
858    mHasBlockChecksum = &*(args++);
859    mHasBlockChecksum->setName("mHasBlockChecksum");
860
861    match_accumulator = &*(args++);
862    match_accumulator->setName("match_accumulator");
863
864    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
865}
Note: See TracBrowser for help on using the repository browser.