source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6119

Last change on this file since 6119 was 6119, checked in by xwa163, 15 months ago
  1. Add a BasisSetNumbering option to fix a multiplexing bug
  2. Use BigEndian BitNumbering for the lz4 and lzparabix related pipelines
  3. Support multiplexing in the LZ4BitStreamAio pipeline
File size: 44.9 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53#include <kernels/lz4/lz4_block_decoder.h>
54#include <kernels/lz4/aio/lz4_swizzled_aio.h>
55#include <kernels/lz4/aio/lz4_bitstream_aio.h>
56
57
58namespace re { class CC; }
59
60using namespace llvm;
61using namespace parabix;
62using namespace kernel;
63using namespace grep;
64
65LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
66    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
67    mMoveMatchesToEOL = true;
68}
69
70void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
71    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
72        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
73    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
74        mBreakCC = re::makeByte(0);  // Null
75    } else {
76        mBreakCC = re::makeByte(0x0A); // LF
77    }
78    re::RE * anchorRE = mBreakCC;
79    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
80        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
81        anchorName->setDefinition(re::makeUnicodeBreak());
82        anchorRE = anchorName;
83    }
84
85    mREs = REs;
86    bool allAnchored = true;
87    for(unsigned i = 0; i < mREs.size(); ++i) {
88        if (!hasEndAnchor(mREs[i])) allAnchored = false;
89        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
90        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
91        mREs[i] = resolveAnchors(mREs[i], anchorRE);
92        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
93        mREs[i] = regular_expression_passes(mREs[i]);
94    }
95    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
96
97}
98
99
100parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
101//    auto mGrepDriver = &mPxDriver;
102    auto & idb = mPxDriver.getBuilder();
103    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
104    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
105    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
106    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
107    return LineFeedStream;
108}
109
110
111
112StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
113        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
114    auto mGrepDriver = &mPxDriver;
115    auto & iBuilder = mGrepDriver->getBuilder();
116
117    //// Decode Block Information
118    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
119    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
120    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
121
122    //// Generate Helper Markers Extenders, FX, XF
123//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
124//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
125//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
126//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
127
128
129    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
130    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
131    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
132
133
134    // Produce unswizzled bit streams
135    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
136    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
137    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
138
139    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
140
141
142    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
143    lz4AioK->setInitialArguments({mFileSize});
144    mPxDriver.makeKernelCall(
145            lz4AioK,
146            {
147                    mCompressedByteStream,
148//                    Extenders,
149
150                    // Block Data
151                    BlockData_IsCompressed,
152                    BlockData_BlockStart,
153                    BlockData_BlockEnd,
154
155                    u16Swizzle0,
156            }, {
157                    decompressedSwizzled0,
158            });
159
160
161
162    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
163    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
164    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
165
166    return decompressionBitStream;
167
168}
169
170StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
171    auto mGrepDriver = &mPxDriver;
172    auto & idb = mGrepDriver->getBuilder();
173
174    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
175    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
176    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
177
178    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
179    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
180    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
181
182    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
183    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
184    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
185
186    // Produce unswizzled bit streams
187    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
188    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
189    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
190
191    return matchCopiedBits;
192}
193parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
194    if (numberOfStream == 4) {
195        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
196    }
197
198    auto mGrepDriver = &mPxDriver;
199    auto & idb = mGrepDriver->getBuilder();
200
201    // Extract (Deletion)
202    this->generateCompressionMarker(idb);
203
204    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
205    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
206
207    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
208    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
209
210    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
211    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
212    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
213
214    // Deposit
215    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
216    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
217    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
218
219    // Match Copy
220    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
221    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
222    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
223
224    return matchCopiedBits;
225}
226
227parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
228    auto mGrepDriver = &mPxDriver;
229    auto & idb = mGrepDriver->getBuilder();
230    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
231
232    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
233    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
234    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
235    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
236}
237
238void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
239    this->initREs(REs);
240    auto mGrepDriver = &mPxDriver;
241
242    auto & idb = mGrepDriver->getBuilder();
243    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
244    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
245
246
247    //  Regular Expression Processing and Analysis Phase
248    const auto nREs = mREs.size();
249
250    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
251
252    std::map<std::string, StreamSetBuffer *> propertyStream;
253
254    std::vector<std::string> externalStreamNames;
255    std::set<re::Name *> UnicodeProperties;
256
257    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
258    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
259
260    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
261    mREs[0] = transformCCs(mpx.get(), mREs[0]);
262    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
263    auto numOfCharacterClasses = mpx_basis.size();
264    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
265
266    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
267    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
268
269    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
270    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
271    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
272
273    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
274    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
275    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
276    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(
277            combinedStream, 1 + numOfCharacterClasses, "combined");
278
279    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
280    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
281    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
282    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
283
284
285
286    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
287    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
288    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
289
290    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
291    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
292    MatchResultsBufs[0] = MatchResults;
293
294    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
295    if (mREs.size() > 1) {
296        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
297        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
298        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
299    }
300    StreamSetBuffer * Matches = MergedResults;
301    if (mMoveMatchesToEOL) {
302        StreamSetBuffer * OriginalMatches = Matches;
303        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
304        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
305        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
306    }
307
308//    if (MaxCountFlag > 0) {
309//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
310//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
311//        StreamSetBuffer * const AllMatches = Matches;
312//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
313//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
314//    }
315
316//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
317
318};
319std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled) {
320
321    this->initREs(REs);
322    auto mGrepDriver = &mPxDriver;
323
324    auto & idb = mGrepDriver->getBuilder();
325    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
326    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
327    int MaxCountFlag = 0;
328
329    //  Regular Expression Processing and Analysis Phase
330    const auto nREs = mREs.size();
331
332    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
333
334
335    std::map<std::string, StreamSetBuffer *> propertyStream;
336
337    std::vector<std::string> externalStreamNames;
338    std::set<re::Name *> UnicodeProperties;
339
340    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
341    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
342
343    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
344    mREs[0] = transformCCs(mpx.get(), mREs[0]);
345    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
346    auto numOfCharacterClasses = mpx_basis.size();
347    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
348
349    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
350    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
351
352    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
353    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
354    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
355
356
357    StreamSetBuffer * LineBreakStream = nullptr;
358    StreamSetBuffer * decompressedCharClasses = nullptr;
359    if (useSwizzled) {
360        StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
361        kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
362        mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
363        StreamSetBuffer * decompressedCombinedStream = nullptr;
364
365        if (useAio) {
366            decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
367        } else {
368            decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
369        }
370
371        LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
372        decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
373        kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
374        mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
375    } else {
376        auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CompressedLineFeedStream, CharClasses}, "combined");
377        LineBreakStream = ret[0];
378        decompressedCharClasses = ret[1];
379    }
380
381    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
382    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
383    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
384
385    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
386    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
387    MatchResultsBufs[0] = MatchResults;
388
389    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
390    if (mREs.size() > 1) {
391        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
392        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
393        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
394    }
395    StreamSetBuffer * Matches = MergedResults;
396    if (mMoveMatchesToEOL) {
397        StreamSetBuffer * OriginalMatches = Matches;
398        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
399        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
400        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
401    }
402
403    if (MaxCountFlag > 0) {
404        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
405        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
406        StreamSetBuffer * const AllMatches = Matches;
407        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
408        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
409    }
410
411    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
412};
413
414std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
415        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
416
417    this->initREs(REs);
418    auto mGrepDriver = &mPxDriver;
419
420    auto & idb = mGrepDriver->getBuilder();
421    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
422    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
423    int MaxCountFlag = 0;
424
425    //  Regular Expression Processing and Analysis Phase
426    const auto nREs = mREs.size();
427
428    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
429
430    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
431
432
433    std::map<std::string, StreamSetBuffer *> propertyStream;
434
435    for(unsigned i = 0; i < nREs; ++i) {
436        std::vector<std::string> externalStreamNames;
437        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
438
439        std::set<re::Name *> UnicodeProperties;
440
441        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
442        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
443        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
444        MatchResultsBufs[i] = MatchResults;
445    }
446
447    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
448    if (mREs.size() > 1) {
449        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
450        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
451        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
452    }
453    StreamSetBuffer * Matches = MergedResults;
454    if (mMoveMatchesToEOL) {
455        StreamSetBuffer * OriginalMatches = Matches;
456        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
457        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
458        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
459    }
460
461    if (MaxCountFlag > 0) {
462        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
463        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
464        StreamSetBuffer * const AllMatches = Matches;
465        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
466        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
467    }
468
469    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
470
471}
472
473void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
474    auto main = this->getScanMatchGrepMainFunction();
475    std::ostringstream s;
476    EmitMatch accum("", false, false, s);
477
478    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
479    llvm::outs() << s.str();
480}
481
482void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
483    auto & iBuilder = mPxDriver.getBuilder();
484    this->generateScanMatchMainFunc(iBuilder);
485
486    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
487
488    // GeneratePipeline
489    this->generateLoadByteStreamAndBitStream(iBuilder);
490    this->generateExtractAndDepositMarkers(iBuilder);
491
492    auto swizzle = this->generateSwizzleExtractData(iBuilder);
493
494    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
495    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
496
497    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
498    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
499
500    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
501    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
502
503    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
504    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
505
506    // Produce unswizzled bit streams
507    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
508    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
509    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
510
511    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
512    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
513
514    StreamSetBuffer * LineBreakStream;
515    StreamSetBuffer * Matches;
516    std::vector<re::RE*> res = {regex};
517    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
518
519    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
520    scanMatchK->setInitialArguments({match_accumulator});
521    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
522    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
523    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
524
525    mPxDriver.generatePipelineIR();
526    mPxDriver.deallocateBuffers();
527
528    iBuilder->CreateRetVoid();
529
530    mPxDriver.finalizeObject();
531}
532
533void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
534    auto & iBuilder = mPxDriver.getBuilder();
535    this->generateCountOnlyMainFunc(iBuilder);
536
537    // GeneratePipeline
538    this->generateLoadByteStreamAndBitStream(iBuilder);
539//    this->generateExtractAndDepositMarkers(iBuilder);
540
541    StreamSetBuffer * LineBreakStream;
542    StreamSetBuffer * Matches;
543    std::vector<re::RE*> res = {regex};
544    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
545
546    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
547    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
548    mPxDriver.generatePipelineIR();
549
550    iBuilder->setKernel(matchCountK);
551    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
552    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
553
554    mPxDriver.deallocateBuffers();
555
556    iBuilder->CreateRet(matchedLineCount);
557
558
559    mPxDriver.finalizeObject();
560}
561
562void LZ4GrepGenerator::generateMultiplexingBitStreamAioPipeline(re::RE* regex) {
563    auto & iBuilder = mPxDriver.getBuilder();
564    this->generateCountOnlyMainFunc(iBuilder);
565
566    this->generateLoadByteStreamAndBitStream(iBuilder);
567    StreamSetBuffer * LineBreakStream;
568    StreamSetBuffer * Matches;
569    std::vector<re::RE*> res = {regex};
570    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false);
571
572    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
573    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
574    mPxDriver.generatePipelineIR();
575
576    iBuilder->setKernel(matchCountK);
577    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
578    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
579
580    mPxDriver.deallocateBuffers();
581
582    iBuilder->CreateRet(matchedLineCount);
583
584    mPxDriver.finalizeObject();
585}
586
587void LZ4GrepGenerator::generateBitStreamAioPipeline(re::RE* regex) {
588    auto & iBuilder = mPxDriver.getBuilder();
589    this->generateCountOnlyMainFunc(iBuilder);
590
591    // GeneratePipeline
592    this->generateLoadByteStreamAndBitStream(iBuilder);
593    StreamSetBuffer * const decompressionBitStream = this->generateBitStreamAIODecompression(iBuilder);
594
595    StreamSetBuffer * LineBreakStream;
596    StreamSetBuffer * Matches;
597    std::vector<re::RE*> res = {regex};
598    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
599
600    /*
601    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
602    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
603    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
604
605    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
606    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
607    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
608    */
609    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
610
611    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
612    mPxDriver.generatePipelineIR();
613
614    iBuilder->setKernel(matchCountK);
615    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
616    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
617    mPxDriver.deallocateBuffers();
618
619    iBuilder->CreateRet(matchedLineCount);
620
621    mPxDriver.finalizeObject();
622}
623
624void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
625    auto & iBuilder = mPxDriver.getBuilder();
626    this->generateCountOnlyMainFunc(iBuilder);
627
628    // GeneratePipeline
629    this->generateLoadByteStreamAndBitStream(iBuilder);
630
631    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
632
633    StreamSetBuffer * LineBreakStream;
634    StreamSetBuffer * Matches;
635    std::vector<re::RE*> res = {regex};
636    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
637/*
638    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
639    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
640    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
641
642    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
643    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
644    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
645*/
646    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
647
648    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
649    mPxDriver.generatePipelineIR();
650
651    iBuilder->setKernel(matchCountK);
652    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
653    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
654    mPxDriver.deallocateBuffers();
655
656    iBuilder->CreateRet(matchedLineCount);
657
658    mPxDriver.finalizeObject();
659}
660
661void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
662    auto & iBuilder = mPxDriver.getBuilder();
663    this->generateCountOnlyMainFunc(iBuilder);
664
665    this->generateLoadByteStream(iBuilder);
666    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
667
668
669    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
670    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
671    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
672
673
674    StreamSetBuffer * LineBreakStream;
675    StreamSetBuffer * Matches;
676    std::vector<re::RE*> res = {regex};
677    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
678
679
680//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
681//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
682//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
683
684    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
685    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
686    mPxDriver.generatePipelineIR();
687
688    iBuilder->setKernel(matchCountK);
689    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
690    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
691
692    mPxDriver.deallocateBuffers();
693
694    iBuilder->CreateRet(matchedLineCount);
695
696    mPxDriver.finalizeObject();
697}
698
699void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
700    auto & iBuilder = mPxDriver.getBuilder();
701    this->generateCountOnlyMainFunc(iBuilder);
702
703    // GeneratePipeline
704    this->generateLoadByteStream(iBuilder);
705//    this->generateLoadByteStreamAndBitStream(iBuilder);
706
707    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
708
709
710    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
711    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
712    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
713
714
715    StreamSetBuffer * LineBreakStream;
716    StreamSetBuffer * Matches;
717    std::vector<re::RE*> res = {regex};
718    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
719
720
721//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
722//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
723//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
724
725    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
726    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
727    mPxDriver.generatePipelineIR();
728
729    iBuilder->setKernel(matchCountK);
730    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
731    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
732
733    mPxDriver.deallocateBuffers();
734
735    iBuilder->CreateRet(matchedLineCount);
736
737    mPxDriver.finalizeObject();
738
739}
740
741
742void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
743    auto & iBuilder = mPxDriver.getBuilder();
744    this->generateCountOnlyMainFunc(iBuilder);
745
746    // GeneratePipeline
747    this->generateLoadByteStreamAndBitStream(iBuilder);
748    this->generateExtractAndDepositMarkers(iBuilder);
749
750    StreamSetBuffer * LineBreakStream;
751    StreamSetBuffer * Matches;
752    std::vector<re::RE*> res = {regex};
753    if (mEnableMultiplexing) {
754        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
755    } else {
756        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
757        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
758        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
759        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
760
761        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
762        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
763        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
764
765        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
766    };
767
768    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
769    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
770    mPxDriver.generatePipelineIR();
771
772    iBuilder->setKernel(matchCountK);
773    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
774    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
775
776    mPxDriver.deallocateBuffers();
777
778    iBuilder->CreateRet(matchedLineCount);
779
780    mPxDriver.finalizeObject();
781}
782
783
784void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
785    auto & iBuilder = mPxDriver.getBuilder();
786    this->generateCountOnlyMainFunc(iBuilder);
787
788
789    // GeneratePipeline
790    this->generateLoadByteStreamAndBitStream(iBuilder);
791    this->generateExtractAndDepositMarkers(iBuilder);
792
793
794    StreamSetBuffer * LineBreakStream;
795    StreamSetBuffer * Matches;
796    std::vector<re::RE*> res = {regex};
797    if (mEnableMultiplexing) {
798        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
799    } else {
800        auto swizzle = this->generateSwizzleExtractData(iBuilder);
801
802        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
803        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
804
805        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
806        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
807
808
809        // split PDEP into 2 kernel will be a little slower in single thread environment
810/*
811    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
812    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
813
814    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
815    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
816*/
817
818        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
819        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
820
821        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
822        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
823
824        // Produce unswizzled bit streams
825        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
826        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
827        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
828
829
830        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
831    };
832
833    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
834    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
835    mPxDriver.generatePipelineIR();
836
837    iBuilder->setKernel(matchCountK);
838    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
839    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
840
841    mPxDriver.deallocateBuffers();
842
843    iBuilder->CreateRet(matchedLineCount);
844
845    mPxDriver.finalizeObject();
846}
847
848ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
849    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
850}
851CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
852    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
853}
854
855void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
856    Module * M = iBuilder->getModule();
857    Type * const int64Ty = iBuilder->getInt64Ty();
858    Type * const sizeTy = iBuilder->getSizeTy();
859    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
860//    Type * const voidTy = iBuilder->getVoidTy();
861    Type * const inputType = iBuilder->getInt8PtrTy();
862
863    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
864    main->setCallingConv(CallingConv::C);
865    Function::arg_iterator args = main->arg_begin();
866    mInputStream = &*(args++);
867    mInputStream->setName("input");
868
869    mHeaderSize = &*(args++);
870    mHeaderSize->setName("mHeaderSize");
871
872    mFileSize = &*(args++);
873    mFileSize->setName("mFileSize");
874
875    mHasBlockChecksum = &*(args++);
876    mHasBlockChecksum->setName("mHasBlockChecksum");
877    // TODO for now, we do not handle blockCheckSum
878    mHasBlockChecksum = iBuilder->getInt1(false);
879
880    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
881}
882
883void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
884    Module * M = iBuilder->getModule();
885    Type * const sizeTy = iBuilder->getSizeTy();
886    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
887    Type * const voidTy = iBuilder->getVoidTy();
888    Type * const inputType = iBuilder->getInt8PtrTy();
889    Type * const intAddrTy = iBuilder->getIntAddrTy();
890
891    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
892    main->setCallingConv(CallingConv::C);
893    Function::arg_iterator args = main->arg_begin();
894    mInputStream = &*(args++);
895    mInputStream->setName("input");
896
897    mHeaderSize = &*(args++);
898    mHeaderSize->setName("mHeaderSize");
899
900    mFileSize = &*(args++);
901    mFileSize->setName("mFileSize");
902
903    mHasBlockChecksum = &*(args++);
904    mHasBlockChecksum->setName("mHasBlockChecksum");
905
906    match_accumulator = &*(args++);
907    match_accumulator->setName("match_accumulator");
908
909    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
910}
Note: See TracBrowser for help on using the repository browser.