source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6135

Last change on this file since 6135 was 6135, checked in by xwa163, 6 months ago
  1. Implement twist_kernel and untwist_kernel by PEXT and PDEP
  2. Use twist form for multiplexing lz4 grep
File size: 47.2 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <re/casing.h>
33#include <re/exclude_CC.h>
34#include <re/to_utf8.h>
35#include <re/re_analysis.h>
36#include <re/re_name_resolve.h>
37#include <re/re_name_gather.h>
38#include <re/re_multiplex.h>
39#include <re/re_utility.h>
40
41#include <UCD/resolve_properties.h>
42#include <kernels/charclasses.h>
43#include <kernels/grep_kernel.h>
44#include <kernels/UCD_property_kernel.h>
45#include <kernels/grapheme_kernel.h>
46#include <kernels/linebreak_kernel.h>
47#include <kernels/streams_merge.h>
48#include <kernels/scanmatchgen.h>
49#include <kernels/until_n.h>
50#include <re/casing.h>
51#include <re/exclude_CC.h>
52#include <re/to_utf8.h>
53#include <re/re_analysis.h>
54#include <re/re_name_resolve.h>
55#include <re/re_name_gather.h>
56#include <re/re_multiplex.h>
57#include <re/re_utility.h>
58#include <re/grapheme_clusters.h>
59#include <re/printer_re.h>
60#include <llvm/Support/raw_ostream.h>
61#include <llvm/Support/Debug.h>
62#include <kernels/lz4/lz4_block_decoder.h>
63#include <kernels/lz4/aio/lz4_swizzled_aio.h>
64#include <kernels/lz4/aio/lz4_bitstream_aio.h>
65#include <re/re_seq.h>
66#include <kernels/lz4/aio/lz4_bytestream_aio.h>
67
68namespace re { class CC; }
69
70using namespace llvm;
71using namespace parabix;
72using namespace kernel;
73using namespace grep;
74
75LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
76    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
77    mMoveMatchesToEOL = true;
78}
79
80void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
81    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
82        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
83    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
84        mBreakCC = re::makeByte(0);  // Null
85    } else {
86        mBreakCC = re::makeByte(0x0A); // LF
87    }
88    re::RE * anchorRE = mBreakCC;
89    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
90        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
91        anchorName->setDefinition(re::makeUnicodeBreak());
92        anchorRE = anchorName;
93    }
94
95    mREs = REs;
96    bool allAnchored = true;
97    for(unsigned i = 0; i < mREs.size(); ++i) {
98        if (!hasEndAnchor(mREs[i])) allAnchored = false;
99        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
100        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
101        mREs[i] = resolveAnchors(mREs[i], anchorRE);
102        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
103        mREs[i] = regular_expression_passes(mREs[i]);
104    }
105    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
106
107}
108
109
110parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
111//    auto mGrepDriver = &mPxDriver;
112    auto & idb = mPxDriver.getBuilder();
113    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
114    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
115    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
116    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
117    return LineFeedStream;
118}
119
120parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithByteStreamAioApproach(
121        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
122    auto mGrepDriver = &mPxDriver;
123    auto & b = mGrepDriver->getBuilder();
124
125    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
126
127    StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b));
128    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian, prefix, numberOfStream);
129    mPxDriver.makeKernelCall(p2sK, {compressedBitStream}, {mtxByteStream});
130
131    StreamSetBuffer * const decompressionMtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b), 1);
132    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(b, true);
133    lz4AioK->setInitialArguments({mFileSize});
134    mPxDriver.makeKernelCall(
135            lz4AioK,
136            {
137                    mCompressedByteStream,
138                    // Block Data
139                    blockInfo.isCompress,
140                    blockInfo.blockStart,
141                    blockInfo.blockEnd,
142                    mtxByteStream
143            }, {
144                    decompressionMtxByteStream
145            });
146
147    StreamSetBuffer * const decompressedMtxBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8), this->getDecompressedBufferBlocks(b));
148
149    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(b, cc::BitNumbering::BigEndian, true, prefix, numberOfStream);
150    mPxDriver.makeKernelCall(s2pk, {decompressionMtxByteStream}, {decompressedMtxBitStream});
151
152    return decompressedMtxBitStream;
153}
154
155StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
156        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
157    auto mGrepDriver = &mPxDriver;
158    auto & b = mGrepDriver->getBuilder();
159
160    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
161
162    // Produce unswizzled bit streams
163    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
164    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "source");
165    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
166
167    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
168
169
170    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(b, 4, 1, 4);
171    lz4AioK->setInitialArguments({mFileSize});
172    mPxDriver.makeKernelCall(
173            lz4AioK,
174            {
175                    mCompressedByteStream,
176//                    Extenders,
177
178                    // Block Data
179                    blockInfo.isCompress,
180                    blockInfo.blockStart,
181                    blockInfo.blockEnd,
182
183                    u16Swizzle0,
184            }, {
185                    decompressedSwizzled0,
186            });
187
188
189
190    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(b));
191    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "dst");
192    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
193
194    return decompressionBitStream;
195
196}
197
198StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
199    auto mGrepDriver = &mPxDriver;
200    auto & idb = mGrepDriver->getBuilder();
201
202    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
203    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
204    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
205
206    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
207    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
208    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
209
210    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
211    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
212    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
213
214    // Produce unswizzled bit streams
215    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
216    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
217    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
218
219    return matchCopiedBits;
220}
221parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
222    if (numberOfStream == 4) {
223        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
224    }
225
226    auto mGrepDriver = &mPxDriver;
227    auto & idb = mGrepDriver->getBuilder();
228
229    // Extract (Deletion)
230    this->generateCompressionMarker(idb);
231
232    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
233    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
234
235    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
236    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
237
238    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
239    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
240    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
241
242    // Deposit
243    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
244    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
245    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
246
247    // Match Copy
248    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
249    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
250    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
251
252    return matchCopiedBits;
253}
254
255parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
256    auto mGrepDriver = &mPxDriver;
257    auto & idb = mGrepDriver->getBuilder();
258    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
259
260    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
261    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
262    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
263    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
264}
265
266void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
267    this->initREs(REs);
268    auto mGrepDriver = &mPxDriver;
269
270    auto & idb = mGrepDriver->getBuilder();
271    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
272    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
273
274
275    //  Regular Expression Processing and Analysis Phase
276    const auto nREs = mREs.size();
277
278    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
279
280    std::map<std::string, StreamSetBuffer *> propertyStream;
281
282    std::vector<std::string> externalStreamNames;
283    std::set<re::Name *> UnicodeProperties;
284
285    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
286    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
287
288    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
289    mREs[0] = transformCCs(mpx.get(), mREs[0]);
290    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
291    auto numOfCharacterClasses = mpx_basis.size();
292    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
293
294    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
295    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
296
297    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
298    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
299    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
300
301    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
302    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
303    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
304    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(
305            combinedStream, 1 + numOfCharacterClasses, "combined");
306
307    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
308    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
309    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
310    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
311
312
313
314    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
315    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
316    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
317
318    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
319    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
320    MatchResultsBufs[0] = MatchResults;
321
322    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
323    if (mREs.size() > 1) {
324        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
325        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
326        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
327    }
328    StreamSetBuffer * Matches = MergedResults;
329    if (mMoveMatchesToEOL) {
330        StreamSetBuffer * OriginalMatches = Matches;
331        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
332        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
333        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
334    }
335
336//    if (MaxCountFlag > 0) {
337//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
338//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
339//        StreamSetBuffer * const AllMatches = Matches;
340//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
341//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
342//    }
343
344//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
345
346};
347std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled, bool useByteStream) {
348
349    this->initREs(REs);
350    auto mGrepDriver = &mPxDriver;
351
352    auto & idb = mGrepDriver->getBuilder();
353    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
354    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
355    int MaxCountFlag = 0;
356
357    //  Regular Expression Processing and Analysis Phase
358    const auto nREs = mREs.size();
359
360    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
361
362
363    std::map<std::string, StreamSetBuffer *> propertyStream;
364
365    std::vector<std::string> externalStreamNames;
366    std::set<re::Name *> UnicodeProperties;
367
368    re::CC* linefeedCC = re::makeCC(0x0A);
369
370    re::Seq* seq = re::makeSeq();
371    seq->push_back(mREs[0]);
372    seq->push_back(std::move(linefeedCC));
373
374
375    const auto UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
376    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
377
378    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
379    mREs[0] = transformCCs(mpx.get(), mREs[0]);
380    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
381    auto numOfCharacterClasses = mpx_basis.size();
382    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
383
384    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
385    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
386
387    StreamSetBuffer * decompressedCharClasses = nullptr;
388    if (useSwizzled) {
389        if (useAio) {
390            decompressedCharClasses = this->convertCompressedBitsStreamWithSwizzledAioApproach(CharClasses, numOfCharacterClasses, "combined");
391        } else {
392            decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "combined");
393        }
394    } else if (useByteStream){
395        decompressedCharClasses = this->convertCompressedBitsStreamWithByteStreamAioApproach(CharClasses, numOfCharacterClasses, "combined");
396    } else {
397        auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CharClasses}, "combined");
398        decompressedCharClasses = ret[0];
399    }
400
401    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
402    StreamSetBuffer * u8NoFinalStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), this->getInputBufferBlocks(idb), 1);
403
404    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, std::vector<unsigned>({8, 1}));
405    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits, u8NoFinalStream});
406
407    StreamSetBuffer * LineBreakStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
408    kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
409    mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {LineBreakStream});
410
411
412    externalStreamNames.push_back("UTF8_nonfinal");
413
414    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
415    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, u8NoFinalStream, decompressedCharClasses}, {MatchResults});
416    MatchResultsBufs[0] = MatchResults;
417
418    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
419    if (mREs.size() > 1) {
420        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
421        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
422        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
423    }
424    StreamSetBuffer * Matches = MergedResults;
425    if (mMoveMatchesToEOL) {
426        StreamSetBuffer * OriginalMatches = Matches;
427        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
428        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
429        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
430    }
431
432    if (MaxCountFlag > 0) {
433        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
434        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
435        StreamSetBuffer * const AllMatches = Matches;
436        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
437        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
438    }
439
440    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
441};
442
443std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
444        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
445
446    this->initREs(REs);
447    auto mGrepDriver = &mPxDriver;
448
449    auto & idb = mGrepDriver->getBuilder();
450    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
451    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
452    int MaxCountFlag = 0;
453
454    //  Regular Expression Processing and Analysis Phase
455    const auto nREs = mREs.size();
456
457    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
458
459    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
460
461
462    std::map<std::string, StreamSetBuffer *> propertyStream;
463
464    for(unsigned i = 0; i < nREs; ++i) {
465        std::vector<std::string> externalStreamNames;
466        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
467
468        if (mEnableMultiplexing) {
469            const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
470            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
471
472            mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
473            mREs[i] = transformCCs(mpx.get(), mREs[i]);
474            std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
475            auto numOfCharacterClasses = mpx_basis.size();
476            StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
477            kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
478            mGrepDriver->makeKernelCall(ccK, {decompressedBasisBits}, {CharClasses});
479
480            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
481            icgrepInputSets.push_back(CharClasses);
482            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
483            MatchResultsBufs[i] = MatchResults;
484        } else {
485            std::set<re::Name *> UnicodeProperties;
486
487            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
488            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
489            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
490            MatchResultsBufs[i] = MatchResults;
491        }
492    }
493
494    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
495    if (mREs.size() > 1) {
496        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
497        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
498        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
499    }
500    StreamSetBuffer * Matches = MergedResults;
501    if (mMoveMatchesToEOL) {
502        StreamSetBuffer * OriginalMatches = Matches;
503        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
504        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
505        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
506    }
507
508    if (MaxCountFlag > 0) {
509        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
510        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
511        StreamSetBuffer * const AllMatches = Matches;
512        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
513        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
514    }
515
516    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
517
518}
519
520void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
521    auto main = this->getScanMatchGrepMainFunction();
522    std::ostringstream s;
523    EmitMatch accum("", false, false, s);
524
525    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
526    llvm::outs() << s.str();
527}
528
529void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
530    auto & iBuilder = mPxDriver.getBuilder();
531    this->generateScanMatchMainFunc(iBuilder);
532
533    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
534
535    // GeneratePipeline
536    this->generateLoadByteStreamAndBitStream(iBuilder);
537    this->generateExtractAndDepositMarkers(iBuilder);
538
539    auto swizzle = this->generateSwizzleExtractData(iBuilder);
540
541    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
542    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
543
544    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
545    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
546
547    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
548    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
549
550    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
551    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
552
553    // Produce unswizzled bit streams
554    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
555    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
556    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
557
558    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
559    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
560
561    StreamSetBuffer * LineBreakStream;
562    StreamSetBuffer * Matches;
563    std::vector<re::RE*> res = {regex};
564    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
565
566    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
567    scanMatchK->setInitialArguments({match_accumulator});
568    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
569    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
570    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
571
572    mPxDriver.generatePipelineIR();
573    mPxDriver.deallocateBuffers();
574
575    iBuilder->CreateRetVoid();
576
577    mPxDriver.finalizeObject();
578}
579
580void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
581    auto & iBuilder = mPxDriver.getBuilder();
582    this->generateCountOnlyMainFunc(iBuilder);
583
584    // GeneratePipeline
585    this->generateLoadByteStreamAndBitStream(iBuilder);
586//    this->generateExtractAndDepositMarkers(iBuilder);
587
588    StreamSetBuffer * LineBreakStream;
589    StreamSetBuffer * Matches;
590    std::vector<re::RE*> res = {regex};
591    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
592
593    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
594    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
595    mPxDriver.generatePipelineIR();
596
597    iBuilder->setKernel(matchCountK);
598    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
599    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
600
601    mPxDriver.deallocateBuffers();
602
603    iBuilder->CreateRet(matchedLineCount);
604
605
606    mPxDriver.finalizeObject();
607}
608
609void LZ4GrepGenerator::generateByteStreamMultiplexingAioPipeline(re::RE* regex) {
610    auto & iBuilder = mPxDriver.getBuilder();
611    this->generateCountOnlyMainFunc(iBuilder);
612
613    this->generateLoadByteStreamAndBitStream(iBuilder);
614    StreamSetBuffer * LineBreakStream;
615    StreamSetBuffer * Matches;
616    std::vector<re::RE*> res = {regex};
617    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false, true);
618
619    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
620    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
621    mPxDriver.generatePipelineIR();
622
623    iBuilder->setKernel(matchCountK);
624    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
625    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
626
627    mPxDriver.deallocateBuffers();
628
629    iBuilder->CreateRet(matchedLineCount);
630
631    mPxDriver.finalizeObject();
632}
633
634
635void LZ4GrepGenerator::generateMultiplexingBitStreamAioPipeline(re::RE* regex) {
636    auto & iBuilder = mPxDriver.getBuilder();
637    this->generateCountOnlyMainFunc(iBuilder);
638
639    this->generateLoadByteStreamAndBitStream(iBuilder);
640    StreamSetBuffer * LineBreakStream;
641    StreamSetBuffer * Matches;
642    std::vector<re::RE*> res = {regex};
643    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false);
644
645    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
646    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
647    mPxDriver.generatePipelineIR();
648
649    iBuilder->setKernel(matchCountK);
650    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
651    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
652
653    mPxDriver.deallocateBuffers();
654
655    iBuilder->CreateRet(matchedLineCount);
656
657    mPxDriver.finalizeObject();
658}
659
660void LZ4GrepGenerator::generateBitStreamAioPipeline(re::RE* regex) {
661    auto & iBuilder = mPxDriver.getBuilder();
662    this->generateCountOnlyMainFunc(iBuilder);
663
664    // GeneratePipeline
665    this->generateLoadByteStreamAndBitStream(iBuilder);
666    StreamSetBuffer * const decompressionBitStream = this->generateBitStreamAIODecompression(iBuilder);
667
668    StreamSetBuffer * LineBreakStream;
669    StreamSetBuffer * Matches;
670    std::vector<re::RE*> res = {regex};
671    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
672
673    /*
674    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
675    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
676    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
677
678    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
679    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
680    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
681    */
682    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
683
684    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
685    mPxDriver.generatePipelineIR();
686
687    iBuilder->setKernel(matchCountK);
688    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
689    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
690    mPxDriver.deallocateBuffers();
691
692    iBuilder->CreateRet(matchedLineCount);
693
694    mPxDriver.finalizeObject();
695}
696
697void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
698    auto & iBuilder = mPxDriver.getBuilder();
699    this->generateCountOnlyMainFunc(iBuilder);
700
701    // GeneratePipeline
702    this->generateLoadByteStreamAndBitStream(iBuilder);
703
704    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
705
706    StreamSetBuffer * LineBreakStream;
707    StreamSetBuffer * Matches;
708    std::vector<re::RE*> res = {regex};
709    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
710/*
711    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
712    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
713    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
714
715    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
716    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
717    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
718*/
719    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
720
721    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
722    mPxDriver.generatePipelineIR();
723
724    iBuilder->setKernel(matchCountK);
725    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
726    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
727    mPxDriver.deallocateBuffers();
728
729    iBuilder->CreateRet(matchedLineCount);
730
731    mPxDriver.finalizeObject();
732}
733
734void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
735    auto & iBuilder = mPxDriver.getBuilder();
736    this->generateCountOnlyMainFunc(iBuilder);
737
738    this->generateLoadByteStream(iBuilder);
739    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
740
741
742    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
743    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
744    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
745
746
747    StreamSetBuffer * LineBreakStream;
748    StreamSetBuffer * Matches;
749    std::vector<re::RE*> res = {regex};
750    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
751
752
753//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
754//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
755//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
756
757    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
758    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
759    mPxDriver.generatePipelineIR();
760
761    iBuilder->setKernel(matchCountK);
762    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
763    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
764
765    mPxDriver.deallocateBuffers();
766
767    iBuilder->CreateRet(matchedLineCount);
768
769    mPxDriver.finalizeObject();
770}
771
772
773
774void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
775    auto & iBuilder = mPxDriver.getBuilder();
776    this->generateCountOnlyMainFunc(iBuilder);
777
778    // GeneratePipeline
779//    this->generateLoadByteStream(iBuilder);
780    this->generateLoadByteStreamAndBitStream(iBuilder);
781
782    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
783
784
785    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
786    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
787    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
788
789
790    StreamSetBuffer * LineBreakStream;
791    StreamSetBuffer * Matches;
792    std::vector<re::RE*> res = {regex};
793    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
794
795
796//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
797//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
798//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
799
800    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
801    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
802    mPxDriver.generatePipelineIR();
803
804    iBuilder->setKernel(matchCountK);
805    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
806    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
807
808    mPxDriver.deallocateBuffers();
809
810    iBuilder->CreateRet(matchedLineCount);
811
812    mPxDriver.finalizeObject();
813
814}
815
816
817void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
818    auto & iBuilder = mPxDriver.getBuilder();
819    this->generateCountOnlyMainFunc(iBuilder);
820
821    // GeneratePipeline
822    this->generateLoadByteStreamAndBitStream(iBuilder);
823    this->generateExtractAndDepositMarkers(iBuilder);
824
825    StreamSetBuffer * LineBreakStream;
826    StreamSetBuffer * Matches;
827    std::vector<re::RE*> res = {regex};
828    if (mEnableMultiplexing) {
829        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
830    } else {
831        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
832        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
833        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
834        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
835
836        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
837        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
838        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
839
840        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
841    };
842
843    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
844    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
845    mPxDriver.generatePipelineIR();
846
847    iBuilder->setKernel(matchCountK);
848    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
849    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
850
851    mPxDriver.deallocateBuffers();
852
853    iBuilder->CreateRet(matchedLineCount);
854
855    mPxDriver.finalizeObject();
856}
857
858
859void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
860    auto & iBuilder = mPxDriver.getBuilder();
861    this->generateCountOnlyMainFunc(iBuilder);
862
863
864    // GeneratePipeline
865    this->generateLoadByteStreamAndBitStream(iBuilder);
866    this->generateExtractAndDepositMarkers(iBuilder);
867
868
869    StreamSetBuffer * LineBreakStream;
870    StreamSetBuffer * Matches;
871    std::vector<re::RE*> res = {regex};
872    if (mEnableMultiplexing) {
873        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
874    } else {
875        auto swizzle = this->generateSwizzleExtractData(iBuilder);
876
877        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
878        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
879
880        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
881        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
882
883
884        // split PDEP into 2 kernel will be a little slower in single thread environment
885/*
886    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
887    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
888
889    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
890    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
891*/
892
893        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
894        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
895
896        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
897        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
898
899        // Produce unswizzled bit streams
900        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
901        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
902        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
903
904
905        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
906    };
907
908    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
909    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
910    mPxDriver.generatePipelineIR();
911
912    iBuilder->setKernel(matchCountK);
913    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
914    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
915
916    mPxDriver.deallocateBuffers();
917
918    iBuilder->CreateRet(matchedLineCount);
919
920    mPxDriver.finalizeObject();
921}
922
923ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
924    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
925}
926CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
927    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
928}
929
930void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
931    Module * M = iBuilder->getModule();
932    Type * const int64Ty = iBuilder->getInt64Ty();
933    Type * const sizeTy = iBuilder->getSizeTy();
934    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
935//    Type * const voidTy = iBuilder->getVoidTy();
936    Type * const inputType = iBuilder->getInt8PtrTy();
937
938    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
939    main->setCallingConv(CallingConv::C);
940    Function::arg_iterator args = main->arg_begin();
941    mInputStream = &*(args++);
942    mInputStream->setName("input");
943
944    mHeaderSize = &*(args++);
945    mHeaderSize->setName("mHeaderSize");
946
947    mFileSize = &*(args++);
948    mFileSize->setName("mFileSize");
949
950    mHasBlockChecksum = &*(args++);
951    mHasBlockChecksum->setName("mHasBlockChecksum");
952    // TODO for now, we do not handle blockCheckSum
953    mHasBlockChecksum = iBuilder->getInt1(false);
954
955    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
956}
957
958void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
959    Module * M = iBuilder->getModule();
960    Type * const sizeTy = iBuilder->getSizeTy();
961    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
962    Type * const voidTy = iBuilder->getVoidTy();
963    Type * const inputType = iBuilder->getInt8PtrTy();
964    Type * const intAddrTy = iBuilder->getIntAddrTy();
965
966    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
967    main->setCallingConv(CallingConv::C);
968    Function::arg_iterator args = main->arg_begin();
969    mInputStream = &*(args++);
970    mInputStream->setName("input");
971
972    mHeaderSize = &*(args++);
973    mHeaderSize->setName("mHeaderSize");
974
975    mFileSize = &*(args++);
976    mFileSize->setName("mFileSize");
977
978    mHasBlockChecksum = &*(args++);
979    mHasBlockChecksum->setName("mHasBlockChecksum");
980
981    match_accumulator = &*(args++);
982    match_accumulator->setName("match_accumulator");
983
984    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
985}
Note: See TracBrowser for help on using the repository browser.