source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6132

Last change on this file since 6132 was 6132, checked in by xwa163, 10 months ago
  1. More experiment on lz4 grep
  2. Improve performance of lzparabix grep
File size: 47.0 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/lz4/aio/lz4_i4_bytestream_aio.h>
25#include <kernels/fake_stream_generating_kernel.h>
26#include <kernels/bitstream_pdep_kernel.h>
27#include <kernels/bitstream_gather_pdep_kernel.h>
28#include <re/re_toolchain.h>
29
30#include <re/collect_ccs.h>
31#include <re/replaceCC.h>
32
33#include <re/casing.h>
34#include <re/exclude_CC.h>
35#include <re/to_utf8.h>
36#include <re/re_analysis.h>
37#include <re/re_name_resolve.h>
38#include <re/re_name_gather.h>
39#include <re/re_multiplex.h>
40#include <re/re_utility.h>
41
42#include <UCD/resolve_properties.h>
43#include <kernels/charclasses.h>
44#include <kernels/grep_kernel.h>
45#include <kernels/UCD_property_kernel.h>
46#include <kernels/grapheme_kernel.h>
47#include <kernels/linebreak_kernel.h>
48#include <kernels/streams_merge.h>
49#include <kernels/scanmatchgen.h>
50#include <kernels/until_n.h>
51#include <re/casing.h>
52#include <re/exclude_CC.h>
53#include <re/to_utf8.h>
54#include <re/re_analysis.h>
55#include <re/re_name_resolve.h>
56#include <re/re_name_gather.h>
57#include <re/re_multiplex.h>
58#include <re/re_utility.h>
59#include <re/grapheme_clusters.h>
60#include <re/printer_re.h>
61#include <llvm/Support/raw_ostream.h>
62#include <llvm/Support/Debug.h>
63#include <kernels/lz4/lz4_block_decoder.h>
64#include <kernels/lz4/aio/lz4_swizzled_aio.h>
65#include <kernels/lz4/aio/lz4_bitstream_aio.h>
66#include <re/re_seq.h>
67#include <kernels/lz4/aio/lz4_bytestream_aio.h>
68
69namespace re { class CC; }
70
71using namespace llvm;
72using namespace parabix;
73using namespace kernel;
74using namespace grep;
75
76LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
77    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
78    mMoveMatchesToEOL = true;
79}
80
81void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
82    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
83        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
84    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
85        mBreakCC = re::makeByte(0);  // Null
86    } else {
87        mBreakCC = re::makeByte(0x0A); // LF
88    }
89    re::RE * anchorRE = mBreakCC;
90    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
91        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
92        anchorName->setDefinition(re::makeUnicodeBreak());
93        anchorRE = anchorName;
94    }
95
96    mREs = REs;
97    bool allAnchored = true;
98    for(unsigned i = 0; i < mREs.size(); ++i) {
99        if (!hasEndAnchor(mREs[i])) allAnchored = false;
100        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
101        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
102        mREs[i] = resolveAnchors(mREs[i], anchorRE);
103        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
104        mREs[i] = regular_expression_passes(mREs[i]);
105    }
106    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
107
108}
109
110
111parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
112//    auto mGrepDriver = &mPxDriver;
113    auto & idb = mPxDriver.getBuilder();
114    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
115    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
116    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
117    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
118    return LineFeedStream;
119}
120
121parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithByteStreamAioApproach(
122        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
123    auto mGrepDriver = &mPxDriver;
124    auto & b = mGrepDriver->getBuilder();
125
126    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
127
128    StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b));
129    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian, prefix, numberOfStream);
130    mPxDriver.makeKernelCall(p2sK, {compressedBitStream}, {mtxByteStream});
131
132    StreamSetBuffer * const decompressionMtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b), 1);
133    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(b, true);
134    lz4AioK->setInitialArguments({mFileSize});
135    mPxDriver.makeKernelCall(
136            lz4AioK,
137            {
138                    mCompressedByteStream,
139                    // Block Data
140                    blockInfo.isCompress,
141                    blockInfo.blockStart,
142                    blockInfo.blockEnd,
143                    mtxByteStream
144            }, {
145                    decompressionMtxByteStream
146            });
147
148    StreamSetBuffer * const decompressedMtxBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8), this->getDecompressedBufferBlocks(b));
149
150    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(b, cc::BitNumbering::BigEndian, true, prefix, numberOfStream);
151    mPxDriver.makeKernelCall(s2pk, {decompressionMtxByteStream}, {decompressedMtxBitStream});
152
153    return decompressedMtxBitStream;
154}
155
156StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
157        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
158    auto mGrepDriver = &mPxDriver;
159    auto & b = mGrepDriver->getBuilder();
160
161    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
162
163    // Produce unswizzled bit streams
164    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
165    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "source");
166    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
167
168    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
169
170
171    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(b, 4, 1, 4);
172    lz4AioK->setInitialArguments({mFileSize});
173    mPxDriver.makeKernelCall(
174            lz4AioK,
175            {
176                    mCompressedByteStream,
177//                    Extenders,
178
179                    // Block Data
180                    blockInfo.isCompress,
181                    blockInfo.blockStart,
182                    blockInfo.blockEnd,
183
184                    u16Swizzle0,
185            }, {
186                    decompressedSwizzled0,
187            });
188
189
190
191    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(b));
192    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "dst");
193    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
194
195    return decompressionBitStream;
196
197}
198
199StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
200    auto mGrepDriver = &mPxDriver;
201    auto & idb = mGrepDriver->getBuilder();
202
203    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
204    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
205    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
206
207    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
208    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
209    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
210
211    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
212    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
213    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
214
215    // Produce unswizzled bit streams
216    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
217    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
218    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
219
220    return matchCopiedBits;
221}
222parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
223    if (numberOfStream == 4) {
224        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
225    }
226
227    auto mGrepDriver = &mPxDriver;
228    auto & idb = mGrepDriver->getBuilder();
229
230    // Extract (Deletion)
231    this->generateCompressionMarker(idb);
232
233    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
234    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
235
236    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
237    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
238
239    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
240    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
241    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
242
243    // Deposit
244    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
245    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
246    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
247
248    // Match Copy
249    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
250    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
251    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
252
253    return matchCopiedBits;
254}
255
256parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
257    auto mGrepDriver = &mPxDriver;
258    auto & idb = mGrepDriver->getBuilder();
259    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
260
261    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
262    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
263    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
264    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
265}
266
267void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
268    this->initREs(REs);
269    auto mGrepDriver = &mPxDriver;
270
271    auto & idb = mGrepDriver->getBuilder();
272    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
273    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
274
275
276    //  Regular Expression Processing and Analysis Phase
277    const auto nREs = mREs.size();
278
279    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
280
281    std::map<std::string, StreamSetBuffer *> propertyStream;
282
283    std::vector<std::string> externalStreamNames;
284    std::set<re::Name *> UnicodeProperties;
285
286    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
287    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
288
289    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
290    mREs[0] = transformCCs(mpx.get(), mREs[0]);
291    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
292    auto numOfCharacterClasses = mpx_basis.size();
293    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
294
295    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
296    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
297
298    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
299    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
300    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
301
302    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
303    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
304    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
305    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(
306            combinedStream, 1 + numOfCharacterClasses, "combined");
307
308    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
309    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
310    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
311    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
312
313
314
315    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
316    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
317    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
318
319    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
320    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
321    MatchResultsBufs[0] = MatchResults;
322
323    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
324    if (mREs.size() > 1) {
325        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
326        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
327        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
328    }
329    StreamSetBuffer * Matches = MergedResults;
330    if (mMoveMatchesToEOL) {
331        StreamSetBuffer * OriginalMatches = Matches;
332        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
333        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
334        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
335    }
336
337//    if (MaxCountFlag > 0) {
338//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
339//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
340//        StreamSetBuffer * const AllMatches = Matches;
341//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
342//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
343//    }
344
345//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
346
347};
348std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled, bool useByteStream) {
349
350    this->initREs(REs);
351    auto mGrepDriver = &mPxDriver;
352
353    auto & idb = mGrepDriver->getBuilder();
354    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
355    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
356    int MaxCountFlag = 0;
357
358    //  Regular Expression Processing and Analysis Phase
359    const auto nREs = mREs.size();
360
361    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
362
363
364    std::map<std::string, StreamSetBuffer *> propertyStream;
365
366    std::vector<std::string> externalStreamNames;
367    std::set<re::Name *> UnicodeProperties;
368
369    re::CC* linefeedCC = re::makeCC(0x0A);
370
371    re::Seq* seq = re::makeSeq();
372    seq->push_back(mREs[0]);
373    seq->push_back(std::move(linefeedCC));
374
375
376    const auto UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
377    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
378
379    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
380    mREs[0] = transformCCs(mpx.get(), mREs[0]);
381    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
382    auto numOfCharacterClasses = mpx_basis.size();
383    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
384
385    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
386    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
387
388    StreamSetBuffer * decompressedCharClasses = nullptr;
389    if (useSwizzled) {
390        if (useAio) {
391            decompressedCharClasses = this->convertCompressedBitsStreamWithSwizzledAioApproach(CharClasses, numOfCharacterClasses, "combined");
392        } else {
393            decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "combined");
394        }
395    } else if (useByteStream){
396        decompressedCharClasses = this->convertCompressedBitsStreamWithByteStreamAioApproach(CharClasses, numOfCharacterClasses, "combined");
397    } else {
398        auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CharClasses}, "combined");
399        decompressedCharClasses = ret[0];
400    }
401
402    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
403    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
404    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
405
406    StreamSetBuffer * LineBreakStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
407    kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
408    mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {LineBreakStream});
409
410
411    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
412    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
413    MatchResultsBufs[0] = MatchResults;
414
415    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
416    if (mREs.size() > 1) {
417        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
418        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
419        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
420    }
421    StreamSetBuffer * Matches = MergedResults;
422    if (mMoveMatchesToEOL) {
423        StreamSetBuffer * OriginalMatches = Matches;
424        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
425        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
426        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
427    }
428
429    if (MaxCountFlag > 0) {
430        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
431        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
432        StreamSetBuffer * const AllMatches = Matches;
433        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
434        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
435    }
436
437    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
438};
439
440std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
441        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
442
443    this->initREs(REs);
444    auto mGrepDriver = &mPxDriver;
445
446    auto & idb = mGrepDriver->getBuilder();
447    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
448    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
449    int MaxCountFlag = 0;
450
451    //  Regular Expression Processing and Analysis Phase
452    const auto nREs = mREs.size();
453
454    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
455
456    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
457
458
459    std::map<std::string, StreamSetBuffer *> propertyStream;
460
461    for(unsigned i = 0; i < nREs; ++i) {
462        std::vector<std::string> externalStreamNames;
463        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
464
465        if (mEnableMultiplexing) {
466            const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
467            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
468
469            mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
470            mREs[i] = transformCCs(mpx.get(), mREs[i]);
471            std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
472            auto numOfCharacterClasses = mpx_basis.size();
473            StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
474            kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
475            mGrepDriver->makeKernelCall(ccK, {decompressedBasisBits}, {CharClasses});
476
477            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
478            icgrepInputSets.push_back(CharClasses);
479            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
480            MatchResultsBufs[i] = MatchResults;
481        } else {
482            std::set<re::Name *> UnicodeProperties;
483
484            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
485            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
486            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
487            MatchResultsBufs[i] = MatchResults;
488        }
489    }
490
491    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
492    if (mREs.size() > 1) {
493        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
494        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
495        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
496    }
497    StreamSetBuffer * Matches = MergedResults;
498    if (mMoveMatchesToEOL) {
499        StreamSetBuffer * OriginalMatches = Matches;
500        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
501        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
502        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
503    }
504
505    if (MaxCountFlag > 0) {
506        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
507        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
508        StreamSetBuffer * const AllMatches = Matches;
509        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
510        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
511    }
512
513    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
514
515}
516
517void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
518    auto main = this->getScanMatchGrepMainFunction();
519    std::ostringstream s;
520    EmitMatch accum("", false, false, s);
521
522    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
523    llvm::outs() << s.str();
524}
525
526void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
527    auto & iBuilder = mPxDriver.getBuilder();
528    this->generateScanMatchMainFunc(iBuilder);
529
530    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
531
532    // GeneratePipeline
533    this->generateLoadByteStreamAndBitStream(iBuilder);
534    this->generateExtractAndDepositMarkers(iBuilder);
535
536    auto swizzle = this->generateSwizzleExtractData(iBuilder);
537
538    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
539    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
540
541    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
542    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
543
544    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
545    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
546
547    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
548    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
549
550    // Produce unswizzled bit streams
551    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
552    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
553    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
554
555    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
556    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
557
558    StreamSetBuffer * LineBreakStream;
559    StreamSetBuffer * Matches;
560    std::vector<re::RE*> res = {regex};
561    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
562
563    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
564    scanMatchK->setInitialArguments({match_accumulator});
565    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
566    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
567    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
568
569    mPxDriver.generatePipelineIR();
570    mPxDriver.deallocateBuffers();
571
572    iBuilder->CreateRetVoid();
573
574    mPxDriver.finalizeObject();
575}
576
577void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
578    auto & iBuilder = mPxDriver.getBuilder();
579    this->generateCountOnlyMainFunc(iBuilder);
580
581    // GeneratePipeline
582    this->generateLoadByteStreamAndBitStream(iBuilder);
583//    this->generateExtractAndDepositMarkers(iBuilder);
584
585    StreamSetBuffer * LineBreakStream;
586    StreamSetBuffer * Matches;
587    std::vector<re::RE*> res = {regex};
588    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
589
590    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
591    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
592    mPxDriver.generatePipelineIR();
593
594    iBuilder->setKernel(matchCountK);
595    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
596    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
597
598    mPxDriver.deallocateBuffers();
599
600    iBuilder->CreateRet(matchedLineCount);
601
602
603    mPxDriver.finalizeObject();
604}
605
606void LZ4GrepGenerator::generateByteStreamMultiplexingAioPipeline(re::RE* regex) {
607    auto & iBuilder = mPxDriver.getBuilder();
608    this->generateCountOnlyMainFunc(iBuilder);
609
610    this->generateLoadByteStreamAndBitStream(iBuilder);
611    StreamSetBuffer * LineBreakStream;
612    StreamSetBuffer * Matches;
613    std::vector<re::RE*> res = {regex};
614    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false, true);
615
616    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
617    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
618    mPxDriver.generatePipelineIR();
619
620    iBuilder->setKernel(matchCountK);
621    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
622    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
623
624    mPxDriver.deallocateBuffers();
625
626    iBuilder->CreateRet(matchedLineCount);
627
628    mPxDriver.finalizeObject();
629}
630
631
632void LZ4GrepGenerator::generateMultiplexingBitStreamAioPipeline(re::RE* regex) {
633    auto & iBuilder = mPxDriver.getBuilder();
634    this->generateCountOnlyMainFunc(iBuilder);
635
636    this->generateLoadByteStreamAndBitStream(iBuilder);
637    StreamSetBuffer * LineBreakStream;
638    StreamSetBuffer * Matches;
639    std::vector<re::RE*> res = {regex};
640    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false);
641
642    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
643    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
644    mPxDriver.generatePipelineIR();
645
646    iBuilder->setKernel(matchCountK);
647    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
648    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
649
650    mPxDriver.deallocateBuffers();
651
652    iBuilder->CreateRet(matchedLineCount);
653
654    mPxDriver.finalizeObject();
655}
656
657void LZ4GrepGenerator::generateBitStreamAioPipeline(re::RE* regex) {
658    auto & iBuilder = mPxDriver.getBuilder();
659    this->generateCountOnlyMainFunc(iBuilder);
660
661    // GeneratePipeline
662    this->generateLoadByteStreamAndBitStream(iBuilder);
663    StreamSetBuffer * const decompressionBitStream = this->generateBitStreamAIODecompression(iBuilder);
664
665    StreamSetBuffer * LineBreakStream;
666    StreamSetBuffer * Matches;
667    std::vector<re::RE*> res = {regex};
668    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
669
670    /*
671    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
672    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
673    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
674
675    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
676    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
677    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
678    */
679    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
680
681    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
682    mPxDriver.generatePipelineIR();
683
684    iBuilder->setKernel(matchCountK);
685    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
686    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
687    mPxDriver.deallocateBuffers();
688
689    iBuilder->CreateRet(matchedLineCount);
690
691    mPxDriver.finalizeObject();
692}
693
694void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
695    auto & iBuilder = mPxDriver.getBuilder();
696    this->generateCountOnlyMainFunc(iBuilder);
697
698    // GeneratePipeline
699    this->generateLoadByteStreamAndBitStream(iBuilder);
700
701    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
702
703    StreamSetBuffer * LineBreakStream;
704    StreamSetBuffer * Matches;
705    std::vector<re::RE*> res = {regex};
706    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
707/*
708    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
709    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
710    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
711
712    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
713    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
714    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
715*/
716    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
717
718    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
719    mPxDriver.generatePipelineIR();
720
721    iBuilder->setKernel(matchCountK);
722    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
723    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
724    mPxDriver.deallocateBuffers();
725
726    iBuilder->CreateRet(matchedLineCount);
727
728    mPxDriver.finalizeObject();
729}
730
731void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
732    auto & iBuilder = mPxDriver.getBuilder();
733    this->generateCountOnlyMainFunc(iBuilder);
734
735    this->generateLoadByteStream(iBuilder);
736    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
737
738
739    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
740    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
741    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
742
743
744    StreamSetBuffer * LineBreakStream;
745    StreamSetBuffer * Matches;
746    std::vector<re::RE*> res = {regex};
747    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
748
749
750//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
751//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
752//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
753
754    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
755    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
756    mPxDriver.generatePipelineIR();
757
758    iBuilder->setKernel(matchCountK);
759    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
760    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
761
762    mPxDriver.deallocateBuffers();
763
764    iBuilder->CreateRet(matchedLineCount);
765
766    mPxDriver.finalizeObject();
767}
768
769
770
771void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
772    auto & iBuilder = mPxDriver.getBuilder();
773    this->generateCountOnlyMainFunc(iBuilder);
774
775    // GeneratePipeline
776    this->generateLoadByteStream(iBuilder);
777//    this->generateLoadByteStreamAndBitStream(iBuilder);
778
779    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
780
781
782    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
783    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
784    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
785
786
787    StreamSetBuffer * LineBreakStream;
788    StreamSetBuffer * Matches;
789    std::vector<re::RE*> res = {regex};
790    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
791
792
793//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
794//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
795//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
796
797    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
798    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
799    mPxDriver.generatePipelineIR();
800
801    iBuilder->setKernel(matchCountK);
802    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
803    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
804
805    mPxDriver.deallocateBuffers();
806
807    iBuilder->CreateRet(matchedLineCount);
808
809    mPxDriver.finalizeObject();
810
811}
812
813
814void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
815    auto & iBuilder = mPxDriver.getBuilder();
816    this->generateCountOnlyMainFunc(iBuilder);
817
818    // GeneratePipeline
819    this->generateLoadByteStreamAndBitStream(iBuilder);
820    this->generateExtractAndDepositMarkers(iBuilder);
821
822    StreamSetBuffer * LineBreakStream;
823    StreamSetBuffer * Matches;
824    std::vector<re::RE*> res = {regex};
825    if (mEnableMultiplexing) {
826        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
827    } else {
828        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
829        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
830        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
831        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
832
833        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
834        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
835        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
836
837        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
838    };
839
840    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
841    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
842    mPxDriver.generatePipelineIR();
843
844    iBuilder->setKernel(matchCountK);
845    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
846    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
847
848    mPxDriver.deallocateBuffers();
849
850    iBuilder->CreateRet(matchedLineCount);
851
852    mPxDriver.finalizeObject();
853}
854
855
856void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
857    auto & iBuilder = mPxDriver.getBuilder();
858    this->generateCountOnlyMainFunc(iBuilder);
859
860
861    // GeneratePipeline
862    this->generateLoadByteStreamAndBitStream(iBuilder);
863    this->generateExtractAndDepositMarkers(iBuilder);
864
865
866    StreamSetBuffer * LineBreakStream;
867    StreamSetBuffer * Matches;
868    std::vector<re::RE*> res = {regex};
869    if (mEnableMultiplexing) {
870        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
871    } else {
872        auto swizzle = this->generateSwizzleExtractData(iBuilder);
873
874        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
875        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
876
877        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
878        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
879
880
881        // split PDEP into 2 kernel will be a little slower in single thread environment
882/*
883    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
884    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
885
886    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
887    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
888*/
889
890        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
891        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
892
893        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
894        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
895
896        // Produce unswizzled bit streams
897        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
898        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
899        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
900
901
902        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
903    };
904
905    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
906    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
907    mPxDriver.generatePipelineIR();
908
909    iBuilder->setKernel(matchCountK);
910    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
911    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
912
913    mPxDriver.deallocateBuffers();
914
915    iBuilder->CreateRet(matchedLineCount);
916
917    mPxDriver.finalizeObject();
918}
919
920ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
921    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
922}
923CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
924    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
925}
926
927void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
928    Module * M = iBuilder->getModule();
929    Type * const int64Ty = iBuilder->getInt64Ty();
930    Type * const sizeTy = iBuilder->getSizeTy();
931    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
932//    Type * const voidTy = iBuilder->getVoidTy();
933    Type * const inputType = iBuilder->getInt8PtrTy();
934
935    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
936    main->setCallingConv(CallingConv::C);
937    Function::arg_iterator args = main->arg_begin();
938    mInputStream = &*(args++);
939    mInputStream->setName("input");
940
941    mHeaderSize = &*(args++);
942    mHeaderSize->setName("mHeaderSize");
943
944    mFileSize = &*(args++);
945    mFileSize->setName("mFileSize");
946
947    mHasBlockChecksum = &*(args++);
948    mHasBlockChecksum->setName("mHasBlockChecksum");
949    // TODO for now, we do not handle blockCheckSum
950    mHasBlockChecksum = iBuilder->getInt1(false);
951
952    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
953}
954
955void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
956    Module * M = iBuilder->getModule();
957    Type * const sizeTy = iBuilder->getSizeTy();
958    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
959    Type * const voidTy = iBuilder->getVoidTy();
960    Type * const inputType = iBuilder->getInt8PtrTy();
961    Type * const intAddrTy = iBuilder->getIntAddrTy();
962
963    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
964    main->setCallingConv(CallingConv::C);
965    Function::arg_iterator args = main->arg_begin();
966    mInputStream = &*(args++);
967    mInputStream->setName("input");
968
969    mHeaderSize = &*(args++);
970    mHeaderSize->setName("mHeaderSize");
971
972    mFileSize = &*(args++);
973    mFileSize->setName("mFileSize");
974
975    mHasBlockChecksum = &*(args++);
976    mHasBlockChecksum->setName("mHasBlockChecksum");
977
978    match_accumulator = &*(args++);
979    match_accumulator->setName("match_accumulator");
980
981    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
982}
Note: See TracBrowser for help on using the repository browser.