source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6124

Last change on this file since 6124 was 6124, checked in by xwa163, 10 months ago

Minor bug fix for lzparabix grep and lz4 grep

File size: 44.9 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53#include <kernels/lz4/lz4_block_decoder.h>
54#include <kernels/lz4/aio/lz4_swizzled_aio.h>
55#include <kernels/lz4/aio/lz4_bitstream_aio.h>
56
57
58namespace re { class CC; }
59
60using namespace llvm;
61using namespace parabix;
62using namespace kernel;
63using namespace grep;
64
65LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
66    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
67    mMoveMatchesToEOL = true;
68}
69
70void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
71    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
72        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
73    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
74        mBreakCC = re::makeByte(0);  // Null
75    } else {
76        mBreakCC = re::makeByte(0x0A); // LF
77    }
78    re::RE * anchorRE = mBreakCC;
79    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
80        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
81        anchorName->setDefinition(re::makeUnicodeBreak());
82        anchorRE = anchorName;
83    }
84
85    mREs = REs;
86    bool allAnchored = true;
87    for(unsigned i = 0; i < mREs.size(); ++i) {
88        if (!hasEndAnchor(mREs[i])) allAnchored = false;
89        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
90        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
91        mREs[i] = resolveAnchors(mREs[i], anchorRE);
92        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
93        mREs[i] = regular_expression_passes(mREs[i]);
94    }
95    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
96
97}
98
99
100parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
101//    auto mGrepDriver = &mPxDriver;
102    auto & idb = mPxDriver.getBuilder();
103    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
104    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
105    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
106    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
107    return LineFeedStream;
108}
109
110
111
112StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
113        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
114    auto mGrepDriver = &mPxDriver;
115    auto & iBuilder = mGrepDriver->getBuilder();
116
117    //// Decode Block Information
118    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
119    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
120    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
121
122    //// Generate Helper Markers Extenders, FX, XF
123//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
124//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
125//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
126//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
127
128
129    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
130    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
131    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
132
133
134    // Produce unswizzled bit streams
135    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
136    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
137    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
138
139    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
140
141
142    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
143    lz4AioK->setInitialArguments({mFileSize});
144    mPxDriver.makeKernelCall(
145            lz4AioK,
146            {
147                    mCompressedByteStream,
148//                    Extenders,
149
150                    // Block Data
151                    BlockData_IsCompressed,
152                    BlockData_BlockStart,
153                    BlockData_BlockEnd,
154
155                    u16Swizzle0,
156            }, {
157                    decompressedSwizzled0,
158            });
159
160
161
162    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
163    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
164    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
165
166    return decompressionBitStream;
167
168}
169
170StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
171    auto mGrepDriver = &mPxDriver;
172    auto & idb = mGrepDriver->getBuilder();
173
174    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
175    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
176    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
177
178    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
179    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
180    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
181
182    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
183    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
184    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
185
186    // Produce unswizzled bit streams
187    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
188    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
189    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
190
191    return matchCopiedBits;
192}
193parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
194    if (numberOfStream == 4) {
195        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
196    }
197
198    auto mGrepDriver = &mPxDriver;
199    auto & idb = mGrepDriver->getBuilder();
200
201    // Extract (Deletion)
202    this->generateCompressionMarker(idb);
203
204    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
205    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
206
207    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
208    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
209
210    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
211    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
212    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
213
214    // Deposit
215    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
216    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
217    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
218
219    // Match Copy
220    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
221    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
222    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
223
224    return matchCopiedBits;
225}
226
227parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
228    auto mGrepDriver = &mPxDriver;
229    auto & idb = mGrepDriver->getBuilder();
230    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
231
232    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
233    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
234    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
235    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
236}
237
238void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
239    this->initREs(REs);
240    auto mGrepDriver = &mPxDriver;
241
242    auto & idb = mGrepDriver->getBuilder();
243    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
244    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
245
246
247    //  Regular Expression Processing and Analysis Phase
248    const auto nREs = mREs.size();
249
250    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
251
252    std::map<std::string, StreamSetBuffer *> propertyStream;
253
254    std::vector<std::string> externalStreamNames;
255    std::set<re::Name *> UnicodeProperties;
256
257    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
258    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
259
260    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
261    mREs[0] = transformCCs(mpx.get(), mREs[0]);
262    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
263    auto numOfCharacterClasses = mpx_basis.size();
264    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
265
266    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
267    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
268
269    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
270    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
271    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
272
273    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
274    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
275    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
276    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(
277            combinedStream, 1 + numOfCharacterClasses, "combined");
278
279    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
280    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
281    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
282    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
283
284
285
286    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
287    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
288    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
289
290    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
291    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
292    MatchResultsBufs[0] = MatchResults;
293
294    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
295    if (mREs.size() > 1) {
296        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
297        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
298        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
299    }
300    StreamSetBuffer * Matches = MergedResults;
301    if (mMoveMatchesToEOL) {
302        StreamSetBuffer * OriginalMatches = Matches;
303        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
304        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
305        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
306    }
307
308//    if (MaxCountFlag > 0) {
309//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
310//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
311//        StreamSetBuffer * const AllMatches = Matches;
312//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
313//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
314//    }
315
316//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
317
318};
319std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled) {
320
321    this->initREs(REs);
322    auto mGrepDriver = &mPxDriver;
323
324    auto & idb = mGrepDriver->getBuilder();
325    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
326    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
327    int MaxCountFlag = 0;
328
329    //  Regular Expression Processing and Analysis Phase
330    const auto nREs = mREs.size();
331
332    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
333
334
335    std::map<std::string, StreamSetBuffer *> propertyStream;
336
337    std::vector<std::string> externalStreamNames;
338    std::set<re::Name *> UnicodeProperties;
339
340    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
341    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
342
343    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
344    mREs[0] = transformCCs(mpx.get(), mREs[0]);
345    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
346    auto numOfCharacterClasses = mpx_basis.size();
347    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
348
349    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
350    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
351
352    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
353    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
354    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
355
356
357    StreamSetBuffer * LineBreakStream = nullptr;
358    StreamSetBuffer * decompressedCharClasses = nullptr;
359    if (useSwizzled) {
360        StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
361        kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
362        mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
363        StreamSetBuffer * decompressedCombinedStream = nullptr;
364
365        if (useAio) {
366            decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
367        } else {
368            decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
369        }
370
371        LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
372        decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
373        kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
374        mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
375    } else {
376        auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CharClasses, CompressedLineFeedStream}, "combined");
377        decompressedCharClasses = ret[0];
378        LineBreakStream = ret[1];
379    }
380
381    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
382    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
383    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
384
385    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
386    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
387    MatchResultsBufs[0] = MatchResults;
388
389    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
390    if (mREs.size() > 1) {
391        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
392        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
393        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
394    }
395    StreamSetBuffer * Matches = MergedResults;
396    if (mMoveMatchesToEOL) {
397        StreamSetBuffer * OriginalMatches = Matches;
398        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
399        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
400        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
401    }
402
403    if (MaxCountFlag > 0) {
404        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
405        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
406        StreamSetBuffer * const AllMatches = Matches;
407        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
408        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
409    }
410
411    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
412};
413
414std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
415        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
416
417    this->initREs(REs);
418    auto mGrepDriver = &mPxDriver;
419
420    auto & idb = mGrepDriver->getBuilder();
421    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
422    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
423    int MaxCountFlag = 0;
424
425    //  Regular Expression Processing and Analysis Phase
426    const auto nREs = mREs.size();
427
428    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
429
430    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
431
432
433    std::map<std::string, StreamSetBuffer *> propertyStream;
434
435    for(unsigned i = 0; i < nREs; ++i) {
436        std::vector<std::string> externalStreamNames;
437        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
438
439        std::set<re::Name *> UnicodeProperties;
440
441        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
442        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
443        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
444        MatchResultsBufs[i] = MatchResults;
445    }
446
447    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
448    if (mREs.size() > 1) {
449        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
450        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
451        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
452    }
453    StreamSetBuffer * Matches = MergedResults;
454    if (mMoveMatchesToEOL) {
455        StreamSetBuffer * OriginalMatches = Matches;
456        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
457        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
458        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
459    }
460
461    if (MaxCountFlag > 0) {
462        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
463        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
464        StreamSetBuffer * const AllMatches = Matches;
465        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
466        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
467    }
468
469    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
470
471}
472
473void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
474    auto main = this->getScanMatchGrepMainFunction();
475    std::ostringstream s;
476    EmitMatch accum("", false, false, s);
477
478    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
479    llvm::outs() << s.str();
480}
481
482void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
483    auto & iBuilder = mPxDriver.getBuilder();
484    this->generateScanMatchMainFunc(iBuilder);
485
486    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
487
488    // GeneratePipeline
489    this->generateLoadByteStreamAndBitStream(iBuilder);
490    this->generateExtractAndDepositMarkers(iBuilder);
491
492    auto swizzle = this->generateSwizzleExtractData(iBuilder);
493
494    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
495    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
496
497    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
498    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
499
500    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
501    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
502
503    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
504    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
505
506    // Produce unswizzled bit streams
507    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
508    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
509    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
510
511    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
512    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
513
514    StreamSetBuffer * LineBreakStream;
515    StreamSetBuffer * Matches;
516    std::vector<re::RE*> res = {regex};
517    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
518
519    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
520    scanMatchK->setInitialArguments({match_accumulator});
521    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
522    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
523    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
524
525    mPxDriver.generatePipelineIR();
526    mPxDriver.deallocateBuffers();
527
528    iBuilder->CreateRetVoid();
529
530    mPxDriver.finalizeObject();
531}
532
533void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
534    auto & iBuilder = mPxDriver.getBuilder();
535    this->generateCountOnlyMainFunc(iBuilder);
536
537    // GeneratePipeline
538    this->generateLoadByteStreamAndBitStream(iBuilder);
539//    this->generateExtractAndDepositMarkers(iBuilder);
540
541    StreamSetBuffer * LineBreakStream;
542    StreamSetBuffer * Matches;
543    std::vector<re::RE*> res = {regex};
544    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
545
546    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
547    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
548    mPxDriver.generatePipelineIR();
549
550    iBuilder->setKernel(matchCountK);
551    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
552    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
553
554    mPxDriver.deallocateBuffers();
555
556    iBuilder->CreateRet(matchedLineCount);
557
558
559    mPxDriver.finalizeObject();
560}
561
562void LZ4GrepGenerator::generateMultiplexingBitStreamAioPipeline(re::RE* regex) {
563    auto & iBuilder = mPxDriver.getBuilder();
564    this->generateCountOnlyMainFunc(iBuilder);
565
566    this->generateLoadByteStreamAndBitStream(iBuilder);
567    StreamSetBuffer * LineBreakStream;
568    StreamSetBuffer * Matches;
569    std::vector<re::RE*> res = {regex};
570    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false);
571
572    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
573    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
574    mPxDriver.generatePipelineIR();
575
576    iBuilder->setKernel(matchCountK);
577    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
578    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
579
580    mPxDriver.deallocateBuffers();
581
582    iBuilder->CreateRet(matchedLineCount);
583
584    mPxDriver.finalizeObject();
585}
586
587void LZ4GrepGenerator::generateBitStreamAioPipeline(re::RE* regex) {
588    auto & iBuilder = mPxDriver.getBuilder();
589    this->generateCountOnlyMainFunc(iBuilder);
590
591    // GeneratePipeline
592    this->generateLoadByteStreamAndBitStream(iBuilder);
593    StreamSetBuffer * const decompressionBitStream = this->generateBitStreamAIODecompression(iBuilder);
594
595    StreamSetBuffer * LineBreakStream;
596    StreamSetBuffer * Matches;
597    std::vector<re::RE*> res = {regex};
598    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
599
600    /*
601    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
602    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
603    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
604
605    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
606    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
607    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
608    */
609    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
610
611    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
612    mPxDriver.generatePipelineIR();
613
614    iBuilder->setKernel(matchCountK);
615    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
616    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
617    mPxDriver.deallocateBuffers();
618
619    iBuilder->CreateRet(matchedLineCount);
620
621    mPxDriver.finalizeObject();
622}
623
624void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
625    auto & iBuilder = mPxDriver.getBuilder();
626    this->generateCountOnlyMainFunc(iBuilder);
627
628    // GeneratePipeline
629    this->generateLoadByteStreamAndBitStream(iBuilder);
630
631    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
632
633    StreamSetBuffer * LineBreakStream;
634    StreamSetBuffer * Matches;
635    std::vector<re::RE*> res = {regex};
636    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
637/*
638    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
639    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
640    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
641
642    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
643    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
644    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
645*/
646    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
647
648    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
649    mPxDriver.generatePipelineIR();
650
651    iBuilder->setKernel(matchCountK);
652    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
653    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
654    mPxDriver.deallocateBuffers();
655
656    iBuilder->CreateRet(matchedLineCount);
657
658    mPxDriver.finalizeObject();
659}
660
661void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
662    auto & iBuilder = mPxDriver.getBuilder();
663    this->generateCountOnlyMainFunc(iBuilder);
664
665    this->generateLoadByteStream(iBuilder);
666    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
667
668
669    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
670    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
671    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
672
673
674    StreamSetBuffer * LineBreakStream;
675    StreamSetBuffer * Matches;
676    std::vector<re::RE*> res = {regex};
677    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
678
679
680//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
681//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
682//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
683
684    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
685    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
686    mPxDriver.generatePipelineIR();
687
688    iBuilder->setKernel(matchCountK);
689    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
690    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
691
692    mPxDriver.deallocateBuffers();
693
694    iBuilder->CreateRet(matchedLineCount);
695
696    mPxDriver.finalizeObject();
697}
698
699void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
700    auto & iBuilder = mPxDriver.getBuilder();
701    this->generateCountOnlyMainFunc(iBuilder);
702
703    // GeneratePipeline
704    this->generateLoadByteStream(iBuilder);
705//    this->generateLoadByteStreamAndBitStream(iBuilder);
706
707    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
708
709
710    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
711    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
712    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
713
714
715    StreamSetBuffer * LineBreakStream;
716    StreamSetBuffer * Matches;
717    std::vector<re::RE*> res = {regex};
718    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
719
720
721//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
722//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
723//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
724
725    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
726    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
727    mPxDriver.generatePipelineIR();
728
729    iBuilder->setKernel(matchCountK);
730    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
731    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
732
733    mPxDriver.deallocateBuffers();
734
735    iBuilder->CreateRet(matchedLineCount);
736
737    mPxDriver.finalizeObject();
738
739}
740
741
742void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
743    auto & iBuilder = mPxDriver.getBuilder();
744    this->generateCountOnlyMainFunc(iBuilder);
745
746    // GeneratePipeline
747    this->generateLoadByteStreamAndBitStream(iBuilder);
748    this->generateExtractAndDepositMarkers(iBuilder);
749
750    StreamSetBuffer * LineBreakStream;
751    StreamSetBuffer * Matches;
752    std::vector<re::RE*> res = {regex};
753    if (mEnableMultiplexing) {
754        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
755    } else {
756        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
757        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
758        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
759        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
760
761        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
762        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
763        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
764
765        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
766    };
767
768    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
769    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
770    mPxDriver.generatePipelineIR();
771
772    iBuilder->setKernel(matchCountK);
773    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
774    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
775
776    mPxDriver.deallocateBuffers();
777
778    iBuilder->CreateRet(matchedLineCount);
779
780    mPxDriver.finalizeObject();
781}
782
783
784void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
785    auto & iBuilder = mPxDriver.getBuilder();
786    this->generateCountOnlyMainFunc(iBuilder);
787
788
789    // GeneratePipeline
790    this->generateLoadByteStreamAndBitStream(iBuilder);
791    this->generateExtractAndDepositMarkers(iBuilder);
792
793
794    StreamSetBuffer * LineBreakStream;
795    StreamSetBuffer * Matches;
796    std::vector<re::RE*> res = {regex};
797    if (mEnableMultiplexing) {
798        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
799    } else {
800        auto swizzle = this->generateSwizzleExtractData(iBuilder);
801
802        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
803        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
804
805        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
806        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
807
808
809        // split PDEP into 2 kernel will be a little slower in single thread environment
810/*
811    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
812    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
813
814    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
815    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
816*/
817
818        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
819        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
820
821        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
822        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
823
824        // Produce unswizzled bit streams
825        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
826        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
827        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
828
829
830        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
831    };
832
833    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
834    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
835    mPxDriver.generatePipelineIR();
836
837    iBuilder->setKernel(matchCountK);
838    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
839    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
840
841    mPxDriver.deallocateBuffers();
842
843    iBuilder->CreateRet(matchedLineCount);
844
845    mPxDriver.finalizeObject();
846}
847
848ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
849    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
850}
851CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
852    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
853}
854
855void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
856    Module * M = iBuilder->getModule();
857    Type * const int64Ty = iBuilder->getInt64Ty();
858    Type * const sizeTy = iBuilder->getSizeTy();
859    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
860//    Type * const voidTy = iBuilder->getVoidTy();
861    Type * const inputType = iBuilder->getInt8PtrTy();
862
863    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
864    main->setCallingConv(CallingConv::C);
865    Function::arg_iterator args = main->arg_begin();
866    mInputStream = &*(args++);
867    mInputStream->setName("input");
868
869    mHeaderSize = &*(args++);
870    mHeaderSize->setName("mHeaderSize");
871
872    mFileSize = &*(args++);
873    mFileSize->setName("mFileSize");
874
875    mHasBlockChecksum = &*(args++);
876    mHasBlockChecksum->setName("mHasBlockChecksum");
877    // TODO for now, we do not handle blockCheckSum
878    mHasBlockChecksum = iBuilder->getInt1(false);
879
880    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
881}
882
883void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
884    Module * M = iBuilder->getModule();
885    Type * const sizeTy = iBuilder->getSizeTy();
886    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
887    Type * const voidTy = iBuilder->getVoidTy();
888    Type * const inputType = iBuilder->getInt8PtrTy();
889    Type * const intAddrTy = iBuilder->getIntAddrTy();
890
891    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
892    main->setCallingConv(CallingConv::C);
893    Function::arg_iterator args = main->arg_begin();
894    mInputStream = &*(args++);
895    mInputStream->setName("input");
896
897    mHeaderSize = &*(args++);
898    mHeaderSize->setName("mHeaderSize");
899
900    mFileSize = &*(args++);
901    mFileSize->setName("mFileSize");
902
903    mHasBlockChecksum = &*(args++);
904    mHasBlockChecksum->setName("mHasBlockChecksum");
905
906    match_accumulator = &*(args++);
907    match_accumulator->setName("match_accumulator");
908
909    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
910}
Note: See TracBrowser for help on using the repository browser.