source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6070

Last change on this file since 6070 was 6070, checked in by xwa163, 11 months ago

Improve performance of literal copy and match copy in LZ4ParallelByteStreamAIOKernel

File size: 42.3 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53#include <kernels/lz4/lz4_block_decoder.h>
54#include <kernels/lz4/lz4_swizzled_aio.h>
55
56
57namespace re { class CC; }
58
59using namespace llvm;
60using namespace parabix;
61using namespace kernel;
62using namespace grep;
63
64LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
65    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
66    mMoveMatchesToEOL = true;
67}
68
69void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
70    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
71        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
72    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
73        mBreakCC = re::makeByte(0);  // Null
74    } else {
75        mBreakCC = re::makeByte(0x0A); // LF
76    }
77    re::RE * anchorRE = mBreakCC;
78    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
79        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
80        anchorName->setDefinition(re::makeUnicodeBreak());
81        anchorRE = anchorName;
82    }
83
84    mREs = REs;
85    bool allAnchored = true;
86    for(unsigned i = 0; i < mREs.size(); ++i) {
87        if (!hasEndAnchor(mREs[i])) allAnchored = false;
88        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
89        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
90        mREs[i] = resolveAnchors(mREs[i], anchorRE);
91        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
92        mREs[i] = regular_expression_passes(mREs[i]);
93    }
94    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
95
96}
97
98
99parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
100//    auto mGrepDriver = &mPxDriver;
101    auto & idb = mPxDriver.getBuilder();
102    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
103    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
104    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
105    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
106    return LineFeedStream;
107}
108
109StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithAioApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
110    auto mGrepDriver = &mPxDriver;
111    auto & iBuilder = mGrepDriver->getBuilder();
112
113    //// Decode Block Information
114    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
115    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
116    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
117
118    //// Generate Helper Markers Extenders, FX, XF
119    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
120    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
121    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
122    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
123
124
125    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
126    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
127    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
128
129
130    // Produce unswizzled bit streams
131    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
132    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
133    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
134
135    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
136
137
138    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
139    lz4AioK->setInitialArguments({mFileSize});
140    mPxDriver.makeKernelCall(
141            lz4AioK,
142            {
143                    mCompressedByteStream,
144                    Extenders,
145
146                    // Block Data
147                    BlockData_IsCompressed,
148                    BlockData_BlockStart,
149                    BlockData_BlockEnd,
150
151                    u16Swizzle0,
152            }, {
153                    decompressedSwizzled0,
154            });
155
156
157
158    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
159    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
160    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
161
162    return decompressionBitStream;
163
164}
165
166StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
167    auto mGrepDriver = &mPxDriver;
168    auto & idb = mGrepDriver->getBuilder();
169
170    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
171    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
172    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
173
174    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
175    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
176    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
177
178    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
179    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
180    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
181
182    // Produce unswizzled bit streams
183    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
184    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
185    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
186
187    return matchCopiedBits;
188}
189parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
190    if (numberOfStream == 4) {
191        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
192    }
193
194    auto mGrepDriver = &mPxDriver;
195    auto & idb = mGrepDriver->getBuilder();
196
197    // Extract (Deletion)
198    this->generateCompressionMarker(idb);
199
200    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
201    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
202
203    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
204    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
205
206    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
207    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
208    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
209
210    // Deposit
211    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
212    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
213    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
214
215    // Match Copy
216    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
217    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
218    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
219
220    return matchCopiedBits;
221}
222
223parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
224    auto mGrepDriver = &mPxDriver;
225    auto & idb = mGrepDriver->getBuilder();
226    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
227
228    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
229    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
230    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
231    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
232}
233
234void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
235    this->initREs(REs);
236    auto mGrepDriver = &mPxDriver;
237
238    auto & idb = mGrepDriver->getBuilder();
239    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
240    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
241
242
243    //  Regular Expression Processing and Analysis Phase
244    const auto nREs = mREs.size();
245
246    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
247
248    std::map<std::string, StreamSetBuffer *> propertyStream;
249
250    std::vector<std::string> externalStreamNames;
251    std::set<re::Name *> UnicodeProperties;
252
253    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
254    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
255
256    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
257    mREs[0] = transformCCs(mpx.get(), mREs[0]);
258    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
259    auto numOfCharacterClasses = mpx_basis.size();
260    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
261
262    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
263    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
264
265    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
266    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
267    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
268
269    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
270    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
271    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
272    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
273
274    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
275    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
276    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
277    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
278
279
280
281    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
282    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
283    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
284
285    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
286    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
287    MatchResultsBufs[0] = MatchResults;
288
289    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
290    if (mREs.size() > 1) {
291        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
292        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
293        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
294    }
295    StreamSetBuffer * Matches = MergedResults;
296    if (mMoveMatchesToEOL) {
297        StreamSetBuffer * OriginalMatches = Matches;
298        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
299        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
300        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
301    }
302
303//    if (MaxCountFlag > 0) {
304//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
305//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
306//        StreamSetBuffer * const AllMatches = Matches;
307//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
308//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
309//    }
310
311//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
312
313};
314std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio) {
315
316    this->initREs(REs);
317    auto mGrepDriver = &mPxDriver;
318
319    auto & idb = mGrepDriver->getBuilder();
320    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
321    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
322    int MaxCountFlag = 0;
323
324    //  Regular Expression Processing and Analysis Phase
325    const auto nREs = mREs.size();
326
327    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
328
329
330    std::map<std::string, StreamSetBuffer *> propertyStream;
331
332    std::vector<std::string> externalStreamNames;
333    std::set<re::Name *> UnicodeProperties;
334
335    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
336    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
337
338    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
339    mREs[0] = transformCCs(mpx.get(), mREs[0]);
340    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
341    auto numOfCharacterClasses = mpx_basis.size();
342    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
343
344    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
345    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
346
347    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
348    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
349    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
350
351    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
352    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
353    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
354    StreamSetBuffer * decompressedCombinedStream = nullptr;
355
356    if (useAio) {
357        decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
358    } else {
359        decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
360    }
361
362    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
363    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
364    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
365    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
366
367    /*
368    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
369    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
370     */
371
372    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
373    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
374    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
375
376    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
377    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
378    MatchResultsBufs[0] = MatchResults;
379
380    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
381    if (mREs.size() > 1) {
382        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
383        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
384        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
385    }
386    StreamSetBuffer * Matches = MergedResults;
387    if (mMoveMatchesToEOL) {
388        StreamSetBuffer * OriginalMatches = Matches;
389        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
390        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
391        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
392    }
393
394    if (MaxCountFlag > 0) {
395        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
396        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
397        StreamSetBuffer * const AllMatches = Matches;
398        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
399        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
400    }
401
402    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
403};
404
405std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
406        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
407
408    this->initREs(REs);
409    auto mGrepDriver = &mPxDriver;
410
411    auto & idb = mGrepDriver->getBuilder();
412    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
413    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
414    int MaxCountFlag = 0;
415
416    //  Regular Expression Processing and Analysis Phase
417    const auto nREs = mREs.size();
418
419    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
420
421    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
422
423
424    std::map<std::string, StreamSetBuffer *> propertyStream;
425
426    for(unsigned i = 0; i < nREs; ++i) {
427        std::vector<std::string> externalStreamNames;
428        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
429
430        std::set<re::Name *> UnicodeProperties;
431
432        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
433        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
434        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
435        MatchResultsBufs[i] = MatchResults;
436    }
437
438    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
439    if (mREs.size() > 1) {
440        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
441        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
442        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
443    }
444    StreamSetBuffer * Matches = MergedResults;
445    if (mMoveMatchesToEOL) {
446        StreamSetBuffer * OriginalMatches = Matches;
447        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
448        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
449        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
450    }
451
452    if (MaxCountFlag > 0) {
453        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
454        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
455        StreamSetBuffer * const AllMatches = Matches;
456        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
457        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
458    }
459
460    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
461
462}
463
464void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
465    auto main = this->getScanMatchGrepMainFunction();
466    std::ostringstream s;
467    EmitMatch accum("", false, false, s);
468
469    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
470    llvm::outs() << s.str();
471}
472
473void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
474    auto & iBuilder = mPxDriver.getBuilder();
475    this->generateScanMatchMainFunc(iBuilder);
476
477    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
478
479    // GeneratePipeline
480    this->generateLoadByteStreamAndBitStream(iBuilder);
481    this->generateExtractAndDepositMarkers(iBuilder);
482
483    auto swizzle = this->generateSwizzleExtractData(iBuilder);
484
485    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
486    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
487
488    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
489    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
490
491    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
492    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
493
494    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
495    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
496
497    // Produce unswizzled bit streams
498    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
499    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
500    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
501
502    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
503    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
504
505    StreamSetBuffer * LineBreakStream;
506    StreamSetBuffer * Matches;
507    std::vector<re::RE*> res = {regex};
508    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
509
510    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
511    scanMatchK->setInitialArguments({match_accumulator});
512    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
513    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
514    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
515
516    mPxDriver.generatePipelineIR();
517    mPxDriver.deallocateBuffers();
518
519    iBuilder->CreateRetVoid();
520
521    mPxDriver.finalizeObject();
522}
523
524void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE* regex) {
525    auto & iBuilder = mPxDriver.getBuilder();
526    this->generateMainFunc(iBuilder);
527
528    // GeneratePipeline
529    this->generateLoadByteStreamAndBitStream(iBuilder);
530
531    std::vector<re::RE*> res = {regex};
532    this->generateMultiplexingCompressedBitStream(res);
533
534    mPxDriver.generatePipelineIR();
535    mPxDriver.deallocateBuffers();
536
537    iBuilder->CreateRetVoid();
538
539    mPxDriver.finalizeObject();
540}
541
542void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline2(re::RE* regex) {
543    auto & iBuilder = mPxDriver.getBuilder();
544    this->generateCountOnlyMainFunc(iBuilder);
545
546    // GeneratePipeline
547    this->generateLoadByteStreamAndBitStream(iBuilder);
548//    this->generateExtractAndDepositMarkers(iBuilder);
549
550    StreamSetBuffer * LineBreakStream;
551    StreamSetBuffer * Matches;
552    std::vector<re::RE*> res = {regex};
553    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
554
555    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
556    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
557    mPxDriver.generatePipelineIR();
558
559    iBuilder->setKernel(matchCountK);
560    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
561    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
562
563    mPxDriver.deallocateBuffers();
564
565    iBuilder->CreateRet(matchedLineCount);
566
567
568    mPxDriver.finalizeObject();
569}
570
571void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
572    auto & iBuilder = mPxDriver.getBuilder();
573    this->generateCountOnlyMainFunc(iBuilder);
574
575    // GeneratePipeline
576    this->generateLoadByteStreamAndBitStream(iBuilder);
577
578    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
579
580    StreamSetBuffer * LineBreakStream;
581    StreamSetBuffer * Matches;
582    std::vector<re::RE*> res = {regex};
583    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
584/*
585    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
586    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
587    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
588
589    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
590    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
591    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
592*/
593    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
594
595    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
596    mPxDriver.generatePipelineIR();
597
598    iBuilder->setKernel(matchCountK);
599    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
600    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
601    mPxDriver.deallocateBuffers();
602
603    iBuilder->CreateRet(matchedLineCount);
604
605    mPxDriver.finalizeObject();
606}
607
608void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex) {
609    auto & iBuilder = mPxDriver.getBuilder();
610    this->generateCountOnlyMainFunc(iBuilder);
611
612    this->generateLoadByteStream(iBuilder);
613    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder);
614
615
616    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
617    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
618//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
619    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
620
621
622    StreamSetBuffer * LineBreakStream;
623    StreamSetBuffer * Matches;
624    std::vector<re::RE*> res = {regex};
625    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
626
627
628//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
629//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
630//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
631
632    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
633    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
634    mPxDriver.generatePipelineIR();
635
636    iBuilder->setKernel(matchCountK);
637    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
638    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
639
640    mPxDriver.deallocateBuffers();
641
642    iBuilder->CreateRet(matchedLineCount);
643
644    mPxDriver.finalizeObject();
645}
646
647void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
648    auto & iBuilder = mPxDriver.getBuilder();
649    this->generateCountOnlyMainFunc(iBuilder);
650
651    // GeneratePipeline
652//    this->generateLoadByteStreamAndBitStream(iBuilder);
653    this->generateLoadByteStream(iBuilder);
654    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
655
656
657    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
658    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
659//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
660    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
661
662
663    StreamSetBuffer * LineBreakStream;
664    StreamSetBuffer * Matches;
665    std::vector<re::RE*> res = {regex};
666    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
667
668
669//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
670//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
671//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
672
673    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
674    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
675    mPxDriver.generatePipelineIR();
676
677    iBuilder->setKernel(matchCountK);
678    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
679    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
680
681    mPxDriver.deallocateBuffers();
682
683    iBuilder->CreateRet(matchedLineCount);
684
685    mPxDriver.finalizeObject();
686
687}
688
689
690void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
691    auto & iBuilder = mPxDriver.getBuilder();
692    this->generateCountOnlyMainFunc(iBuilder);
693
694    // GeneratePipeline
695    this->generateLoadByteStreamAndBitStream(iBuilder);
696    this->generateExtractAndDepositMarkers(iBuilder);
697
698    StreamSetBuffer * LineBreakStream;
699    StreamSetBuffer * Matches;
700    std::vector<re::RE*> res = {regex};
701    if (mEnableMultiplexing) {
702        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
703    } else {
704        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
705        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
706        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
707        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
708
709        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
710        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
711        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
712
713        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
714    };
715
716    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
717    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
718    mPxDriver.generatePipelineIR();
719
720    iBuilder->setKernel(matchCountK);
721    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
722    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
723
724    mPxDriver.deallocateBuffers();
725
726    iBuilder->CreateRet(matchedLineCount);
727
728    mPxDriver.finalizeObject();
729}
730
731
732void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
733    auto & iBuilder = mPxDriver.getBuilder();
734    this->generateCountOnlyMainFunc(iBuilder);
735
736
737    // GeneratePipeline
738    this->generateLoadByteStreamAndBitStream(iBuilder);
739    this->generateExtractAndDepositMarkers(iBuilder);
740
741
742    StreamSetBuffer * LineBreakStream;
743    StreamSetBuffer * Matches;
744    std::vector<re::RE*> res = {regex};
745    if (mEnableMultiplexing) {
746        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
747    } else {
748        auto swizzle = this->generateSwizzleExtractData(iBuilder);
749
750        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
751        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
752
753        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
754        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
755
756
757        // split PDEP into 2 kernel will be a little slower in single thread environment
758/*
759    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
760    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
761
762    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
763    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
764*/
765
766        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
767        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
768
769        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
770        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
771
772        // Produce unswizzled bit streams
773        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
774        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
775        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
776
777
778        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
779    };
780
781    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
782    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
783    mPxDriver.generatePipelineIR();
784
785    iBuilder->setKernel(matchCountK);
786    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
787    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
788
789    mPxDriver.deallocateBuffers();
790
791    iBuilder->CreateRet(matchedLineCount);
792
793    mPxDriver.finalizeObject();
794}
795
796ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
797    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
798}
799CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
800    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
801}
802
803void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
804    Module * M = iBuilder->getModule();
805    Type * const int64Ty = iBuilder->getInt64Ty();
806    Type * const sizeTy = iBuilder->getSizeTy();
807    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
808//    Type * const voidTy = iBuilder->getVoidTy();
809    Type * const inputType = iBuilder->getInt8PtrTy();
810
811    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
812    main->setCallingConv(CallingConv::C);
813    Function::arg_iterator args = main->arg_begin();
814    mInputStream = &*(args++);
815    mInputStream->setName("input");
816
817    mHeaderSize = &*(args++);
818    mHeaderSize->setName("mHeaderSize");
819
820    mFileSize = &*(args++);
821    mFileSize->setName("mFileSize");
822
823    mHasBlockChecksum = &*(args++);
824    mHasBlockChecksum->setName("mHasBlockChecksum");
825    // TODO for now, we do not handle blockCheckSum
826    mHasBlockChecksum = iBuilder->getInt1(false);
827
828    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
829}
830
831void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
832    Module * M = iBuilder->getModule();
833    Type * const sizeTy = iBuilder->getSizeTy();
834    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
835    Type * const voidTy = iBuilder->getVoidTy();
836    Type * const inputType = iBuilder->getInt8PtrTy();
837    Type * const intAddrTy = iBuilder->getIntAddrTy();
838
839    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
840    main->setCallingConv(CallingConv::C);
841    Function::arg_iterator args = main->arg_begin();
842    mInputStream = &*(args++);
843    mInputStream->setName("input");
844
845    mHeaderSize = &*(args++);
846    mHeaderSize->setName("mHeaderSize");
847
848    mFileSize = &*(args++);
849    mFileSize->setName("mFileSize");
850
851    mHasBlockChecksum = &*(args++);
852    mHasBlockChecksum->setName("mHasBlockChecksum");
853
854    match_accumulator = &*(args++);
855    match_accumulator->setName("match_accumulator");
856
857    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
858}
Note: See TracBrowser for help on using the repository browser.