source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6112

Last change on this file since 6112 was 6112, checked in by xwa163, 10 months ago

Remove S2PByPextKernel

File size: 41.9 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/fake_stream_generating_kernel.h>
25#include <kernels/bitstream_pdep_kernel.h>
26#include <kernels/bitstream_gather_pdep_kernel.h>
27#include <re/re_toolchain.h>
28
29#include <re/collect_ccs.h>
30#include <re/replaceCC.h>
31
32#include <UCD/resolve_properties.h>
33#include <kernels/charclasses.h>
34#include <kernels/grep_kernel.h>
35#include <kernels/UCD_property_kernel.h>
36#include <kernels/grapheme_kernel.h>
37#include <kernels/linebreak_kernel.h>
38#include <kernels/streams_merge.h>
39#include <kernels/scanmatchgen.h>
40#include <kernels/until_n.h>
41#include <re/casing.h>
42#include <re/exclude_CC.h>
43#include <re/to_utf8.h>
44#include <re/re_analysis.h>
45#include <re/re_name_resolve.h>
46#include <re/re_name_gather.h>
47#include <re/re_multiplex.h>
48#include <re/re_utility.h>
49#include <re/grapheme_clusters.h>
50#include <re/printer_re.h>
51#include <llvm/Support/raw_ostream.h>
52#include <llvm/Support/Debug.h>
53#include <kernels/lz4/lz4_block_decoder.h>
54#include <kernels/lz4/aio/lz4_swizzled_aio.h>
55
56
57namespace re { class CC; }
58
59using namespace llvm;
60using namespace parabix;
61using namespace kernel;
62using namespace grep;
63
64LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
65    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
66    mMoveMatchesToEOL = true;
67}
68
69void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
70    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
71        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
72    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
73        mBreakCC = re::makeByte(0);  // Null
74    } else {
75        mBreakCC = re::makeByte(0x0A); // LF
76    }
77    re::RE * anchorRE = mBreakCC;
78    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
79        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
80        anchorName->setDefinition(re::makeUnicodeBreak());
81        anchorRE = anchorName;
82    }
83
84    mREs = REs;
85    bool allAnchored = true;
86    for(unsigned i = 0; i < mREs.size(); ++i) {
87        if (!hasEndAnchor(mREs[i])) allAnchored = false;
88        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
89        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
90        mREs[i] = resolveAnchors(mREs[i], anchorRE);
91        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
92        mREs[i] = regular_expression_passes(mREs[i]);
93    }
94    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
95
96}
97
98
99parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
100//    auto mGrepDriver = &mPxDriver;
101    auto & idb = mPxDriver.getBuilder();
102    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
103    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
104    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
105    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
106    return LineFeedStream;
107}
108
109StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithAioApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
110    auto mGrepDriver = &mPxDriver;
111    auto & iBuilder = mGrepDriver->getBuilder();
112
113    //// Decode Block Information
114    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
115    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
116    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
117
118    //// Generate Helper Markers Extenders, FX, XF
119    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
120    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
121    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
122    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
123
124
125    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
126    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
127    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
128
129
130    // Produce unswizzled bit streams
131    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
132    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
133    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
134
135    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
136
137
138    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
139    lz4AioK->setInitialArguments({mFileSize});
140    mPxDriver.makeKernelCall(
141            lz4AioK,
142            {
143                    mCompressedByteStream,
144                    Extenders,
145
146                    // Block Data
147                    BlockData_IsCompressed,
148                    BlockData_BlockStart,
149                    BlockData_BlockEnd,
150
151                    u16Swizzle0,
152            }, {
153                    decompressedSwizzled0,
154            });
155
156
157
158    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
159    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
160    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
161
162    return decompressionBitStream;
163
164}
165
166StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
167    auto mGrepDriver = &mPxDriver;
168    auto & idb = mGrepDriver->getBuilder();
169
170    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
171    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
172    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
173
174    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
175    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
176    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
177
178    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
179    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
180    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
181
182    // Produce unswizzled bit streams
183    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
184    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
185    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
186
187    return matchCopiedBits;
188}
189parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
190    if (numberOfStream == 4) {
191        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
192    }
193
194    auto mGrepDriver = &mPxDriver;
195    auto & idb = mGrepDriver->getBuilder();
196
197    // Extract (Deletion)
198    this->generateCompressionMarker(idb);
199
200    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
201    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
202
203    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
204    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
205
206    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
207    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
208    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
209
210    // Deposit
211    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
212    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
213    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
214
215    // Match Copy
216    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
217    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
218    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
219
220    return matchCopiedBits;
221}
222
223parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
224    auto mGrepDriver = &mPxDriver;
225    auto & idb = mGrepDriver->getBuilder();
226    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
227
228    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
229    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
230    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
231    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
232}
233
234void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
235    this->initREs(REs);
236    auto mGrepDriver = &mPxDriver;
237
238    auto & idb = mGrepDriver->getBuilder();
239    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
240    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
241
242
243    //  Regular Expression Processing and Analysis Phase
244    const auto nREs = mREs.size();
245
246    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
247
248    std::map<std::string, StreamSetBuffer *> propertyStream;
249
250    std::vector<std::string> externalStreamNames;
251    std::set<re::Name *> UnicodeProperties;
252
253    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
254    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
255
256    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
257    mREs[0] = transformCCs(mpx.get(), mREs[0]);
258    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
259    auto numOfCharacterClasses = mpx_basis.size();
260    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
261
262    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
263    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
264
265    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
266    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
267    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
268
269    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
270    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
271    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
272    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
273
274    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
275    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
276    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
277    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
278
279
280
281    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
282    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
283    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
284
285    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
286    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
287    MatchResultsBufs[0] = MatchResults;
288
289    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
290    if (mREs.size() > 1) {
291        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
292        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
293        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
294    }
295    StreamSetBuffer * Matches = MergedResults;
296    if (mMoveMatchesToEOL) {
297        StreamSetBuffer * OriginalMatches = Matches;
298        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
299        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
300        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
301    }
302
303//    if (MaxCountFlag > 0) {
304//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
305//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
306//        StreamSetBuffer * const AllMatches = Matches;
307//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
308//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
309//    }
310
311//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
312
313};
314std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio) {
315
316    this->initREs(REs);
317    auto mGrepDriver = &mPxDriver;
318
319    auto & idb = mGrepDriver->getBuilder();
320    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
321    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
322    int MaxCountFlag = 0;
323
324    //  Regular Expression Processing and Analysis Phase
325    const auto nREs = mREs.size();
326
327    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
328
329
330    std::map<std::string, StreamSetBuffer *> propertyStream;
331
332    std::vector<std::string> externalStreamNames;
333    std::set<re::Name *> UnicodeProperties;
334
335    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
336    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
337
338    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
339    mREs[0] = transformCCs(mpx.get(), mREs[0]);
340    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
341    auto numOfCharacterClasses = mpx_basis.size();
342    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
343
344    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
345    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
346
347    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
348    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
349    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
350
351    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
352    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
353    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
354    StreamSetBuffer * decompressedCombinedStream = nullptr;
355
356    if (useAio) {
357        decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
358    } else {
359        decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
360    }
361
362    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
363    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
364    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
365    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
366
367    /*
368    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
369    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
370     */
371
372    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
373    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
374    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
375
376    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
377    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
378    MatchResultsBufs[0] = MatchResults;
379
380    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
381    if (mREs.size() > 1) {
382        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
383        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
384        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
385    }
386    StreamSetBuffer * Matches = MergedResults;
387    if (mMoveMatchesToEOL) {
388        StreamSetBuffer * OriginalMatches = Matches;
389        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
390        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
391        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
392    }
393
394    if (MaxCountFlag > 0) {
395        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
396        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
397        StreamSetBuffer * const AllMatches = Matches;
398        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
399        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
400    }
401
402    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
403};
404
405std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
406        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
407
408    this->initREs(REs);
409    auto mGrepDriver = &mPxDriver;
410
411    auto & idb = mGrepDriver->getBuilder();
412    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
413    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
414    int MaxCountFlag = 0;
415
416    //  Regular Expression Processing and Analysis Phase
417    const auto nREs = mREs.size();
418
419    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
420
421    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
422
423
424    std::map<std::string, StreamSetBuffer *> propertyStream;
425
426    for(unsigned i = 0; i < nREs; ++i) {
427        std::vector<std::string> externalStreamNames;
428        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
429
430        std::set<re::Name *> UnicodeProperties;
431
432        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
433        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
434        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
435        MatchResultsBufs[i] = MatchResults;
436    }
437
438    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
439    if (mREs.size() > 1) {
440        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
441        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
442        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
443    }
444    StreamSetBuffer * Matches = MergedResults;
445    if (mMoveMatchesToEOL) {
446        StreamSetBuffer * OriginalMatches = Matches;
447        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
448        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
449        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
450    }
451
452    if (MaxCountFlag > 0) {
453        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
454        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
455        StreamSetBuffer * const AllMatches = Matches;
456        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
457        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
458    }
459
460    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
461
462}
463
464void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
465    auto main = this->getScanMatchGrepMainFunction();
466    std::ostringstream s;
467    EmitMatch accum("", false, false, s);
468
469    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
470    llvm::outs() << s.str();
471}
472
473void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
474    auto & iBuilder = mPxDriver.getBuilder();
475    this->generateScanMatchMainFunc(iBuilder);
476
477    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
478
479    // GeneratePipeline
480    this->generateLoadByteStreamAndBitStream(iBuilder);
481    this->generateExtractAndDepositMarkers(iBuilder);
482
483    auto swizzle = this->generateSwizzleExtractData(iBuilder);
484
485    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
486    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
487
488    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
489    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
490
491    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
492    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
493
494    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
495    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
496
497    // Produce unswizzled bit streams
498    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
499    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
500    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
501
502    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
503    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
504
505    StreamSetBuffer * LineBreakStream;
506    StreamSetBuffer * Matches;
507    std::vector<re::RE*> res = {regex};
508    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
509
510    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
511    scanMatchK->setInitialArguments({match_accumulator});
512    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
513    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
514    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
515
516    mPxDriver.generatePipelineIR();
517    mPxDriver.deallocateBuffers();
518
519    iBuilder->CreateRetVoid();
520
521    mPxDriver.finalizeObject();
522}
523
524void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
525    auto & iBuilder = mPxDriver.getBuilder();
526    this->generateCountOnlyMainFunc(iBuilder);
527
528    // GeneratePipeline
529    this->generateLoadByteStreamAndBitStream(iBuilder);
530//    this->generateExtractAndDepositMarkers(iBuilder);
531
532    StreamSetBuffer * LineBreakStream;
533    StreamSetBuffer * Matches;
534    std::vector<re::RE*> res = {regex};
535    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
536
537    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
538    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
539    mPxDriver.generatePipelineIR();
540
541    iBuilder->setKernel(matchCountK);
542    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
543    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
544
545    mPxDriver.deallocateBuffers();
546
547    iBuilder->CreateRet(matchedLineCount);
548
549
550    mPxDriver.finalizeObject();
551}
552
553void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
554    auto & iBuilder = mPxDriver.getBuilder();
555    this->generateCountOnlyMainFunc(iBuilder);
556
557    // GeneratePipeline
558    this->generateLoadByteStreamAndBitStream(iBuilder);
559
560    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
561
562    StreamSetBuffer * LineBreakStream;
563    StreamSetBuffer * Matches;
564    std::vector<re::RE*> res = {regex};
565    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
566/*
567    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
568    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
569    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
570
571    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
572    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
573    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
574*/
575    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
576
577    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
578    mPxDriver.generatePipelineIR();
579
580    iBuilder->setKernel(matchCountK);
581    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
582    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
583    mPxDriver.deallocateBuffers();
584
585    iBuilder->CreateRet(matchedLineCount);
586
587    mPxDriver.finalizeObject();
588}
589
590void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
591    auto & iBuilder = mPxDriver.getBuilder();
592    this->generateCountOnlyMainFunc(iBuilder);
593
594    this->generateLoadByteStream(iBuilder);
595    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
596
597
598    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
599    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ true, "a");
600    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
601
602
603    StreamSetBuffer * LineBreakStream;
604    StreamSetBuffer * Matches;
605    std::vector<re::RE*> res = {regex};
606    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
607
608
609//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
610//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
611//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
612
613    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
614    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
615    mPxDriver.generatePipelineIR();
616
617    iBuilder->setKernel(matchCountK);
618    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
619    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
620
621    mPxDriver.deallocateBuffers();
622
623    iBuilder->CreateRet(matchedLineCount);
624
625    mPxDriver.finalizeObject();
626}
627
628void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
629    auto & iBuilder = mPxDriver.getBuilder();
630    this->generateCountOnlyMainFunc(iBuilder);
631
632    // GeneratePipeline
633    this->generateLoadByteStream(iBuilder);
634//    this->generateLoadByteStreamAndBitStream(iBuilder);
635
636    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
637
638
639    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
640    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::LittleEndian, /*aligned = */ true, "a");
641    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
642
643
644    StreamSetBuffer * LineBreakStream;
645    StreamSetBuffer * Matches;
646    std::vector<re::RE*> res = {regex};
647    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
648
649
650//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
651//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
652//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
653
654    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
655    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
656    mPxDriver.generatePipelineIR();
657
658    iBuilder->setKernel(matchCountK);
659    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
660    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
661
662    mPxDriver.deallocateBuffers();
663
664    iBuilder->CreateRet(matchedLineCount);
665
666    mPxDriver.finalizeObject();
667
668}
669
670
671void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
672    auto & iBuilder = mPxDriver.getBuilder();
673    this->generateCountOnlyMainFunc(iBuilder);
674
675    // GeneratePipeline
676    this->generateLoadByteStreamAndBitStream(iBuilder);
677    this->generateExtractAndDepositMarkers(iBuilder);
678
679    StreamSetBuffer * LineBreakStream;
680    StreamSetBuffer * Matches;
681    std::vector<re::RE*> res = {regex};
682    if (mEnableMultiplexing) {
683        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
684    } else {
685        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
686        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
687        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
688        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
689
690        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
691        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
692        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
693
694        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
695    };
696
697    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
698    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
699    mPxDriver.generatePipelineIR();
700
701    iBuilder->setKernel(matchCountK);
702    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
703    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
704
705    mPxDriver.deallocateBuffers();
706
707    iBuilder->CreateRet(matchedLineCount);
708
709    mPxDriver.finalizeObject();
710}
711
712
713void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
714    auto & iBuilder = mPxDriver.getBuilder();
715    this->generateCountOnlyMainFunc(iBuilder);
716
717
718    // GeneratePipeline
719    this->generateLoadByteStreamAndBitStream(iBuilder);
720    this->generateExtractAndDepositMarkers(iBuilder);
721
722
723    StreamSetBuffer * LineBreakStream;
724    StreamSetBuffer * Matches;
725    std::vector<re::RE*> res = {regex};
726    if (mEnableMultiplexing) {
727        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
728    } else {
729        auto swizzle = this->generateSwizzleExtractData(iBuilder);
730
731        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
732        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
733
734        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
735        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
736
737
738        // split PDEP into 2 kernel will be a little slower in single thread environment
739/*
740    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
741    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
742
743    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
744    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
745*/
746
747        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
748        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
749
750        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
751        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
752
753        // Produce unswizzled bit streams
754        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
755        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
756        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
757
758
759        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
760    };
761
762    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
763    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
764    mPxDriver.generatePipelineIR();
765
766    iBuilder->setKernel(matchCountK);
767    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
768    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
769
770    mPxDriver.deallocateBuffers();
771
772    iBuilder->CreateRet(matchedLineCount);
773
774    mPxDriver.finalizeObject();
775}
776
777ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
778    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
779}
780CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
781    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
782}
783
784void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
785    Module * M = iBuilder->getModule();
786    Type * const int64Ty = iBuilder->getInt64Ty();
787    Type * const sizeTy = iBuilder->getSizeTy();
788    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
789//    Type * const voidTy = iBuilder->getVoidTy();
790    Type * const inputType = iBuilder->getInt8PtrTy();
791
792    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
793    main->setCallingConv(CallingConv::C);
794    Function::arg_iterator args = main->arg_begin();
795    mInputStream = &*(args++);
796    mInputStream->setName("input");
797
798    mHeaderSize = &*(args++);
799    mHeaderSize->setName("mHeaderSize");
800
801    mFileSize = &*(args++);
802    mFileSize->setName("mFileSize");
803
804    mHasBlockChecksum = &*(args++);
805    mHasBlockChecksum->setName("mHasBlockChecksum");
806    // TODO for now, we do not handle blockCheckSum
807    mHasBlockChecksum = iBuilder->getInt1(false);
808
809    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
810}
811
812void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
813    Module * M = iBuilder->getModule();
814    Type * const sizeTy = iBuilder->getSizeTy();
815    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
816    Type * const voidTy = iBuilder->getVoidTy();
817    Type * const inputType = iBuilder->getInt8PtrTy();
818    Type * const intAddrTy = iBuilder->getIntAddrTy();
819
820    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
821    main->setCallingConv(CallingConv::C);
822    Function::arg_iterator args = main->arg_begin();
823    mInputStream = &*(args++);
824    mInputStream->setName("input");
825
826    mHeaderSize = &*(args++);
827    mHeaderSize->setName("mHeaderSize");
828
829    mFileSize = &*(args++);
830    mFileSize->setName("mFileSize");
831
832    mHasBlockChecksum = &*(args++);
833    mHasBlockChecksum->setName("mHasBlockChecksum");
834
835    match_accumulator = &*(args++);
836    match_accumulator->setName("match_accumulator");
837
838    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
839}
Note: See TracBrowser for help on using the repository browser.