source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6089

Last change on this file since 6089 was 6089, checked in by cameron, 10 months ago

Little-endian/big-endian bit number options, default to little-endian

File size: 42.5 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/alphabet.h>
9#include <cc/cc_compiler.h>
10
11#include <kernels/cc_kernel.h>
12#include <kernels/s2p_kernel.h>
13#include <kernels/p2s_kernel.h>
14#include <kernels/source_kernel.h>
15#include <kernels/stdout_kernel.h>
16#include <kernels/lz4/lz4_generate_deposit_stream.h>
17#include <kernels/kernel_builder.h>
18#include <kernels/deletion.h>
19#include <kernels/swizzle.h>
20#include <kernels/pdep_kernel.h>
21#include <kernels/swizzled_multiple_pdep_kernel.h>
22#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
24#include <kernels/lz4/lz4_bitstream_not_kernel.h>
25#include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
26#include <kernels/bitstream_pdep_kernel.h>
27#include <kernels/bitstream_gather_pdep_kernel.h>
28#include <re/re_toolchain.h>
29
30#include <re/collect_ccs.h>
31#include <re/replaceCC.h>
32
33#include <UCD/resolve_properties.h>
34#include <kernels/charclasses.h>
35#include <kernels/grep_kernel.h>
36#include <kernels/UCD_property_kernel.h>
37#include <kernels/grapheme_kernel.h>
38#include <kernels/linebreak_kernel.h>
39#include <kernels/streams_merge.h>
40#include <kernels/scanmatchgen.h>
41#include <kernels/until_n.h>
42#include <re/casing.h>
43#include <re/exclude_CC.h>
44#include <re/to_utf8.h>
45#include <re/re_analysis.h>
46#include <re/re_name_resolve.h>
47#include <re/re_name_gather.h>
48#include <re/re_multiplex.h>
49#include <re/re_utility.h>
50#include <re/grapheme_clusters.h>
51#include <re/printer_re.h>
52#include <llvm/Support/raw_ostream.h>
53#include <llvm/Support/Debug.h>
54#include <kernels/lz4/lz4_block_decoder.h>
55#include <kernels/lz4/lz4_swizzled_aio.h>
56
57
58namespace re { class CC; }
59
60using namespace llvm;
61using namespace parabix;
62using namespace kernel;
63using namespace grep;
64
65LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
66    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
67    mMoveMatchesToEOL = true;
68}
69
70void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
71    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
72        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
73    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
74        mBreakCC = re::makeByte(0);  // Null
75    } else {
76        mBreakCC = re::makeByte(0x0A); // LF
77    }
78    re::RE * anchorRE = mBreakCC;
79    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
80        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
81        anchorName->setDefinition(re::makeUnicodeBreak());
82        anchorRE = anchorName;
83    }
84
85    mREs = REs;
86    bool allAnchored = true;
87    for(unsigned i = 0; i < mREs.size(); ++i) {
88        if (!hasEndAnchor(mREs[i])) allAnchored = false;
89        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
90        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
91        mREs[i] = resolveAnchors(mREs[i], anchorRE);
92        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
93        mREs[i] = regular_expression_passes(mREs[i]);
94    }
95    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
96
97}
98
99
100parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
101//    auto mGrepDriver = &mPxDriver;
102    auto & idb = mPxDriver.getBuilder();
103    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
104    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
105    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
106    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
107    return LineFeedStream;
108}
109
110StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithAioApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
111    auto mGrepDriver = &mPxDriver;
112    auto & iBuilder = mGrepDriver->getBuilder();
113
114    //// Decode Block Information
115    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
116    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
117    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
118
119    //// Generate Helper Markers Extenders, FX, XF
120    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
121    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
122    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
123    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
124
125
126    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
127    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
128    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
129
130
131    // Produce unswizzled bit streams
132    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
133    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
134    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
135
136    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
137
138
139    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
140    lz4AioK->setInitialArguments({mFileSize});
141    mPxDriver.makeKernelCall(
142            lz4AioK,
143            {
144                    mCompressedByteStream,
145                    Extenders,
146
147                    // Block Data
148                    BlockData_IsCompressed,
149                    BlockData_BlockStart,
150                    BlockData_BlockEnd,
151
152                    u16Swizzle0,
153            }, {
154                    decompressedSwizzled0,
155            });
156
157
158
159    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
160    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
161    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
162
163    return decompressionBitStream;
164
165}
166
167StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
168    auto mGrepDriver = &mPxDriver;
169    auto & idb = mGrepDriver->getBuilder();
170
171    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
172    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
173    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
174
175    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
176    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
177    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
178
179    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
180    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
181    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
182
183    // Produce unswizzled bit streams
184    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
185    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
186    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
187
188    return matchCopiedBits;
189}
190parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
191    if (numberOfStream == 4) {
192        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
193    }
194
195    auto mGrepDriver = &mPxDriver;
196    auto & idb = mGrepDriver->getBuilder();
197
198    // Extract (Deletion)
199    this->generateCompressionMarker(idb);
200
201    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
202    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
203
204    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
205    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
206
207    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
208    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
209    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
210
211    // Deposit
212    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
213    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
214    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
215
216    // Match Copy
217    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
218    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
219    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
220
221    return matchCopiedBits;
222}
223
224parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
225    auto mGrepDriver = &mPxDriver;
226    auto & idb = mGrepDriver->getBuilder();
227    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
228
229    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
230    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
231    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
232    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
233}
234
235void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
236    this->initREs(REs);
237    auto mGrepDriver = &mPxDriver;
238
239    auto & idb = mGrepDriver->getBuilder();
240    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
241    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
242
243
244    //  Regular Expression Processing and Analysis Phase
245    const auto nREs = mREs.size();
246
247    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
248
249    std::map<std::string, StreamSetBuffer *> propertyStream;
250
251    std::vector<std::string> externalStreamNames;
252    std::set<re::Name *> UnicodeProperties;
253
254    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
255    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
256
257    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
258    mREs[0] = transformCCs(mpx.get(), mREs[0]);
259    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
260    auto numOfCharacterClasses = mpx_basis.size();
261    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
262
263    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
264    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
265
266    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
267    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
268    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
269
270    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
271    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
272    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
273    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
274
275    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
276    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
277    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
278    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
279
280
281
282    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
283    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
284    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
285
286    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
287    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
288    MatchResultsBufs[0] = MatchResults;
289
290    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
291    if (mREs.size() > 1) {
292        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
293        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
294        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
295    }
296    StreamSetBuffer * Matches = MergedResults;
297    if (mMoveMatchesToEOL) {
298        StreamSetBuffer * OriginalMatches = Matches;
299        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
300        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
301        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
302    }
303
304//    if (MaxCountFlag > 0) {
305//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
306//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
307//        StreamSetBuffer * const AllMatches = Matches;
308//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
309//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
310//    }
311
312//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
313
314};
315std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio) {
316
317    this->initREs(REs);
318    auto mGrepDriver = &mPxDriver;
319
320    auto & idb = mGrepDriver->getBuilder();
321    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
322    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
323    int MaxCountFlag = 0;
324
325    //  Regular Expression Processing and Analysis Phase
326    const auto nREs = mREs.size();
327
328    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
329
330
331    std::map<std::string, StreamSetBuffer *> propertyStream;
332
333    std::vector<std::string> externalStreamNames;
334    std::set<re::Name *> UnicodeProperties;
335
336    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
337    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
338
339    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
340    mREs[0] = transformCCs(mpx.get(), mREs[0]);
341    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
342    auto numOfCharacterClasses = mpx_basis.size();
343    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
344
345    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
346    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
347
348    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
349    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
350    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
351
352    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
353    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
354    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
355    StreamSetBuffer * decompressedCombinedStream = nullptr;
356
357    if (useAio) {
358        decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
359    } else {
360        decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
361    }
362
363    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
364    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
365    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
366    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
367
368    /*
369    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
370    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
371     */
372
373    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
374    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
375    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
376
377    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
378    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
379    MatchResultsBufs[0] = MatchResults;
380
381    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
382    if (mREs.size() > 1) {
383        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
384        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
385        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
386    }
387    StreamSetBuffer * Matches = MergedResults;
388    if (mMoveMatchesToEOL) {
389        StreamSetBuffer * OriginalMatches = Matches;
390        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
391        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
392        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
393    }
394
395    if (MaxCountFlag > 0) {
396        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
397        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
398        StreamSetBuffer * const AllMatches = Matches;
399        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
400        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
401    }
402
403    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
404};
405
406std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
407        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
408
409    this->initREs(REs);
410    auto mGrepDriver = &mPxDriver;
411
412    auto & idb = mGrepDriver->getBuilder();
413    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
414    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
415    int MaxCountFlag = 0;
416
417    //  Regular Expression Processing and Analysis Phase
418    const auto nREs = mREs.size();
419
420    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
421
422    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
423
424
425    std::map<std::string, StreamSetBuffer *> propertyStream;
426
427    for(unsigned i = 0; i < nREs; ++i) {
428        std::vector<std::string> externalStreamNames;
429        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
430
431        std::set<re::Name *> UnicodeProperties;
432
433        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
434        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
435        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
436        MatchResultsBufs[i] = MatchResults;
437    }
438
439    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
440    if (mREs.size() > 1) {
441        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
442        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
443        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
444    }
445    StreamSetBuffer * Matches = MergedResults;
446    if (mMoveMatchesToEOL) {
447        StreamSetBuffer * OriginalMatches = Matches;
448        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
449        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
450        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
451    }
452
453    if (MaxCountFlag > 0) {
454        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
455        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
456        StreamSetBuffer * const AllMatches = Matches;
457        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
458        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
459    }
460
461    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
462
463}
464
465void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
466    auto main = this->getScanMatchGrepMainFunction();
467    std::ostringstream s;
468    EmitMatch accum("", false, false, s);
469
470    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
471    llvm::outs() << s.str();
472}
473
474void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
475    auto & iBuilder = mPxDriver.getBuilder();
476    this->generateScanMatchMainFunc(iBuilder);
477
478    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
479
480    // GeneratePipeline
481    this->generateLoadByteStreamAndBitStream(iBuilder);
482    this->generateExtractAndDepositMarkers(iBuilder);
483
484    auto swizzle = this->generateSwizzleExtractData(iBuilder);
485
486    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
487    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
488
489    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
490    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
491
492    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
493    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
494
495    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
496    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
497
498    // Produce unswizzled bit streams
499    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
500    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
501    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
502
503    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
504    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
505
506    StreamSetBuffer * LineBreakStream;
507    StreamSetBuffer * Matches;
508    std::vector<re::RE*> res = {regex};
509    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
510
511    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
512    scanMatchK->setInitialArguments({match_accumulator});
513    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
514    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
515    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
516
517    mPxDriver.generatePipelineIR();
518    mPxDriver.deallocateBuffers();
519
520    iBuilder->CreateRetVoid();
521
522    mPxDriver.finalizeObject();
523}
524
525void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE* regex) {
526    auto & iBuilder = mPxDriver.getBuilder();
527    this->generateMainFunc(iBuilder);
528
529    // GeneratePipeline
530    this->generateLoadByteStreamAndBitStream(iBuilder);
531
532    std::vector<re::RE*> res = {regex};
533    this->generateMultiplexingCompressedBitStream(res);
534
535    mPxDriver.generatePipelineIR();
536    mPxDriver.deallocateBuffers();
537
538    iBuilder->CreateRetVoid();
539
540    mPxDriver.finalizeObject();
541}
542
543void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline2(re::RE* regex) {
544    auto & iBuilder = mPxDriver.getBuilder();
545    this->generateCountOnlyMainFunc(iBuilder);
546
547    // GeneratePipeline
548    this->generateLoadByteStreamAndBitStream(iBuilder);
549//    this->generateExtractAndDepositMarkers(iBuilder);
550
551    StreamSetBuffer * LineBreakStream;
552    StreamSetBuffer * Matches;
553    std::vector<re::RE*> res = {regex};
554    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
555
556    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
557    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
558    mPxDriver.generatePipelineIR();
559
560    iBuilder->setKernel(matchCountK);
561    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
562    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
563
564    mPxDriver.deallocateBuffers();
565
566    iBuilder->CreateRet(matchedLineCount);
567
568
569    mPxDriver.finalizeObject();
570}
571
572void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
573    auto & iBuilder = mPxDriver.getBuilder();
574    this->generateCountOnlyMainFunc(iBuilder);
575
576    // GeneratePipeline
577    this->generateLoadByteStreamAndBitStream(iBuilder);
578
579    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
580
581    StreamSetBuffer * LineBreakStream;
582    StreamSetBuffer * Matches;
583    std::vector<re::RE*> res = {regex};
584    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
585/*
586    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
587    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
588    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
589
590    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
591    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
592    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
593*/
594    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
595
596    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
597    mPxDriver.generatePipelineIR();
598
599    iBuilder->setKernel(matchCountK);
600    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
601    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
602    mPxDriver.deallocateBuffers();
603
604    iBuilder->CreateRet(matchedLineCount);
605
606    mPxDriver.finalizeObject();
607}
608
609void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter) {
610    auto & iBuilder = mPxDriver.getBuilder();
611    this->generateCountOnlyMainFunc(iBuilder);
612
613    this->generateLoadByteStream(iBuilder);
614    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter);
615
616
617    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
618    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
619//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, cc::BitNumbering::BigEndian, "a");
620    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
621
622
623    StreamSetBuffer * LineBreakStream;
624    StreamSetBuffer * Matches;
625    std::vector<re::RE*> res = {regex};
626    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
627
628
629//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
630//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
631//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
632
633    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
634    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
635    mPxDriver.generatePipelineIR();
636
637    iBuilder->setKernel(matchCountK);
638    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
639    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
640
641    mPxDriver.deallocateBuffers();
642
643    iBuilder->CreateRet(matchedLineCount);
644
645    mPxDriver.finalizeObject();
646}
647
648void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
649    auto & iBuilder = mPxDriver.getBuilder();
650    this->generateCountOnlyMainFunc(iBuilder);
651
652    // GeneratePipeline
653//    this->generateLoadByteStreamAndBitStream(iBuilder);
654    this->generateLoadByteStream(iBuilder);
655    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
656
657
658    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
659    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
660//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
661    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
662
663
664    StreamSetBuffer * LineBreakStream;
665    StreamSetBuffer * Matches;
666    std::vector<re::RE*> res = {regex};
667    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
668
669
670//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
671//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
672//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
673
674    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
675    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
676    mPxDriver.generatePipelineIR();
677
678    iBuilder->setKernel(matchCountK);
679    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
680    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
681
682    mPxDriver.deallocateBuffers();
683
684    iBuilder->CreateRet(matchedLineCount);
685
686    mPxDriver.finalizeObject();
687
688}
689
690
691void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
692    auto & iBuilder = mPxDriver.getBuilder();
693    this->generateCountOnlyMainFunc(iBuilder);
694
695    // GeneratePipeline
696    this->generateLoadByteStreamAndBitStream(iBuilder);
697    this->generateExtractAndDepositMarkers(iBuilder);
698
699    StreamSetBuffer * LineBreakStream;
700    StreamSetBuffer * Matches;
701    std::vector<re::RE*> res = {regex};
702    if (mEnableMultiplexing) {
703        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
704    } else {
705        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
706        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
707        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
708        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
709
710        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
711        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
712        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
713
714        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
715    };
716
717    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
718    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
719    mPxDriver.generatePipelineIR();
720
721    iBuilder->setKernel(matchCountK);
722    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
723    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
724
725    mPxDriver.deallocateBuffers();
726
727    iBuilder->CreateRet(matchedLineCount);
728
729    mPxDriver.finalizeObject();
730}
731
732
733void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
734    auto & iBuilder = mPxDriver.getBuilder();
735    this->generateCountOnlyMainFunc(iBuilder);
736
737
738    // GeneratePipeline
739    this->generateLoadByteStreamAndBitStream(iBuilder);
740    this->generateExtractAndDepositMarkers(iBuilder);
741
742
743    StreamSetBuffer * LineBreakStream;
744    StreamSetBuffer * Matches;
745    std::vector<re::RE*> res = {regex};
746    if (mEnableMultiplexing) {
747        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
748    } else {
749        auto swizzle = this->generateSwizzleExtractData(iBuilder);
750
751        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
752        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
753
754        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
755        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
756
757
758        // split PDEP into 2 kernel will be a little slower in single thread environment
759/*
760    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
761    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
762
763    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
764    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
765*/
766
767        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
768        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
769
770        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
771        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
772
773        // Produce unswizzled bit streams
774        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
775        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
776        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
777
778
779        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
780    };
781
782    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
783    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
784    mPxDriver.generatePipelineIR();
785
786    iBuilder->setKernel(matchCountK);
787    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
788    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
789
790    mPxDriver.deallocateBuffers();
791
792    iBuilder->CreateRet(matchedLineCount);
793
794    mPxDriver.finalizeObject();
795}
796
797ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
798    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
799}
800CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
801    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
802}
803
804void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
805    Module * M = iBuilder->getModule();
806    Type * const int64Ty = iBuilder->getInt64Ty();
807    Type * const sizeTy = iBuilder->getSizeTy();
808    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
809//    Type * const voidTy = iBuilder->getVoidTy();
810    Type * const inputType = iBuilder->getInt8PtrTy();
811
812    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
813    main->setCallingConv(CallingConv::C);
814    Function::arg_iterator args = main->arg_begin();
815    mInputStream = &*(args++);
816    mInputStream->setName("input");
817
818    mHeaderSize = &*(args++);
819    mHeaderSize->setName("mHeaderSize");
820
821    mFileSize = &*(args++);
822    mFileSize->setName("mFileSize");
823
824    mHasBlockChecksum = &*(args++);
825    mHasBlockChecksum->setName("mHasBlockChecksum");
826    // TODO for now, we do not handle blockCheckSum
827    mHasBlockChecksum = iBuilder->getInt1(false);
828
829    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
830}
831
832void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
833    Module * M = iBuilder->getModule();
834    Type * const sizeTy = iBuilder->getSizeTy();
835    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
836    Type * const voidTy = iBuilder->getVoidTy();
837    Type * const inputType = iBuilder->getInt8PtrTy();
838    Type * const intAddrTy = iBuilder->getIntAddrTy();
839
840    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
841    main->setCallingConv(CallingConv::C);
842    Function::arg_iterator args = main->arg_begin();
843    mInputStream = &*(args++);
844    mInputStream->setName("input");
845
846    mHeaderSize = &*(args++);
847    mHeaderSize->setName("mHeaderSize");
848
849    mFileSize = &*(args++);
850    mFileSize->setName("mFileSize");
851
852    mHasBlockChecksum = &*(args++);
853    mHasBlockChecksum->setName("mHasBlockChecksum");
854
855    match_accumulator = &*(args++);
856    match_accumulator->setName("match_accumulator");
857
858    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
859}
Note: See TracBrowser for help on using the repository browser.