source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6133

Last change on this file since 6133 was 6133, checked in by xwa163, 8 months ago
  1. Add sourceCC in multiplexed CC
  2. Remove workaround FakeBasisBits from ICGrep
  3. Implement Swizzled version of LZParabix
  4. Initial check-in for SwizzleByGather kernel
File size: 47.0 KB
Line 
1
2#include "LZ4GrepGenerator.h"
3
4#include <boost/iostreams/device/mapped_file.hpp>
5
6#include <llvm/Support/PrettyStackTrace.h>
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_bitstream_not_kernel.h>
24#include <kernels/lz4/aio/lz4_i4_bytestream_aio.h>
25#include <kernels/fake_stream_generating_kernel.h>
26#include <kernels/bitstream_pdep_kernel.h>
27#include <kernels/bitstream_gather_pdep_kernel.h>
28#include <re/re_toolchain.h>
29
30#include <re/collect_ccs.h>
31#include <re/replaceCC.h>
32
33#include <re/casing.h>
34#include <re/exclude_CC.h>
35#include <re/to_utf8.h>
36#include <re/re_analysis.h>
37#include <re/re_name_resolve.h>
38#include <re/re_name_gather.h>
39#include <re/re_multiplex.h>
40#include <re/re_utility.h>
41
42#include <UCD/resolve_properties.h>
43#include <kernels/charclasses.h>
44#include <kernels/grep_kernel.h>
45#include <kernels/UCD_property_kernel.h>
46#include <kernels/grapheme_kernel.h>
47#include <kernels/linebreak_kernel.h>
48#include <kernels/streams_merge.h>
49#include <kernels/scanmatchgen.h>
50#include <kernels/until_n.h>
51#include <re/casing.h>
52#include <re/exclude_CC.h>
53#include <re/to_utf8.h>
54#include <re/re_analysis.h>
55#include <re/re_name_resolve.h>
56#include <re/re_name_gather.h>
57#include <re/re_multiplex.h>
58#include <re/re_utility.h>
59#include <re/grapheme_clusters.h>
60#include <re/printer_re.h>
61#include <llvm/Support/raw_ostream.h>
62#include <llvm/Support/Debug.h>
63#include <kernels/lz4/lz4_block_decoder.h>
64#include <kernels/lz4/aio/lz4_swizzled_aio.h>
65#include <kernels/lz4/aio/lz4_bitstream_aio.h>
66#include <re/re_seq.h>
67#include <kernels/lz4/aio/lz4_bytestream_aio.h>
68
69namespace re { class CC; }
70
71using namespace llvm;
72using namespace parabix;
73using namespace kernel;
74using namespace grep;
75
76LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
77    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
78    mMoveMatchesToEOL = true;
79}
80
81void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
82    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
83        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
84    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
85        mBreakCC = re::makeByte(0);  // Null
86    } else {
87        mBreakCC = re::makeByte(0x0A); // LF
88    }
89    re::RE * anchorRE = mBreakCC;
90    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
91        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
92        anchorName->setDefinition(re::makeUnicodeBreak());
93        anchorRE = anchorName;
94    }
95
96    mREs = REs;
97    bool allAnchored = true;
98    for(unsigned i = 0; i < mREs.size(); ++i) {
99        if (!hasEndAnchor(mREs[i])) allAnchored = false;
100        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
101        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
102        mREs[i] = resolveAnchors(mREs[i], anchorRE);
103        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
104        mREs[i] = regular_expression_passes(mREs[i]);
105    }
106    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
107
108}
109
110
111parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
112//    auto mGrepDriver = &mPxDriver;
113    auto & idb = mPxDriver.getBuilder();
114    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
115    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
116    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
117    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
118    return LineFeedStream;
119}
120
121parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithByteStreamAioApproach(
122        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
123    auto mGrepDriver = &mPxDriver;
124    auto & b = mGrepDriver->getBuilder();
125
126    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
127
128    StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b));
129    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian, prefix, numberOfStream);
130    mPxDriver.makeKernelCall(p2sK, {compressedBitStream}, {mtxByteStream});
131
132    StreamSetBuffer * const decompressionMtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b), 1);
133    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(b, true);
134    lz4AioK->setInitialArguments({mFileSize});
135    mPxDriver.makeKernelCall(
136            lz4AioK,
137            {
138                    mCompressedByteStream,
139                    // Block Data
140                    blockInfo.isCompress,
141                    blockInfo.blockStart,
142                    blockInfo.blockEnd,
143                    mtxByteStream
144            }, {
145                    decompressionMtxByteStream
146            });
147
148    StreamSetBuffer * const decompressedMtxBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8), this->getDecompressedBufferBlocks(b));
149
150    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(b, cc::BitNumbering::BigEndian, true, prefix, numberOfStream);
151    mPxDriver.makeKernelCall(s2pk, {decompressionMtxByteStream}, {decompressedMtxBitStream});
152
153    return decompressedMtxBitStream;
154}
155
156StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
157        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
158    auto mGrepDriver = &mPxDriver;
159    auto & b = mGrepDriver->getBuilder();
160
161    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
162
163    // Produce unswizzled bit streams
164    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
165    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "source");
166    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
167
168    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
169
170
171    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(b, 4, 1, 4);
172    lz4AioK->setInitialArguments({mFileSize});
173    mPxDriver.makeKernelCall(
174            lz4AioK,
175            {
176                    mCompressedByteStream,
177//                    Extenders,
178
179                    // Block Data
180                    blockInfo.isCompress,
181                    blockInfo.blockStart,
182                    blockInfo.blockEnd,
183
184                    u16Swizzle0,
185            }, {
186                    decompressedSwizzled0,
187            });
188
189
190
191    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(b));
192    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "dst");
193    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
194
195    return decompressionBitStream;
196
197}
198
199StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledApproach(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
200    auto mGrepDriver = &mPxDriver;
201    auto & idb = mGrepDriver->getBuilder();
202
203    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
204    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
205    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
206
207    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
208    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
209    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
210
211    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
212    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
213    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
214
215    // Produce unswizzled bit streams
216    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
217    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
218    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
219
220    return matchCopiedBits;
221}
222parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
223    if (numberOfStream == 4) {
224        return this->convertCompressedBitsStreamWithSwizzledApproach(compressedBitStream, numberOfStream, prefix);
225    }
226
227    auto mGrepDriver = &mPxDriver;
228    auto & idb = mGrepDriver->getBuilder();
229
230    // Extract (Deletion)
231    this->generateCompressionMarker(idb);
232
233    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
234    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
235
236    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
237    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
238
239    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
240    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
241    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
242
243    // Deposit
244    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
245    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
246    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
247
248    // Match Copy
249    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
250    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
251    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
252
253    return matchCopiedBits;
254}
255
256parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
257    auto mGrepDriver = &mPxDriver;
258    auto & idb = mGrepDriver->getBuilder();
259    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
260
261    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
262    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
263    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
264    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
265}
266
267void LZ4GrepGenerator::generateMultiplexingCompressedBitStream(std::vector<re::RE *> &REs) {
268    this->initREs(REs);
269    auto mGrepDriver = &mPxDriver;
270
271    auto & idb = mGrepDriver->getBuilder();
272    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
273    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
274
275
276    //  Regular Expression Processing and Analysis Phase
277    const auto nREs = mREs.size();
278
279    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
280
281    std::map<std::string, StreamSetBuffer *> propertyStream;
282
283    std::vector<std::string> externalStreamNames;
284    std::set<re::Name *> UnicodeProperties;
285
286    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
287    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
288
289    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
290    mREs[0] = transformCCs(mpx.get(), mREs[0]);
291    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
292    auto numOfCharacterClasses = mpx_basis.size();
293    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
294
295    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
296    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
297
298    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
299    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
300    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
301
302    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
303    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
304    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
305    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(
306            combinedStream, 1 + numOfCharacterClasses, "combined");
307
308    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
309    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
310    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
311    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
312
313
314
315    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
316    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
317    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
318
319    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
320    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
321    MatchResultsBufs[0] = MatchResults;
322
323    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
324    if (mREs.size() > 1) {
325        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
326        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
327        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
328    }
329    StreamSetBuffer * Matches = MergedResults;
330    if (mMoveMatchesToEOL) {
331        StreamSetBuffer * OriginalMatches = Matches;
332        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
333        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
334        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
335    }
336
337//    if (MaxCountFlag > 0) {
338//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
339//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
340//        StreamSetBuffer * const AllMatches = Matches;
341//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
342//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
343//    }
344
345//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
346
347};
348std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled, bool useByteStream) {
349
350    this->initREs(REs);
351    auto mGrepDriver = &mPxDriver;
352
353    auto & idb = mGrepDriver->getBuilder();
354    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
355    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
356    int MaxCountFlag = 0;
357
358    //  Regular Expression Processing and Analysis Phase
359    const auto nREs = mREs.size();
360
361    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
362
363
364    std::map<std::string, StreamSetBuffer *> propertyStream;
365
366    std::vector<std::string> externalStreamNames;
367    std::set<re::Name *> UnicodeProperties;
368
369    re::CC* linefeedCC = re::makeCC(0x0A);
370
371    re::Seq* seq = re::makeSeq();
372    seq->push_back(mREs[0]);
373    seq->push_back(std::move(linefeedCC));
374
375
376    const auto UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
377    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
378
379    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
380    mREs[0] = transformCCs(mpx.get(), mREs[0]);
381    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
382    auto numOfCharacterClasses = mpx_basis.size();
383    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
384
385    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
386    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
387
388    StreamSetBuffer * decompressedCharClasses = nullptr;
389    if (useSwizzled) {
390        if (useAio) {
391            decompressedCharClasses = this->convertCompressedBitsStreamWithSwizzledAioApproach(CharClasses, numOfCharacterClasses, "combined");
392        } else {
393            decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "combined");
394        }
395    } else if (useByteStream){
396        decompressedCharClasses = this->convertCompressedBitsStreamWithByteStreamAioApproach(CharClasses, numOfCharacterClasses, "combined");
397    } else {
398        auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CharClasses}, "combined");
399        decompressedCharClasses = ret[0];
400    }
401
402    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
403    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
404    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
405
406    StreamSetBuffer * LineBreakStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
407    kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
408    mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {LineBreakStream});
409
410
411    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
412    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
413    MatchResultsBufs[0] = MatchResults;
414
415    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
416    if (mREs.size() > 1) {
417        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
418        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
419        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
420    }
421    StreamSetBuffer * Matches = MergedResults;
422    if (mMoveMatchesToEOL) {
423        StreamSetBuffer * OriginalMatches = Matches;
424        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
425        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
426        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
427    }
428
429    if (MaxCountFlag > 0) {
430        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
431        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
432        StreamSetBuffer * const AllMatches = Matches;
433        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
434        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
435    }
436
437    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
438};
439
440std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
441        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
442
443    this->initREs(REs);
444    auto mGrepDriver = &mPxDriver;
445
446    auto & idb = mGrepDriver->getBuilder();
447    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
448    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
449    int MaxCountFlag = 0;
450
451    //  Regular Expression Processing and Analysis Phase
452    const auto nREs = mREs.size();
453
454    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
455
456    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
457
458
459    std::map<std::string, StreamSetBuffer *> propertyStream;
460
461    for(unsigned i = 0; i < nREs; ++i) {
462        std::vector<std::string> externalStreamNames;
463        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
464
465        if (mEnableMultiplexing) {
466            const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
467            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
468
469            mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
470            mREs[i] = transformCCs(mpx.get(), mREs[i]);
471            std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
472            auto numOfCharacterClasses = mpx_basis.size();
473            StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
474            kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
475            mGrepDriver->makeKernelCall(ccK, {decompressedBasisBits}, {CharClasses});
476
477            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
478            icgrepInputSets.push_back(CharClasses);
479            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
480            MatchResultsBufs[i] = MatchResults;
481        } else {
482            std::set<re::Name *> UnicodeProperties;
483
484            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
485            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
486            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
487            MatchResultsBufs[i] = MatchResults;
488        }
489    }
490
491    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
492    if (mREs.size() > 1) {
493        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
494        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
495        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
496    }
497    StreamSetBuffer * Matches = MergedResults;
498    if (mMoveMatchesToEOL) {
499        StreamSetBuffer * OriginalMatches = Matches;
500        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
501        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
502        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
503    }
504
505    if (MaxCountFlag > 0) {
506        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
507        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
508        StreamSetBuffer * const AllMatches = Matches;
509        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
510        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
511    }
512
513    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
514
515}
516
517void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
518    auto main = this->getScanMatchGrepMainFunction();
519    std::ostringstream s;
520    EmitMatch accum("", false, false, s);
521
522    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
523    llvm::outs() << s.str();
524}
525
526void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
527    auto & iBuilder = mPxDriver.getBuilder();
528    this->generateScanMatchMainFunc(iBuilder);
529
530    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
531
532    // GeneratePipeline
533    this->generateLoadByteStreamAndBitStream(iBuilder);
534    this->generateExtractAndDepositMarkers(iBuilder);
535
536    auto swizzle = this->generateSwizzleExtractData(iBuilder);
537
538    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
539    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
540
541    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
542    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
543
544    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
545    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
546
547    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
548    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
549
550    // Produce unswizzled bit streams
551    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
552    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
553    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
554
555    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
556    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
557
558    StreamSetBuffer * LineBreakStream;
559    StreamSetBuffer * Matches;
560    std::vector<re::RE*> res = {regex};
561    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
562
563    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
564    scanMatchK->setInitialArguments({match_accumulator});
565    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
566    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
567    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
568
569    mPxDriver.generatePipelineIR();
570    mPxDriver.deallocateBuffers();
571
572    iBuilder->CreateRetVoid();
573
574    mPxDriver.finalizeObject();
575}
576
577void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
578    auto & iBuilder = mPxDriver.getBuilder();
579    this->generateCountOnlyMainFunc(iBuilder);
580
581    // GeneratePipeline
582    this->generateLoadByteStreamAndBitStream(iBuilder);
583//    this->generateExtractAndDepositMarkers(iBuilder);
584
585    StreamSetBuffer * LineBreakStream;
586    StreamSetBuffer * Matches;
587    std::vector<re::RE*> res = {regex};
588    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
589
590    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
591    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
592    mPxDriver.generatePipelineIR();
593
594    iBuilder->setKernel(matchCountK);
595    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
596    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
597
598    mPxDriver.deallocateBuffers();
599
600    iBuilder->CreateRet(matchedLineCount);
601
602
603    mPxDriver.finalizeObject();
604}
605
606void LZ4GrepGenerator::generateByteStreamMultiplexingAioPipeline(re::RE* regex) {
607    auto & iBuilder = mPxDriver.getBuilder();
608    this->generateCountOnlyMainFunc(iBuilder);
609
610    this->generateLoadByteStreamAndBitStream(iBuilder);
611    StreamSetBuffer * LineBreakStream;
612    StreamSetBuffer * Matches;
613    std::vector<re::RE*> res = {regex};
614    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false, true);
615
616    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
617    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
618    mPxDriver.generatePipelineIR();
619
620    iBuilder->setKernel(matchCountK);
621    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
622    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
623
624    mPxDriver.deallocateBuffers();
625
626    iBuilder->CreateRet(matchedLineCount);
627
628    mPxDriver.finalizeObject();
629}
630
631
632void LZ4GrepGenerator::generateMultiplexingBitStreamAioPipeline(re::RE* regex) {
633    auto & iBuilder = mPxDriver.getBuilder();
634    this->generateCountOnlyMainFunc(iBuilder);
635
636    this->generateLoadByteStreamAndBitStream(iBuilder);
637    StreamSetBuffer * LineBreakStream;
638    StreamSetBuffer * Matches;
639    std::vector<re::RE*> res = {regex};
640    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false);
641
642    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
643    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
644    mPxDriver.generatePipelineIR();
645
646    iBuilder->setKernel(matchCountK);
647    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
648    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
649
650    mPxDriver.deallocateBuffers();
651
652    iBuilder->CreateRet(matchedLineCount);
653
654    mPxDriver.finalizeObject();
655}
656
657void LZ4GrepGenerator::generateBitStreamAioPipeline(re::RE* regex) {
658    auto & iBuilder = mPxDriver.getBuilder();
659    this->generateCountOnlyMainFunc(iBuilder);
660
661    // GeneratePipeline
662    this->generateLoadByteStreamAndBitStream(iBuilder);
663    StreamSetBuffer * const decompressionBitStream = this->generateBitStreamAIODecompression(iBuilder);
664
665    StreamSetBuffer * LineBreakStream;
666    StreamSetBuffer * Matches;
667    std::vector<re::RE*> res = {regex};
668    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
669
670    /*
671    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
672    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
673    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
674
675    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
676    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
677    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
678    */
679    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
680
681    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
682    mPxDriver.generatePipelineIR();
683
684    iBuilder->setKernel(matchCountK);
685    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
686    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
687    mPxDriver.deallocateBuffers();
688
689    iBuilder->CreateRet(matchedLineCount);
690
691    mPxDriver.finalizeObject();
692}
693
694void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
695    auto & iBuilder = mPxDriver.getBuilder();
696    this->generateCountOnlyMainFunc(iBuilder);
697
698    // GeneratePipeline
699    this->generateLoadByteStreamAndBitStream(iBuilder);
700
701    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
702
703    StreamSetBuffer * LineBreakStream;
704    StreamSetBuffer * Matches;
705    std::vector<re::RE*> res = {regex};
706    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
707/*
708    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
709    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
710    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
711
712    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
713    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
714    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
715*/
716    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
717
718    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
719    mPxDriver.generatePipelineIR();
720
721    iBuilder->setKernel(matchCountK);
722    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
723    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
724    mPxDriver.deallocateBuffers();
725
726    iBuilder->CreateRet(matchedLineCount);
727
728    mPxDriver.finalizeObject();
729}
730
731void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
732    auto & iBuilder = mPxDriver.getBuilder();
733    this->generateCountOnlyMainFunc(iBuilder);
734
735    this->generateLoadByteStream(iBuilder);
736    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
737
738
739    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
740    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
741    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
742
743
744    StreamSetBuffer * LineBreakStream;
745    StreamSetBuffer * Matches;
746    std::vector<re::RE*> res = {regex};
747    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
748
749
750//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
751//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
752//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
753
754    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
755    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
756    mPxDriver.generatePipelineIR();
757
758    iBuilder->setKernel(matchCountK);
759    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
760    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
761
762    mPxDriver.deallocateBuffers();
763
764    iBuilder->CreateRet(matchedLineCount);
765
766    mPxDriver.finalizeObject();
767}
768
769
770
771void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
772    auto & iBuilder = mPxDriver.getBuilder();
773    this->generateCountOnlyMainFunc(iBuilder);
774
775    // GeneratePipeline
776    this->generateLoadByteStream(iBuilder);
777//    this->generateLoadByteStreamAndBitStream(iBuilder);
778
779    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
780
781
782    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
783    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
784    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
785
786
787    StreamSetBuffer * LineBreakStream;
788    StreamSetBuffer * Matches;
789    std::vector<re::RE*> res = {regex};
790    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
791
792
793//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
794//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
795//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
796
797    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
798    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
799    mPxDriver.generatePipelineIR();
800
801    iBuilder->setKernel(matchCountK);
802    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
803    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
804
805    mPxDriver.deallocateBuffers();
806
807    iBuilder->CreateRet(matchedLineCount);
808
809    mPxDriver.finalizeObject();
810
811}
812
813
814void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE *regex, bool enableGather) {
815    auto & iBuilder = mPxDriver.getBuilder();
816    this->generateCountOnlyMainFunc(iBuilder);
817
818    // GeneratePipeline
819    this->generateLoadByteStreamAndBitStream(iBuilder);
820    this->generateExtractAndDepositMarkers(iBuilder);
821
822    StreamSetBuffer * LineBreakStream;
823    StreamSetBuffer * Matches;
824    std::vector<re::RE*> res = {regex};
825    if (mEnableMultiplexing) {
826        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
827    } else {
828        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
829        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
830        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
831        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
832
833        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
834        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
835        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
836
837        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
838    };
839
840    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
841    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
842    mPxDriver.generatePipelineIR();
843
844    iBuilder->setKernel(matchCountK);
845    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
846    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
847
848    mPxDriver.deallocateBuffers();
849
850    iBuilder->CreateRet(matchedLineCount);
851
852    mPxDriver.finalizeObject();
853}
854
855
856void LZ4GrepGenerator::generateSwizzledCountOnlyGrepPipeline(re::RE *regex) {
857    auto & iBuilder = mPxDriver.getBuilder();
858    this->generateCountOnlyMainFunc(iBuilder);
859
860
861    // GeneratePipeline
862    this->generateLoadByteStreamAndBitStream(iBuilder);
863    this->generateExtractAndDepositMarkers(iBuilder);
864
865
866    StreamSetBuffer * LineBreakStream;
867    StreamSetBuffer * Matches;
868    std::vector<re::RE*> res = {regex};
869    if (mEnableMultiplexing) {
870        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
871    } else {
872        auto swizzle = this->generateSwizzleExtractData(iBuilder);
873
874        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
875        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
876
877        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
878        mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
879
880
881        // split PDEP into 2 kernel will be a little slower in single thread environment
882/*
883    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
884    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
885
886    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
887    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
888*/
889
890        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
891        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
892
893        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
894        mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
895
896        // Produce unswizzled bit streams
897        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
898        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
899        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
900
901
902        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
903    };
904
905    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
906    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
907    mPxDriver.generatePipelineIR();
908
909    iBuilder->setKernel(matchCountK);
910    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
911    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
912
913    mPxDriver.deallocateBuffers();
914
915    iBuilder->CreateRet(matchedLineCount);
916
917    mPxDriver.finalizeObject();
918}
919
920ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
921    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
922}
923CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
924    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
925}
926
927void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
928    Module * M = iBuilder->getModule();
929    Type * const int64Ty = iBuilder->getInt64Ty();
930    Type * const sizeTy = iBuilder->getSizeTy();
931    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
932//    Type * const voidTy = iBuilder->getVoidTy();
933    Type * const inputType = iBuilder->getInt8PtrTy();
934
935    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
936    main->setCallingConv(CallingConv::C);
937    Function::arg_iterator args = main->arg_begin();
938    mInputStream = &*(args++);
939    mInputStream->setName("input");
940
941    mHeaderSize = &*(args++);
942    mHeaderSize->setName("mHeaderSize");
943
944    mFileSize = &*(args++);
945    mFileSize->setName("mFileSize");
946
947    mHasBlockChecksum = &*(args++);
948    mHasBlockChecksum->setName("mHasBlockChecksum");
949    // TODO for now, we do not handle blockCheckSum
950    mHasBlockChecksum = iBuilder->getInt1(false);
951
952    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
953}
954
955void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
956    Module * M = iBuilder->getModule();
957    Type * const sizeTy = iBuilder->getSizeTy();
958    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
959    Type * const voidTy = iBuilder->getVoidTy();
960    Type * const inputType = iBuilder->getInt8PtrTy();
961    Type * const intAddrTy = iBuilder->getIntAddrTy();
962
963    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
964    main->setCallingConv(CallingConv::C);
965    Function::arg_iterator args = main->arg_begin();
966    mInputStream = &*(args++);
967    mInputStream->setName("input");
968
969    mHeaderSize = &*(args++);
970    mHeaderSize->setName("mHeaderSize");
971
972    mFileSize = &*(args++);
973    mFileSize->setName("mFileSize");
974
975    mHasBlockChecksum = &*(args++);
976    mHasBlockChecksum->setName("mHasBlockChecksum");
977
978    match_accumulator = &*(args++);
979    match_accumulator->setName("match_accumulator");
980
981    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
982}
Note: See TracBrowser for help on using the repository browser.