source: icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp @ 6136

Last change on this file since 6136 was 6136, checked in by xwa163, 11 months ago
  1. Cleanup legacy slow LZ4 related kernels
  2. Rename lz4d_ext_dep to lz4_decompression
  3. Rename LZ4 AIO related kernels to LZ4 Decompression Kernel
File size: 32.1 KB
Line 
1
#include "LZ4GrepGenerator.h"

#include <boost/iostreams/device/mapped_file.hpp>

#include <llvm/Support/PrettyStackTrace.h>
#include <llvm/Support/ErrorHandling.h>

#include <cc/cc_compiler.h>

#include <kernels/cc_kernel.h>
#include <kernels/s2p_kernel.h>
#include <kernels/p2s_kernel.h>
#include <kernels/source_kernel.h>
#include <kernels/stdout_kernel.h>
#include <kernels/kernel_builder.h>
#include <kernels/deletion.h>
#include <kernels/swizzle.h>
#include <kernels/pdep_kernel.h>
#include <kernels/swizzled_multiple_pdep_kernel.h>
#include <kernels/fake_stream_generating_kernel.h>
#include <kernels/bitstream_pdep_kernel.h>
#include <kernels/bitstream_gather_pdep_kernel.h>
#include <re/re_toolchain.h>

#include <re/collect_ccs.h>
#include <re/replaceCC.h>

#include <re/casing.h>
#include <re/exclude_CC.h>
#include <re/to_utf8.h>
#include <re/re_analysis.h>
#include <re/re_name_resolve.h>
#include <re/re_name_gather.h>
#include <re/re_multiplex.h>
#include <re/re_utility.h>

#include <UCD/resolve_properties.h>
#include <kernels/charclasses.h>
#include <kernels/grep_kernel.h>
#include <kernels/UCD_property_kernel.h>
#include <kernels/grapheme_kernel.h>
#include <kernels/linebreak_kernel.h>
#include <kernels/streams_merge.h>
#include <kernels/scanmatchgen.h>
#include <kernels/until_n.h>
#include <re/grapheme_clusters.h>
#include <re/printer_re.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/Debug.h>
#include <kernels/lz4/lz4_block_decoder.h>
#include <kernels/lz4/decompression/lz4_swizzled_decompression.h>
#include <kernels/lz4/decompression/lz4_bitstream_decompression.h>
#include <re/re_seq.h>
#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
55
56namespace re { class CC; }
57
58using namespace llvm;
59using namespace parabix;
60using namespace kernel;
61using namespace grep;
62
63LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
64    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
65    mMoveMatchesToEOL = true;
66}
67
68void LZ4GrepGenerator::initREs(std::vector<re::RE *> & REs) {
69    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
70        mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
71    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
72        mBreakCC = re::makeByte(0);  // Null
73    } else {
74        mBreakCC = re::makeByte(0x0A); // LF
75    }
76    re::RE * anchorRE = mBreakCC;
77    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
78        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
79        anchorName->setDefinition(re::makeUnicodeBreak());
80        anchorRE = anchorName;
81    }
82
83    mREs = REs;
84    bool allAnchored = true;
85    for(unsigned i = 0; i < mREs.size(); ++i) {
86        if (!hasEndAnchor(mREs[i])) allAnchored = false;
87        mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
88        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
89        mREs[i] = resolveAnchors(mREs[i], anchorRE);
90        re::gatherUnicodeProperties(mREs[i], mUnicodeProperties);
91        mREs[i] = regular_expression_passes(mREs[i]);
92    }
93    if (allAnchored && (mGrepRecordBreak != GrepRecordBreakKind::Unicode)) mMoveMatchesToEOL = false;
94
95}
96
97
98parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
99//    auto mGrepDriver = &mPxDriver;
100    auto & idb = mPxDriver.getBuilder();
101    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
102    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
103    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
104    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
105    return LineFeedStream;
106}
107
108parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithByteStreamAioApproach(
109        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
110    auto mGrepDriver = &mPxDriver;
111    auto & b = mGrepDriver->getBuilder();
112
113    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
114
115    StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b));
116    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian, prefix, numberOfStream);
117    mPxDriver.makeKernelCall(p2sK, {compressedBitStream}, {mtxByteStream});
118
119    StreamSetBuffer * const decompressionMtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b), 1);
120    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(b, true);
121    lz4AioK->setInitialArguments({mFileSize});
122    mPxDriver.makeKernelCall(
123            lz4AioK,
124            {
125                    mCompressedByteStream,
126                    // Block Data
127                    blockInfo.isCompress,
128                    blockInfo.blockStart,
129                    blockInfo.blockEnd,
130                    mtxByteStream
131            }, {
132                    decompressionMtxByteStream
133            });
134
135    StreamSetBuffer * const decompressedMtxBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8), this->getDecompressedBufferBlocks(b));
136
137    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(b, cc::BitNumbering::BigEndian, true, prefix, numberOfStream);
138    mPxDriver.makeKernelCall(s2pk, {decompressionMtxByteStream}, {decompressedMtxBitStream});
139
140    return decompressedMtxBitStream;
141}
142
143StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
144        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
145    auto mGrepDriver = &mPxDriver;
146    auto & b = mGrepDriver->getBuilder();
147
148    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
149
150    // Produce unswizzled bit streams
151    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
152    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "source");
153    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
154
155    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
156
157
158    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(b, 4, 1, 4);
159    lz4AioK->setInitialArguments({mFileSize});
160    mPxDriver.makeKernelCall(
161            lz4AioK,
162            {
163                    mCompressedByteStream,
164//                    Extenders,
165
166                    // Block Data
167                    blockInfo.isCompress,
168                    blockInfo.blockStart,
169                    blockInfo.blockEnd,
170
171                    u16Swizzle0,
172            }, {
173                    decompressedSwizzled0,
174            });
175
176
177
178    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(b));
179    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "dst");
180    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
181
182    return decompressionBitStream;
183
184}
185
186
187std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled, bool useByteStream) {
188
189    this->initREs(REs);
190    auto mGrepDriver = &mPxDriver;
191
192    auto & idb = mGrepDriver->getBuilder();
193    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
194    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
195    int MaxCountFlag = 0;
196
197    //  Regular Expression Processing and Analysis Phase
198    const auto nREs = mREs.size();
199
200    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
201
202
203    std::map<std::string, StreamSetBuffer *> propertyStream;
204
205    std::vector<std::string> externalStreamNames;
206    std::set<re::Name *> UnicodeProperties;
207
208    re::CC* linefeedCC = re::makeCC(0x0A);
209
210    re::Seq* seq = re::makeSeq();
211    seq->push_back(mREs[0]);
212    seq->push_back(std::move(linefeedCC));
213
214
215    const auto UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
216    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
217
218    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
219    mREs[0] = transformCCs(mpx.get(), mREs[0]);
220    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
221    auto numOfCharacterClasses = mpx_basis.size();
222    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
223
224    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
225    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
226
227    StreamSetBuffer * decompressedCharClasses = nullptr;
228    if (useSwizzled) {
229        decompressedCharClasses = this->convertCompressedBitsStreamWithSwizzledAioApproach(CharClasses, numOfCharacterClasses, "combined");
230    } else if (useByteStream){
231        decompressedCharClasses = this->convertCompressedBitsStreamWithByteStreamAioApproach(CharClasses, numOfCharacterClasses, "combined");
232    } else {
233        auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CharClasses}, "combined");
234        decompressedCharClasses = ret[0];
235    }
236
237    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
238    StreamSetBuffer * u8NoFinalStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), this->getInputBufferBlocks(idb), 1);
239
240    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, std::vector<unsigned>({8, 1}));
241    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits, u8NoFinalStream});
242
243    StreamSetBuffer * LineBreakStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
244    kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
245    mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {LineBreakStream});
246
247
248    externalStreamNames.push_back("UTF8_nonfinal");
249
250    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
251    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, u8NoFinalStream, decompressedCharClasses}, {MatchResults});
252    MatchResultsBufs[0] = MatchResults;
253
254    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
255    if (mREs.size() > 1) {
256        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
257        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
258        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
259    }
260    StreamSetBuffer * Matches = MergedResults;
261    if (mMoveMatchesToEOL) {
262        StreamSetBuffer * OriginalMatches = Matches;
263        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
264        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
265        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
266    }
267
268    if (MaxCountFlag > 0) {
269        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
270        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
271        StreamSetBuffer * const AllMatches = Matches;
272        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
273        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
274    }
275
276    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
277};
278
279std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
280        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
281
282    this->initREs(REs);
283    auto mGrepDriver = &mPxDriver;
284
285    auto & idb = mGrepDriver->getBuilder();
286    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
287    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
288    int MaxCountFlag = 0;
289
290    //  Regular Expression Processing and Analysis Phase
291    const auto nREs = mREs.size();
292
293    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
294
295    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
296
297
298    std::map<std::string, StreamSetBuffer *> propertyStream;
299
300    for(unsigned i = 0; i < nREs; ++i) {
301        std::vector<std::string> externalStreamNames;
302        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
303
304        if (mEnableMultiplexing) {
305            const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
306            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
307
308            mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
309            mREs[i] = transformCCs(mpx.get(), mREs[i]);
310            std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
311            auto numOfCharacterClasses = mpx_basis.size();
312            StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
313            kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
314            mGrepDriver->makeKernelCall(ccK, {decompressedBasisBits}, {CharClasses});
315
316            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
317            icgrepInputSets.push_back(CharClasses);
318            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
319            MatchResultsBufs[i] = MatchResults;
320        } else {
321            std::set<re::Name *> UnicodeProperties;
322
323            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
324            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
325            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
326            MatchResultsBufs[i] = MatchResults;
327        }
328    }
329
330    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
331    if (mREs.size() > 1) {
332        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
333        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
334        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
335    }
336    StreamSetBuffer * Matches = MergedResults;
337    if (mMoveMatchesToEOL) {
338        StreamSetBuffer * OriginalMatches = Matches;
339        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
340        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
341        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
342    }
343
344    if (MaxCountFlag > 0) {
345        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
346        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
347        StreamSetBuffer * const AllMatches = Matches;
348        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
349        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
350    }
351
352    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
353
354}
355
356void LZ4GrepGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
357    auto main = this->getScanMatchGrepMainFunction();
358    std::ostringstream s;
359    EmitMatch accum("", false, false, s);
360
361    main(fileBuffer, blockStart, blockEnd, hasBlockChecksum, reinterpret_cast<intptr_t>(&accum));
362    llvm::outs() << s.str();
363}
364
365void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
366    auto & iBuilder = mPxDriver.getBuilder();
367    this->generateScanMatchMainFunc(iBuilder);
368
369    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
370    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
371
372    /*
373    // GeneratePipeline
374    this->generateLoadByteStreamAndBitStream(iBuilder);
375    this->generateExtractAndDepositMarkers(iBuilder);
376
377    auto swizzle = this->generateSwizzleExtractData(iBuilder);
378
379    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
380    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
381
382    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
383    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
384
385    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
386    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
387
388    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
389    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
390
391    // Produce unswizzled bit streams
392    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
393    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
394    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
395
396    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
397    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
398*/
399    // TODO fix this, generate DecompressedByteStream with LZ4ByteStreamDecompressionKernel
400
401    StreamSetBuffer * LineBreakStream;
402    StreamSetBuffer * Matches;
403    std::vector<re::RE*> res = {regex};
404    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
405
406    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
407    scanMatchK->setInitialArguments({match_accumulator});
408    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
409    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
410    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
411
412    mPxDriver.generatePipelineIR();
413    mPxDriver.deallocateBuffers();
414
415    iBuilder->CreateRetVoid();
416
417    mPxDriver.finalizeObject();
418}
419
420void LZ4GrepGenerator::generateMultiplexingSwizzledAioPipeline(re::RE *regex) {
421    auto & iBuilder = mPxDriver.getBuilder();
422    this->generateCountOnlyMainFunc(iBuilder);
423
424    // GeneratePipeline
425    this->generateLoadByteStreamAndBitStream(iBuilder);
426//    this->generateExtractAndDepositMarkers(iBuilder);
427
428    StreamSetBuffer * LineBreakStream;
429    StreamSetBuffer * Matches;
430    std::vector<re::RE*> res = {regex};
431    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true);
432
433    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
434    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
435    mPxDriver.generatePipelineIR();
436
437    iBuilder->setKernel(matchCountK);
438    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
439    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
440
441    mPxDriver.deallocateBuffers();
442
443    iBuilder->CreateRet(matchedLineCount);
444
445
446    mPxDriver.finalizeObject();
447}
448
449void LZ4GrepGenerator::generateByteStreamMultiplexingAioPipeline(re::RE* regex) {
450    auto & iBuilder = mPxDriver.getBuilder();
451    this->generateCountOnlyMainFunc(iBuilder);
452
453    this->generateLoadByteStreamAndBitStream(iBuilder);
454    StreamSetBuffer * LineBreakStream;
455    StreamSetBuffer * Matches;
456    std::vector<re::RE*> res = {regex};
457    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false, true);
458
459    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
460    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
461    mPxDriver.generatePipelineIR();
462
463    iBuilder->setKernel(matchCountK);
464    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
465    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
466
467    mPxDriver.deallocateBuffers();
468
469    iBuilder->CreateRet(matchedLineCount);
470
471    mPxDriver.finalizeObject();
472}
473
474
475void LZ4GrepGenerator::generateMultiplexingBitStreamAioPipeline(re::RE* regex) {
476    auto & iBuilder = mPxDriver.getBuilder();
477    this->generateCountOnlyMainFunc(iBuilder);
478
479    this->generateLoadByteStreamAndBitStream(iBuilder);
480    StreamSetBuffer * LineBreakStream;
481    StreamSetBuffer * Matches;
482    std::vector<re::RE*> res = {regex};
483    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false);
484
485    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
486    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
487    mPxDriver.generatePipelineIR();
488
489    iBuilder->setKernel(matchCountK);
490    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
491    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
492
493    mPxDriver.deallocateBuffers();
494
495    iBuilder->CreateRet(matchedLineCount);
496
497    mPxDriver.finalizeObject();
498}
499
500void LZ4GrepGenerator::generateBitStreamAioPipeline(re::RE* regex) {
501    auto & iBuilder = mPxDriver.getBuilder();
502    this->generateCountOnlyMainFunc(iBuilder);
503
504    // GeneratePipeline
505    this->generateLoadByteStreamAndBitStream(iBuilder);
506    StreamSetBuffer * const decompressionBitStream = this->generateBitStreamAIODecompression(iBuilder);
507
508    StreamSetBuffer * LineBreakStream;
509    StreamSetBuffer * Matches;
510    std::vector<re::RE*> res = {regex};
511    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
512
513    /*
514    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
515    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
516    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
517
518    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
519    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
520    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
521    */
522    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
523
524    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
525    mPxDriver.generatePipelineIR();
526
527    iBuilder->setKernel(matchCountK);
528    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
529    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
530    mPxDriver.deallocateBuffers();
531
532    iBuilder->CreateRet(matchedLineCount);
533
534    mPxDriver.finalizeObject();
535}
536
537void LZ4GrepGenerator::generateSwizzledAioPipeline(re::RE* regex) {
538    auto & iBuilder = mPxDriver.getBuilder();
539    this->generateCountOnlyMainFunc(iBuilder);
540
541    // GeneratePipeline
542    this->generateLoadByteStreamAndBitStream(iBuilder);
543
544    StreamSetBuffer * const decompressionBitStream = this->generateSwizzledAIODecompression(iBuilder);
545
546    StreamSetBuffer * LineBreakStream;
547    StreamSetBuffer * Matches;
548    std::vector<re::RE*> res = {regex};
549    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
550/*
551    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
552    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
553    mPxDriver.makeKernelCall(p2sK, {decompressionBitStream}, {decompressionByteStream});
554
555    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
556    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
557    mPxDriver.makeKernelCall(outK, {decompressionByteStream}, {});
558*/
559    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
560
561    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
562    mPxDriver.generatePipelineIR();
563
564    iBuilder->setKernel(matchCountK);
565    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
566    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
567    mPxDriver.deallocateBuffers();
568
569    iBuilder->CreateRet(matchedLineCount);
570
571    mPxDriver.finalizeObject();
572}
573
574void LZ4GrepGenerator::generateParallelAioPipeline(re::RE* regex, bool enableGather, bool enableScatter, int minParallelLevel) {
575    auto & iBuilder = mPxDriver.getBuilder();
576    this->generateCountOnlyMainFunc(iBuilder);
577
578    this->generateLoadByteStream(iBuilder);
579    parabix::StreamSetBuffer * decompressedByteStream = this->generateParallelAIODecompression(iBuilder, enableGather, enableScatter, minParallelLevel);
580
581
582    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
583    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
584    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
585
586
587    StreamSetBuffer * LineBreakStream;
588    StreamSetBuffer * Matches;
589    std::vector<re::RE*> res = {regex};
590    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
591
592
593//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
594//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
595//    mPxDriver.makeKernelCall(outK, {decompressedByteStream}, {});
596
597    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
598    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
599    mPxDriver.generatePipelineIR();
600
601    iBuilder->setKernel(matchCountK);
602    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
603    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
604
605    mPxDriver.deallocateBuffers();
606
607    iBuilder->CreateRet(matchedLineCount);
608
609    mPxDriver.finalizeObject();
610}
611
612
613
614void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
615    auto & iBuilder = mPxDriver.getBuilder();
616    this->generateCountOnlyMainFunc(iBuilder);
617
618    // GeneratePipeline
619    this->generateLoadByteStream(iBuilder);
620//    this->generateLoadByteStreamAndBitStream(iBuilder);
621
622    parabix::StreamSetBuffer * decompressedByteStream = this->generateAIODecompression(iBuilder);
623
624
625    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
626    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian, /*aligned = */ true, "a");
627    mPxDriver.makeKernelCall(s2pk, {decompressedByteStream}, {decompressionBitStream});
628
629
630    StreamSetBuffer * LineBreakStream;
631    StreamSetBuffer * Matches;
632    std::vector<re::RE*> res = {regex};
633    std::tie(LineBreakStream, Matches) = grepPipeline(res, decompressionBitStream);
634
635
636//    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
637//    outK->setInitialArguments({iBuilder->GetString("/Users/wxy325/developer/LZ4-sample-files/workspace/lz4d-normal/8k_.txt")});
638//    mPxDriver.makeKernelCall(outK, {decompressedStream}, {});
639
640    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
641    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
642    mPxDriver.generatePipelineIR();
643
644    iBuilder->setKernel(matchCountK);
645    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
646    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
647
648    mPxDriver.deallocateBuffers();
649
650    iBuilder->CreateRet(matchedLineCount);
651
652    mPxDriver.finalizeObject();
653
654}
655
656
657ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
658    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
659}
660CountOnlyGrepMainFunctionType LZ4GrepGenerator::getCountOnlyGrepMainFunction() {
661    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
662}
663
664void LZ4GrepGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
665    Module * M = iBuilder->getModule();
666    Type * const int64Ty = iBuilder->getInt64Ty();
667    Type * const sizeTy = iBuilder->getSizeTy();
668    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
669//    Type * const voidTy = iBuilder->getVoidTy();
670    Type * const inputType = iBuilder->getInt8PtrTy();
671
672    Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
673    main->setCallingConv(CallingConv::C);
674    Function::arg_iterator args = main->arg_begin();
675    mInputStream = &*(args++);
676    mInputStream->setName("input");
677
678    mHeaderSize = &*(args++);
679    mHeaderSize->setName("mHeaderSize");
680
681    mFileSize = &*(args++);
682    mFileSize->setName("mFileSize");
683
684    mHasBlockChecksum = &*(args++);
685    mHasBlockChecksum->setName("mHasBlockChecksum");
686    // TODO for now, we do not handle blockCheckSum
687    mHasBlockChecksum = iBuilder->getInt1(false);
688
689    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
690}
691
692void LZ4GrepGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
693    Module * M = iBuilder->getModule();
694    Type * const sizeTy = iBuilder->getSizeTy();
695    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
696    Type * const voidTy = iBuilder->getVoidTy();
697    Type * const inputType = iBuilder->getInt8PtrTy();
698    Type * const intAddrTy = iBuilder->getIntAddrTy();
699
700    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
701    main->setCallingConv(CallingConv::C);
702    Function::arg_iterator args = main->arg_begin();
703    mInputStream = &*(args++);
704    mInputStream->setName("input");
705
706    mHeaderSize = &*(args++);
707    mHeaderSize->setName("mHeaderSize");
708
709    mFileSize = &*(args++);
710    mFileSize->setName("mFileSize");
711
712    mHasBlockChecksum = &*(args++);
713    mHasBlockChecksum->setName("mHasBlockChecksum");
714
715    match_accumulator = &*(args++);
716    match_accumulator->setName("match_accumulator");
717
718    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
719}
Note: See TracBrowser for help on using the repository browser.