source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6119

Last change on this file since 6119 was 6119, checked in by xwa163, 9 months ago
  1. Add a BasisSetNumbering option to fix a multiplexing bug
  2. Use BigEndian BitNumbering for the lz4 and lzparabix related pipelines
  3. Support multiplexing in LZ4BitStreamAio pipeline
File size: 36.0 KB
RevLine 
[5864]1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
[6026]20#include <kernels/swizzled_multiple_pdep_kernel.h>
[5906]21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
[6039]22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
[5948]23#include <kernels/lz4/lz4_block_decoder.h>
[5921]24#include <kernels/lz4/lz4_index_builder.h>
[6111]25#include <kernels/lz4/aio/lz4_bytestream_aio.h>
26#include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
27#include <kernels/lz4/aio/lz4_swizzled_aio.h>
[6118]28#include <kernels/lz4/aio/lz4_bitstream_aio.h>
[6029]29#include <kernels/bitstream_pdep_kernel.h>
[6039]30#include <kernels/lz4/lz4_bitstream_not_kernel.h>
[5864]31
32namespace re { class CC; }
33
34using namespace llvm;
35using namespace parabix;
36using namespace kernel;
37
[6111]38LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
[6044]39    mCompressionMarker = NULL;
[5864]40}
41
42MainFunctionType LZ4Generator::getMainFunc() {
[6026]43    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
[5864]44}
45
[6059]46void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
47    auto & iBuilder = mPxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
[6066]50    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6059]51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55    //// Decode Block Information
[6066]56    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
57    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
58    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]59
60    //// Generate Helper Markers Extenders, FX, XF
[6066]61    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
62    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6059]63    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
64    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
65
66
[6111]67    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6059]68    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
69    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
70
71    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
72    //TODO handle uncompressed part
[6066]73    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
74    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
75    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]76
[6066]77    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
78    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
79    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
[6059]80
[6111]81    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
[6059]82    Lz4IndexBuilderK->setInitialArguments({mFileSize});
83    mPxDriver.makeKernelCall(
84            Lz4IndexBuilderK,
85            {
86                    mCompressedByteStream,
87                    Extenders,
88
89                    // Block Data
90                    BlockData_IsCompressed,
91                    BlockData_BlockStart,
92                    BlockData_BlockEnd
93            }, {
94                    //Uncompressed Data
95                    UncompressedStartPos,
96                    UncompressedLength,
97                    UncompressedOutputPos,
98
99                    mCompressionMarker,
100                    mM0Marker,
101                    mMatchOffsetMarker
102            });
103
104    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
105    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
106
107
108    // Deletion
[6066]109    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
110    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
[6059]111
112    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
113    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
114
[6066]115    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6059]116    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
117    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
118
119
120    StreamSetBuffer * const extractedBits = compressedBits;
121
122    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
123    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
124
125    // --------------------------------------------------------
126    // End
127    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
128
129    outK->setInitialArguments({iBuilder->GetString(outputFile)});
130    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
131
132    mPxDriver.generatePipelineIR();
133    mPxDriver.deallocateBuffers();
134
135    iBuilder->CreateRetVoid();
136
137    mPxDriver.finalizeObject();
138}
139
[6039]140void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
[6026]141    auto & iBuilder = mPxDriver.getBuilder();
[5864]142    this->generateMainFunc(iBuilder);
143
[6066]144    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5864]145
146    // GeneratePipeline
147    this->generateLoadByteStreamAndBitStream(iBuilder);
[6039]148    this->generateExtractAndDepositMarkers(iBuilder);
149    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]150
[6039]151    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
152    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
[5948]153
[6039]154    // --------------------------------------------------------
155    // End
156    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]157
[6039]158    outK->setInitialArguments({iBuilder->GetString(outputFile)});
159    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5948]160
[6039]161    mPxDriver.generatePipelineIR();
162    mPxDriver.deallocateBuffers();
163
164    iBuilder->CreateRetVoid();
165
166    mPxDriver.finalizeObject();
167}
168
169void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
170    auto & iBuilder = mPxDriver.getBuilder();
171    this->generateMainFunc(iBuilder);
172
[6066]173    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]174
175    // GeneratePipeline
176    this->generateLoadByteStreamAndBitStream(iBuilder);
177    this->generateExtractAndDepositMarkers(iBuilder);
[5864]178    auto swizzle = this->generateSwizzleExtractData(iBuilder);
179
180
181    // Produce unswizzled bit streams
[6066]182    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]183    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[5864]184
[6026]185    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
[5864]186
187
[6026]188    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
189    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
[5864]190
191    // --------------------------------------------------------
192    // End
[6026]193    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]194
195    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]196    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]197
[6026]198    mPxDriver.generatePipelineIR();
199    mPxDriver.deallocateBuffers();
[5864]200
201    iBuilder->CreateRetVoid();
202
[6026]203    mPxDriver.finalizeObject();
[5864]204}
205
[5874]206void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
[6026]207    auto & iBuilder = mPxDriver.getBuilder();
[5874]208    this->generateMainFunc(iBuilder);
209
[6066]210    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5874]211
212    // GeneratePipeline
213    this->generateLoadByteStreamAndBitStream(iBuilder);
214    this->generateExtractAndDepositMarkers(iBuilder);
[6039]215    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5874]216
[6066]217    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
[6029]218    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
219    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
220
[6039]221    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
222    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[6029]223
[6039]224    // --------------------------------------------------------
225    // End
226    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
227
228    outK->setInitialArguments({iBuilder->GetString(outputFile)});
229    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
230
231    mPxDriver.generatePipelineIR();
232    mPxDriver.deallocateBuffers();
233
234    iBuilder->CreateRetVoid();
235
236    mPxDriver.finalizeObject();
237}
238
239void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
240    auto & iBuilder = mPxDriver.getBuilder();
241    this->generateMainFunc(iBuilder);
242
[6066]243    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]244
245    // GeneratePipeline
246    this->generateLoadByteStreamAndBitStream(iBuilder);
247    this->generateExtractAndDepositMarkers(iBuilder);
248
249    auto swizzle = this->generateSwizzleExtractData(iBuilder);
250
[6066]251    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
252    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[5874]253
[6026]254    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
255    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]256
257    // Produce unswizzled bit streams
[6066]258    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]259    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6029]260    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
[5874]261
[6026]262    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6029]263    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[5874]264
265    // --------------------------------------------------------
266    // End
[6026]267    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5874]268    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]269    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5874]270
[6026]271    mPxDriver.generatePipelineIR();
272    mPxDriver.deallocateBuffers();
[5874]273
274    iBuilder->CreateRetVoid();
275
[6026]276    mPxDriver.finalizeObject();
[5874]277}
278
[6039]279void LZ4Generator::generatePipeline(const std::string &outputFile) {
[6026]280    auto & iBuilder = mPxDriver.getBuilder();
[5864]281    this->generateMainFunc(iBuilder);
282
[6066]283    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5864]284
285    // GeneratePipeline
286    this->generateLoadByteStreamAndBitStream(iBuilder);
287    this->generateExtractAndDepositMarkers(iBuilder);
[6039]288    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]289
[6066]290    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
[6039]291    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
292    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
293
[6066]294    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6039]295    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
296    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
297
298    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
299    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
300
301    // --------------------------------------------------------
302    // End
303    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
304
305    outK->setInitialArguments({iBuilder->GetString(outputFile)});
306    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
307
308    mPxDriver.generatePipelineIR();
309    mPxDriver.deallocateBuffers();
310
311    iBuilder->CreateRetVoid();
312
313    mPxDriver.finalizeObject();
314}
315
316void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
317    auto & iBuilder = mPxDriver.getBuilder();
318    this->generateMainFunc(iBuilder);
319
[6066]320    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]321
322    // GeneratePipeline
323    this->generateLoadByteStreamAndBitStream(iBuilder);
324    this->generateExtractAndDepositMarkers(iBuilder);
325
[5864]326    auto swizzle = this->generateSwizzleExtractData(iBuilder);
327
[6066]328    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
329    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
[5864]330
[6026]331    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
332    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]333
[6066]334    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
335    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
[5906]336
[6026]337    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
338    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
[5906]339
340
[5864]341    // Produce unswizzled bit streams
[6066]342    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]343    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6039]344    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
[5864]345
346
[6026]347    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6039]348    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
[5864]349
350    // --------------------------------------------------------
351    // End
[6026]352    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]353    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]354    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]355
[6026]356    mPxDriver.generatePipelineIR();
357    mPxDriver.deallocateBuffers();
[5864]358
359    iBuilder->CreateRetVoid();
360
[6026]361    mPxDriver.finalizeObject();
[5864]362}
363
364void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
365    Module * M = iBuilder->getModule();
366    Type * const sizeTy = iBuilder->getSizeTy();
367    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
368    Type * const voidTy = iBuilder->getVoidTy();
369    Type * const inputType = iBuilder->getInt8PtrTy();
370
371    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
372    main->setCallingConv(CallingConv::C);
373    Function::arg_iterator args = main->arg_begin();
[6026]374    mInputStream = &*(args++);
375    mInputStream->setName("input");
[5864]376
[6026]377    mHeaderSize = &*(args++);
378    mHeaderSize->setName("mHeaderSize");
[5864]379
[6026]380    mFileSize = &*(args++);
381    mFileSize->setName("mFileSize");
[5864]382
[6026]383    mHasBlockChecksum = &*(args++);
384    mHasBlockChecksum->setName("mHasBlockChecksum");
[6020]385    // TODO for now, we do not handle blockCheckSum
[6026]386    mHasBlockChecksum = iBuilder->getInt1(false);
[5864]387
388    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
389}
390
[6064]391void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
392    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
393    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
394    sourceK->setInitialArguments({mInputStream, mFileSize});
395    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
396}
[5864]397void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6047]398    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
[6066]399    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
[5864]400
[6026]401    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
402    sourceK->setInitialArguments({mInputStream, mFileSize});
403    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
[6119]404    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
[6026]405    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
[5864]406}
407
[6118]408StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6119]409    return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0];
410}
411
412std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach(
413        std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) {
414    auto mGrepDriver = &mPxDriver;
415    auto & iBuilder = mGrepDriver->getBuilder();
416
417    //// Decode Block Information
[6118]418    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
419    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
420    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
421
422
423    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
424    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
425    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
426
427
[6119]428    std::vector<StreamSetBuffer *> inputStreams = {
429            mCompressedByteStream,
[6118]430
[6119]431            // Block Data
432            BlockData_IsCompressed,
433            BlockData_BlockStart,
434            BlockData_BlockEnd
435    };
[6118]436
[6119]437    std::vector<StreamSetBuffer *> outputStream;
438    std::vector<unsigned> numbersOfStreams;
[6118]439
[6119]440    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
441        unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
442        numbersOfStreams.push_back(numOfStreams);
443        inputStreams.push_back(compressedBitStreams[i]);
444        outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder)));
445    }
[6118]446
[6119]447    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams);
448    lz4AioK->setInitialArguments({mFileSize});
449    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
450
451    return outputStream;
[6118]452}
453
[6119]454
[6059]455StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[5864]456    //// Decode Block Information
[6066]457    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
458    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
459    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[5864]460
461    //// Generate Helper Markers Extenders, FX, XF
[6111]462//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
463//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
464//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
465//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[5864]466
[6059]467
[6111]468    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6059]469    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
470    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
471
472
473    // Produce unswizzled bit streams
[6066]474    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
475    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]476    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
477    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
478
479
480
[6066]481    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
482    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]483
484
485    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
486    lz4AioK->setInitialArguments({mFileSize});
487    mPxDriver.makeKernelCall(
488            lz4AioK,
489            {
490                    mCompressedByteStream,
491
[6111]492//                    Extenders,
493
[6059]494                    // Block Data
495                    BlockData_IsCompressed,
496                    BlockData_BlockStart,
497                    BlockData_BlockEnd,
498
499                    u16Swizzle0,
500                    u16Swizzle1
501            }, {
502                    decompressedSwizzled0,
503                    decompressedSwizzled1
504            });
505
506
[6066]507    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
[6059]508
509    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
510    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
511
512    return decompressionBitStream;
513}
514
[6111]515parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
[6064]516    //// Decode Block Information
[6066]517    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
518    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
519    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]520
[6064]521    //// Generate Helper Markers Extenders
[6070]522//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
523//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
524//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
525//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[6059]526
[6111]527    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6064]528    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
529    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
530
531
[6066]532    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[6064]533
[6111]534    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
[6064]535    lz4AioK->setInitialArguments({mFileSize});
536    mPxDriver.makeKernelCall(
537            lz4AioK,
538            {
539                    mCompressedByteStream,
540
[6111]541//                    Extenders,
542
[6064]543                    // Block Data
544                    BlockData_IsCompressed,
545                    BlockData_BlockStart,
546                    BlockData_BlockEnd
547            }, {
548                    decompressionByteStream
549            });
550
551    return decompressionByteStream;
552
553}
554
[6059]555StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
556    //// Decode Block Information
[6066]557    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
558    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
559    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]560
[6064]561
562    //// Generate Helper Markers Extenders
[6111]563//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
564//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
565//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
566//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[5864]567
568
[6111]569    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6026]570    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
571    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
[5864]572
[5974]573
[6066]574    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[5974]575
[6059]576    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
577    lz4AioK->setInitialArguments({mFileSize});
578    mPxDriver.makeKernelCall(
579            lz4AioK,
580            {
581                    mCompressedByteStream,
[6064]582//                    Extenders,
[5974]583
[6059]584                    // Block Data
585                    BlockData_IsCompressed,
586                    BlockData_BlockStart,
587                    BlockData_BlockEnd
588            }, {
589                    decompressionByteStream
590            });
591
592    return decompressionByteStream;
593}
594
595void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
596    //// Decode Block Information
[6066]597    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
598    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
599    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]600
601    //// Generate Helper Markers Extenders, FX, XF
[6066]602    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
603    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6059]604    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
605    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
606
607
[6111]608    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6059]609    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
610    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
611
[5864]612    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
613
614    //TODO handle uncompressed part
[6066]615    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
616    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
617    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[5864]618
[6066]619    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
620    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
621    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
[5864]622
[6026]623    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
624    Lz4IndexBuilderK->setInitialArguments({mFileSize});
625    mPxDriver.makeKernelCall(
[5948]626            Lz4IndexBuilderK,
[5864]627            {
[6026]628                    mCompressedByteStream,
[5864]629                    Extenders,
630
631                    // Block Data
632                    BlockData_IsCompressed,
633                    BlockData_BlockStart,
634                    BlockData_BlockEnd
635            }, {
636                    //Uncompressed Data
637                    UncompressedStartPos,
638                    UncompressedLength,
639                    UncompressedOutputPos,
640
[6026]641                    mDeletionMarker,
642                    mM0Marker,
643                    mMatchOffsetMarker
[5864]644            });
645
[6026]646    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
647    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
[5864]648
649}
650
651std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6066]652    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
653    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[5864]654
[6026]655    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
656    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
[5864]657    return std::make_pair(u16Swizzle0, u16Swizzle1);
658}
659
[6043]660void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
661    if (!mCompressionMarker) {
[6066]662        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6043]663        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
664        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
665    }
666}
667
[6039]668parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6043]669    this->generateCompressionMarker(iBuilder);
[6039]670
671    // Deletion
[6066]672    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
673    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
[6039]674
675    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
[6043]676    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
[6039]677
[6066]678    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6039]679    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
680    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
681
682    return compressedBits;
683}
684
[5948]685int LZ4Generator::get4MbBufferBlocks() {
[6111]686    return mLz4BlockSize / codegen::BlockSize;
[5948]687}
688
[6066]689int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
690    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
[5864]691}
[6066]692int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
693    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
[5864]694}
695
696
697
[5921]698
[5864]699// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.