source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6132

Last change on this file since 6132 was 6132, checked in by xwa163, 7 months ago
  1. More experiment on lz4 grep
  2. Improve performance of lzparabix grep
File size: 37.0 KB
RevLine 
[5864]1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
[6026]20#include <kernels/swizzled_multiple_pdep_kernel.h>
[5906]21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
[6039]22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
[5948]23#include <kernels/lz4/lz4_block_decoder.h>
[5921]24#include <kernels/lz4/lz4_index_builder.h>
[6111]25#include <kernels/lz4/aio/lz4_bytestream_aio.h>
26#include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
27#include <kernels/lz4/aio/lz4_swizzled_aio.h>
[6118]28#include <kernels/lz4/aio/lz4_bitstream_aio.h>
[6132]29#include <kernels/lz4/aio/lz4_i4_bytestream_aio.h>
[6029]30#include <kernels/bitstream_pdep_kernel.h>
[6039]31#include <kernels/lz4/lz4_bitstream_not_kernel.h>
[5864]32
33namespace re { class CC; }
34
35using namespace llvm;
36using namespace parabix;
37using namespace kernel;
38
[6111]39LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
[6044]40    mCompressionMarker = NULL;
[5864]41}
42
43MainFunctionType LZ4Generator::getMainFunc() {
[6026]44    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
[5864]45}
46
[6059]47void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
48    auto & iBuilder = mPxDriver.getBuilder();
49    this->generateMainFunc(iBuilder);
50
[6066]51    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6059]52
53    // GeneratePipeline
54    this->generateLoadByteStreamAndBitStream(iBuilder);
55
56    //// Decode Block Information
[6066]57    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
58    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
59    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]60
61    //// Generate Helper Markers Extenders, FX, XF
[6066]62    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
63    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6059]64    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
65    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
66
67
[6111]68    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6059]69    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
70    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
71
72    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
73    //TODO handle uncompressed part
[6066]74    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
75    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
76    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]77
[6066]78    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
79    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
80    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
[6059]81
[6111]82    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
[6059]83    Lz4IndexBuilderK->setInitialArguments({mFileSize});
84    mPxDriver.makeKernelCall(
85            Lz4IndexBuilderK,
86            {
87                    mCompressedByteStream,
88                    Extenders,
89
90                    // Block Data
91                    BlockData_IsCompressed,
92                    BlockData_BlockStart,
93                    BlockData_BlockEnd
94            }, {
95                    //Uncompressed Data
96                    UncompressedStartPos,
97                    UncompressedLength,
98                    UncompressedOutputPos,
99
100                    mCompressionMarker,
101                    mM0Marker,
102                    mMatchOffsetMarker
103            });
104
105    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
106    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
107
108
109    // Deletion
[6066]110    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
111    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
[6059]112
113    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
114    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
115
[6066]116    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6059]117    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
118    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
119
120
121    StreamSetBuffer * const extractedBits = compressedBits;
122
123    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
124    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
125
126    // --------------------------------------------------------
127    // End
128    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
129
130    outK->setInitialArguments({iBuilder->GetString(outputFile)});
131    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
132
133    mPxDriver.generatePipelineIR();
134    mPxDriver.deallocateBuffers();
135
136    iBuilder->CreateRetVoid();
137
138    mPxDriver.finalizeObject();
139}
140
[6039]141void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
[6026]142    auto & iBuilder = mPxDriver.getBuilder();
[5864]143    this->generateMainFunc(iBuilder);
144
[6066]145    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5864]146
147    // GeneratePipeline
148    this->generateLoadByteStreamAndBitStream(iBuilder);
[6039]149    this->generateExtractAndDepositMarkers(iBuilder);
150    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]151
[6039]152    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
153    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
[5948]154
[6039]155    // --------------------------------------------------------
156    // End
157    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]158
[6039]159    outK->setInitialArguments({iBuilder->GetString(outputFile)});
160    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5948]161
[6039]162    mPxDriver.generatePipelineIR();
163    mPxDriver.deallocateBuffers();
164
165    iBuilder->CreateRetVoid();
166
167    mPxDriver.finalizeObject();
168}
169
170void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
171    auto & iBuilder = mPxDriver.getBuilder();
172    this->generateMainFunc(iBuilder);
173
[6066]174    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]175
176    // GeneratePipeline
177    this->generateLoadByteStreamAndBitStream(iBuilder);
178    this->generateExtractAndDepositMarkers(iBuilder);
[5864]179    auto swizzle = this->generateSwizzleExtractData(iBuilder);
180
181
182    // Produce unswizzled bit streams
[6066]183    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]184    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[5864]185
[6026]186    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
[5864]187
188
[6026]189    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
190    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
[5864]191
192    // --------------------------------------------------------
193    // End
[6026]194    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]195
196    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]197    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]198
[6026]199    mPxDriver.generatePipelineIR();
200    mPxDriver.deallocateBuffers();
[5864]201
202    iBuilder->CreateRetVoid();
203
[6026]204    mPxDriver.finalizeObject();
[5864]205}
206
[5874]207void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
[6026]208    auto & iBuilder = mPxDriver.getBuilder();
[5874]209    this->generateMainFunc(iBuilder);
210
[6066]211    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5874]212
213    // GeneratePipeline
214    this->generateLoadByteStreamAndBitStream(iBuilder);
215    this->generateExtractAndDepositMarkers(iBuilder);
[6039]216    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5874]217
[6066]218    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
[6029]219    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
220    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
221
[6039]222    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
223    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[6029]224
[6039]225    // --------------------------------------------------------
226    // End
227    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
228
229    outK->setInitialArguments({iBuilder->GetString(outputFile)});
230    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
231
232    mPxDriver.generatePipelineIR();
233    mPxDriver.deallocateBuffers();
234
235    iBuilder->CreateRetVoid();
236
237    mPxDriver.finalizeObject();
238}
239
240void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
241    auto & iBuilder = mPxDriver.getBuilder();
242    this->generateMainFunc(iBuilder);
243
[6066]244    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]245
246    // GeneratePipeline
247    this->generateLoadByteStreamAndBitStream(iBuilder);
248    this->generateExtractAndDepositMarkers(iBuilder);
249
250    auto swizzle = this->generateSwizzleExtractData(iBuilder);
251
[6066]252    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
253    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[5874]254
[6026]255    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
256    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]257
258    // Produce unswizzled bit streams
[6066]259    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]260    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6029]261    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
[5874]262
[6026]263    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6029]264    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[5874]265
266    // --------------------------------------------------------
267    // End
[6026]268    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5874]269    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]270    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5874]271
[6026]272    mPxDriver.generatePipelineIR();
273    mPxDriver.deallocateBuffers();
[5874]274
275    iBuilder->CreateRetVoid();
276
[6026]277    mPxDriver.finalizeObject();
[5874]278}
279
[6039]280void LZ4Generator::generatePipeline(const std::string &outputFile) {
[6026]281    auto & iBuilder = mPxDriver.getBuilder();
[5864]282    this->generateMainFunc(iBuilder);
283
[6066]284    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5864]285
286    // GeneratePipeline
287    this->generateLoadByteStreamAndBitStream(iBuilder);
288    this->generateExtractAndDepositMarkers(iBuilder);
[6039]289    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]290
[6066]291    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
[6039]292    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
293    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
294
[6066]295    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6039]296    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
297    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
298
299    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
300    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
301
302    // --------------------------------------------------------
303    // End
304    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
305
306    outK->setInitialArguments({iBuilder->GetString(outputFile)});
307    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
308
309    mPxDriver.generatePipelineIR();
310    mPxDriver.deallocateBuffers();
311
312    iBuilder->CreateRetVoid();
313
314    mPxDriver.finalizeObject();
315}
316
317void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
318    auto & iBuilder = mPxDriver.getBuilder();
319    this->generateMainFunc(iBuilder);
320
[6066]321    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]322
323    // GeneratePipeline
324    this->generateLoadByteStreamAndBitStream(iBuilder);
325    this->generateExtractAndDepositMarkers(iBuilder);
326
[5864]327    auto swizzle = this->generateSwizzleExtractData(iBuilder);
328
[6066]329    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
330    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
[5864]331
[6026]332    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
333    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]334
[6066]335    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
336    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
[5906]337
[6026]338    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
339    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
[5906]340
341
[5864]342    // Produce unswizzled bit streams
[6066]343    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]344    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6039]345    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
[5864]346
347
[6026]348    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6039]349    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
[5864]350
351    // --------------------------------------------------------
352    // End
[6026]353    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]354    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]355    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]356
[6026]357    mPxDriver.generatePipelineIR();
358    mPxDriver.deallocateBuffers();
[5864]359
360    iBuilder->CreateRetVoid();
361
[6026]362    mPxDriver.finalizeObject();
[5864]363}
364
365void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
366    Module * M = iBuilder->getModule();
367    Type * const sizeTy = iBuilder->getSizeTy();
368    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
369    Type * const voidTy = iBuilder->getVoidTy();
370    Type * const inputType = iBuilder->getInt8PtrTy();
371
372    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
373    main->setCallingConv(CallingConv::C);
374    Function::arg_iterator args = main->arg_begin();
[6026]375    mInputStream = &*(args++);
376    mInputStream->setName("input");
[5864]377
[6026]378    mHeaderSize = &*(args++);
379    mHeaderSize->setName("mHeaderSize");
[5864]380
[6026]381    mFileSize = &*(args++);
382    mFileSize->setName("mFileSize");
[5864]383
[6026]384    mHasBlockChecksum = &*(args++);
385    mHasBlockChecksum->setName("mHasBlockChecksum");
[6020]386    // TODO for now, we do not handle blockCheckSum
[6026]387    mHasBlockChecksum = iBuilder->getInt1(false);
[5864]388
389    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
390}
391
[6064]392void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
393    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
394    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
395    sourceK->setInitialArguments({mInputStream, mFileSize});
396    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
397}
[5864]398void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6047]399    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
[6066]400    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
[5864]401
[6026]402    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
403    sourceK->setInitialArguments({mInputStream, mFileSize});
404    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
[6119]405    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
[6026]406    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
[5864]407}
408
[6118]409StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6119]410    return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0];
411}
412
413std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach(
414        std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) {
415    auto mGrepDriver = &mPxDriver;
416    auto & iBuilder = mGrepDriver->getBuilder();
417
418    //// Decode Block Information
[6118]419    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
420    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
421    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
422
423
424    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
425    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
426    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
427
428
[6132]429
430    if (compressedBitStreams[0]->getNumOfStreams() == 4) {
431        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
432        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::P2S4StreamByPDEP>(iBuilder);
433        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
434
435
436        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
437        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4I4ByteStreamAioKernel>(iBuilder);
438        lz4I4AioK->setInitialArguments({mFileSize});
439        mGrepDriver->makeKernelCall(lz4I4AioK, {
440                mCompressedByteStream,
441
442                // Block Data
443                BlockData_IsCompressed,
444                BlockData_BlockStart,
445                BlockData_BlockEnd,
446
447                twistedCharClasses
448        }, {
449                                            uncompressedTwistedCharClasses
450                                    });
451
452        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder));
453        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::S2P4StreamByPEXTKernel>(iBuilder);
454        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
455        return {untwistedCharClasses};
456    }
457
458
459
460
[6119]461    std::vector<StreamSetBuffer *> inputStreams = {
462            mCompressedByteStream,
[6118]463
[6119]464            // Block Data
465            BlockData_IsCompressed,
466            BlockData_BlockStart,
467            BlockData_BlockEnd
468    };
[6118]469
[6119]470    std::vector<StreamSetBuffer *> outputStream;
471    std::vector<unsigned> numbersOfStreams;
[6118]472
[6119]473    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
474        unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
475        numbersOfStreams.push_back(numOfStreams);
476        inputStreams.push_back(compressedBitStreams[i]);
477        outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder)));
478    }
[6118]479
[6119]480    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams);
481    lz4AioK->setInitialArguments({mFileSize});
482    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
483
484    return outputStream;
[6118]485}
486
[6119]487
[6059]488StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[5864]489    //// Decode Block Information
[6066]490    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
491    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
492    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[5864]493
494    //// Generate Helper Markers Extenders, FX, XF
[6111]495//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
496//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
497//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
498//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[5864]499
[6059]500
[6111]501    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6059]502    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
503    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
504
505
506    // Produce unswizzled bit streams
[6066]507    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
508    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]509    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
510    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
511
512
513
[6066]514    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
515    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]516
517
518    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
519    lz4AioK->setInitialArguments({mFileSize});
520    mPxDriver.makeKernelCall(
521            lz4AioK,
522            {
523                    mCompressedByteStream,
524
[6111]525//                    Extenders,
526
[6059]527                    // Block Data
528                    BlockData_IsCompressed,
529                    BlockData_BlockStart,
530                    BlockData_BlockEnd,
531
532                    u16Swizzle0,
533                    u16Swizzle1
534            }, {
535                    decompressedSwizzled0,
536                    decompressedSwizzled1
537            });
538
539
[6066]540    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
[6059]541
542    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
543    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
544
545    return decompressionBitStream;
546}
547
[6111]548parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
[6064]549    //// Decode Block Information
[6066]550    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
551    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
552    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]553
[6064]554    //// Generate Helper Markers Extenders
[6070]555//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
556//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
557//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
558//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[6059]559
[6111]560    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6064]561    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
562    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
563
564
[6066]565    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[6064]566
[6111]567    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
[6064]568    lz4AioK->setInitialArguments({mFileSize});
569    mPxDriver.makeKernelCall(
570            lz4AioK,
571            {
572                    mCompressedByteStream,
573
[6111]574//                    Extenders,
575
[6064]576                    // Block Data
577                    BlockData_IsCompressed,
578                    BlockData_BlockStart,
579                    BlockData_BlockEnd
580            }, {
581                    decompressionByteStream
582            });
583
584    return decompressionByteStream;
585
586}
587
[6059]588StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6132]589    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
[6059]590
[6066]591    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[6059]592    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
593    lz4AioK->setInitialArguments({mFileSize});
594    mPxDriver.makeKernelCall(
595            lz4AioK,
596            {
597                    mCompressedByteStream,
[5974]598
[6059]599                    // Block Data
[6132]600                    blockInfo.isCompress,
601                    blockInfo.blockStart,
602                    blockInfo.blockEnd
[6059]603            }, {
604                    decompressionByteStream
605            });
606
607    return decompressionByteStream;
608}
609
610void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
611    //// Decode Block Information
[6066]612    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
613    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
614    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]615
616    //// Generate Helper Markers Extenders, FX, XF
[6066]617    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
618    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6059]619    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
620    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
621
622
[6111]623    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
[6059]624    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
625    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
626
[5864]627    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
628
629    //TODO handle uncompressed part
[6066]630    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
631    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
632    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[5864]633
[6066]634    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
635    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
636    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
[5864]637
[6026]638    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
639    Lz4IndexBuilderK->setInitialArguments({mFileSize});
640    mPxDriver.makeKernelCall(
[5948]641            Lz4IndexBuilderK,
[5864]642            {
[6026]643                    mCompressedByteStream,
[5864]644                    Extenders,
645
646                    // Block Data
647                    BlockData_IsCompressed,
648                    BlockData_BlockStart,
649                    BlockData_BlockEnd
650            }, {
651                    //Uncompressed Data
652                    UncompressedStartPos,
653                    UncompressedLength,
654                    UncompressedOutputPos,
655
[6026]656                    mDeletionMarker,
657                    mM0Marker,
658                    mMatchOffsetMarker
[5864]659            });
660
[6026]661    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
662    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
[5864]663
664}
665
666std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6066]667    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
668    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[5864]669
[6026]670    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
671    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
[5864]672    return std::make_pair(u16Swizzle0, u16Swizzle1);
673}
674
[6043]675void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
676    if (!mCompressionMarker) {
[6066]677        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6043]678        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
679        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
680    }
681}
682
[6039]683parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6043]684    this->generateCompressionMarker(iBuilder);
[6039]685
686    // Deletion
[6066]687    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
688    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
[6039]689
690    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
[6043]691    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
[6039]692
[6066]693    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6039]694    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
695    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
696
697    return compressedBits;
698}
699
[5948]700int LZ4Generator::get4MbBufferBlocks() {
[6111]701    return mLz4BlockSize / codegen::BlockSize;
[5948]702}
703
[6066]704int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
705    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
[5864]706}
[6066]707int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
708    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
[5864]709}
710
[6132]711LZ4BlockInfo LZ4Generator::getBlockInfo(const std::unique_ptr<kernel::KernelBuilder> & b) {
712    LZ4BlockInfo blockInfo;
713    blockInfo.isCompress = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getInputBufferBlocks(b), 1);
714    blockInfo.blockStart = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
715    blockInfo.blockEnd = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
[5864]716
[6132]717    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(b);
718    blockDecoderK->setInitialArguments({b->CreateTrunc(mHasBlockChecksum, b->getInt1Ty()), mHeaderSize, mFileSize});
719    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd});
[5864]720
[6132]721    return blockInfo;
722}
[5921]723
[6132]724
[5864]725// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.