source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6135

Last change on this file since 6135 was 6135, checked in by xwa163, 8 months ago
  1. Implement twist_kernel and untwist_kernel by PEXT and PDEP
  2. Use twist form for multiplexing lz4 grep
File size: 38.7 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_block_decoder.h>
24#include <kernels/lz4/lz4_index_builder.h>
25#include <kernels/lz4/aio/lz4_bytestream_aio.h>
26#include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
27#include <kernels/lz4/aio/lz4_swizzled_aio.h>
28#include <kernels/lz4/aio/lz4_bitstream_aio.h>
29#include <kernels/lz4/aio/lz4_twist_aio.h>
30#include <kernels/bitstream_pdep_kernel.h>
31#include <kernels/lz4/lz4_bitstream_not_kernel.h>
32#include <kernels/lz4/aio/twist_kernel.h>
33#include <kernels/lz4/aio/untwist_kernel.h>
34
35namespace re { class CC; }
36
37using namespace llvm;
38using namespace parabix;
39using namespace kernel;
40
41LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
42    mCompressionMarker = NULL;
43}
44
45MainFunctionType LZ4Generator::getMainFunc() {
46    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
47}
48
49void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
50    auto & iBuilder = mPxDriver.getBuilder();
51    this->generateMainFunc(iBuilder);
52
53    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
54
55    // GeneratePipeline
56    this->generateLoadByteStreamAndBitStream(iBuilder);
57
58    //// Decode Block Information
59    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
60    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
61    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
62
63    //// Generate Helper Markers Extenders, FX, XF
64    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
65    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
66    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
67    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
68
69
70    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
71    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
72    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
73
74    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
75    //TODO handle uncompressed part
76    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
77    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
78    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
79
80    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
81    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
82    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
83
84    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
85    Lz4IndexBuilderK->setInitialArguments({mFileSize});
86    mPxDriver.makeKernelCall(
87            Lz4IndexBuilderK,
88            {
89                    mCompressedByteStream,
90                    Extenders,
91
92                    // Block Data
93                    BlockData_IsCompressed,
94                    BlockData_BlockStart,
95                    BlockData_BlockEnd
96            }, {
97                    //Uncompressed Data
98                    UncompressedStartPos,
99                    UncompressedLength,
100                    UncompressedOutputPos,
101
102                    mCompressionMarker,
103                    mM0Marker,
104                    mMatchOffsetMarker
105            });
106
107    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
108    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
109
110
111    // Deletion
112    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
113    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
114
115    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
116    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
117
118    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
119    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
120    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
121
122
123    StreamSetBuffer * const extractedBits = compressedBits;
124
125    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
126    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
127
128    // --------------------------------------------------------
129    // End
130    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
131
132    outK->setInitialArguments({iBuilder->GetString(outputFile)});
133    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
134
135    mPxDriver.generatePipelineIR();
136    mPxDriver.deallocateBuffers();
137
138    iBuilder->CreateRetVoid();
139
140    mPxDriver.finalizeObject();
141}
142
143void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
144    auto & iBuilder = mPxDriver.getBuilder();
145    this->generateMainFunc(iBuilder);
146
147    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
148
149    // GeneratePipeline
150    this->generateLoadByteStreamAndBitStream(iBuilder);
151    this->generateExtractAndDepositMarkers(iBuilder);
152    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
153
154    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
155    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
156
157    // --------------------------------------------------------
158    // End
159    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
160
161    outK->setInitialArguments({iBuilder->GetString(outputFile)});
162    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
163
164    mPxDriver.generatePipelineIR();
165    mPxDriver.deallocateBuffers();
166
167    iBuilder->CreateRetVoid();
168
169    mPxDriver.finalizeObject();
170}
171
172void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
173    auto & iBuilder = mPxDriver.getBuilder();
174    this->generateMainFunc(iBuilder);
175
176    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
177
178    // GeneratePipeline
179    this->generateLoadByteStreamAndBitStream(iBuilder);
180    this->generateExtractAndDepositMarkers(iBuilder);
181    auto swizzle = this->generateSwizzleExtractData(iBuilder);
182
183
184    // Produce unswizzled bit streams
185    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
186    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
187
188    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
189
190
191    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
192    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
193
194    // --------------------------------------------------------
195    // End
196    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
197
198    outK->setInitialArguments({iBuilder->GetString(outputFile)});
199    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
200
201    mPxDriver.generatePipelineIR();
202    mPxDriver.deallocateBuffers();
203
204    iBuilder->CreateRetVoid();
205
206    mPxDriver.finalizeObject();
207}
208
209void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
210    auto & iBuilder = mPxDriver.getBuilder();
211    this->generateMainFunc(iBuilder);
212
213    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
214
215    // GeneratePipeline
216    this->generateLoadByteStreamAndBitStream(iBuilder);
217    this->generateExtractAndDepositMarkers(iBuilder);
218    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
219
220    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
221    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
222    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
223
224    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
225    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
226
227    // --------------------------------------------------------
228    // End
229    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
230
231    outK->setInitialArguments({iBuilder->GetString(outputFile)});
232    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
233
234    mPxDriver.generatePipelineIR();
235    mPxDriver.deallocateBuffers();
236
237    iBuilder->CreateRetVoid();
238
239    mPxDriver.finalizeObject();
240}
241
242void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
243    auto & iBuilder = mPxDriver.getBuilder();
244    this->generateMainFunc(iBuilder);
245
246    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
247
248    // GeneratePipeline
249    this->generateLoadByteStreamAndBitStream(iBuilder);
250    this->generateExtractAndDepositMarkers(iBuilder);
251
252    auto swizzle = this->generateSwizzleExtractData(iBuilder);
253
254    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
255    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
256
257    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
258    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
259
260    // Produce unswizzled bit streams
261    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
262    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
263    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
264
265    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
266    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
267
268    // --------------------------------------------------------
269    // End
270    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
271    outK->setInitialArguments({iBuilder->GetString(outputFile)});
272    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
273
274    mPxDriver.generatePipelineIR();
275    mPxDriver.deallocateBuffers();
276
277    iBuilder->CreateRetVoid();
278
279    mPxDriver.finalizeObject();
280}
281
282void LZ4Generator::generatePipeline(const std::string &outputFile) {
283    auto & iBuilder = mPxDriver.getBuilder();
284    this->generateMainFunc(iBuilder);
285
286    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
287
288    // GeneratePipeline
289    this->generateLoadByteStreamAndBitStream(iBuilder);
290    this->generateExtractAndDepositMarkers(iBuilder);
291    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
292
293    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
294    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
295    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
296
297    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
298    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
299    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
300
301    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
302    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
303
304    // --------------------------------------------------------
305    // End
306    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
307
308    outK->setInitialArguments({iBuilder->GetString(outputFile)});
309    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
310
311    mPxDriver.generatePipelineIR();
312    mPxDriver.deallocateBuffers();
313
314    iBuilder->CreateRetVoid();
315
316    mPxDriver.finalizeObject();
317}
318
319void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
320    auto & iBuilder = mPxDriver.getBuilder();
321    this->generateMainFunc(iBuilder);
322
323    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
324
325    // GeneratePipeline
326    this->generateLoadByteStreamAndBitStream(iBuilder);
327    this->generateExtractAndDepositMarkers(iBuilder);
328
329    auto swizzle = this->generateSwizzleExtractData(iBuilder);
330
331    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
332    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
333
334    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
335    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
336
337    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
338    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
339
340    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
341    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
342
343
344    // Produce unswizzled bit streams
345    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
346    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
347    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
348
349
350    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
351    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
352
353    // --------------------------------------------------------
354    // End
355    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
356    outK->setInitialArguments({iBuilder->GetString(outputFile)});
357    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
358
359    mPxDriver.generatePipelineIR();
360    mPxDriver.deallocateBuffers();
361
362    iBuilder->CreateRetVoid();
363
364    mPxDriver.finalizeObject();
365}
366
367void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
368    Module * M = iBuilder->getModule();
369    Type * const sizeTy = iBuilder->getSizeTy();
370    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
371    Type * const voidTy = iBuilder->getVoidTy();
372    Type * const inputType = iBuilder->getInt8PtrTy();
373
374    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
375    main->setCallingConv(CallingConv::C);
376    Function::arg_iterator args = main->arg_begin();
377    mInputStream = &*(args++);
378    mInputStream->setName("input");
379
380    mHeaderSize = &*(args++);
381    mHeaderSize->setName("mHeaderSize");
382
383    mFileSize = &*(args++);
384    mFileSize->setName("mFileSize");
385
386    mHasBlockChecksum = &*(args++);
387    mHasBlockChecksum->setName("mHasBlockChecksum");
388    // TODO for now, we do not handle blockCheckSum
389    mHasBlockChecksum = iBuilder->getInt1(false);
390
391    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
392}
393
394void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
395    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
396    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
397    sourceK->setInitialArguments({mInputStream, mFileSize});
398    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
399}
400void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
401    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
402    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
403
404    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
405    sourceK->setInitialArguments({mInputStream, mFileSize});
406    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
407    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
408    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
409}
410
411StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
412    return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0];
413}
414
415std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach(
416        std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) {
417    auto mGrepDriver = &mPxDriver;
418    auto & iBuilder = mGrepDriver->getBuilder();
419
420    //// Decode Block Information
421    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
422    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
423    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
424
425
426    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
427    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
428    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
429
430
431
432    size_t numOfStreams = compressedBitStreams[0]->getNumOfStreams();
433
434    // 1, 2, 4, 8
435
436    if (numOfStreams <= 2) {
437        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
438        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 2);
439        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
440
441        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
442        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 2);
443        lz4I4AioK->setInitialArguments({mFileSize});
444        mGrepDriver->makeKernelCall(lz4I4AioK, {
445                mCompressedByteStream,
446
447                // Block Data
448                BlockData_IsCompressed,
449                BlockData_BlockStart,
450                BlockData_BlockEnd,
451
452                twistedCharClasses
453        }, {
454                                            uncompressedTwistedCharClasses
455                                    });
456
457        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder));
458        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 2);
459        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
460        return {untwistedCharClasses};
461    }
462
463    if (numOfStreams <= 4) {
464        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
465        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 4);
466        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
467
468
469        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
470
471        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistAioKernel>(iBuilder, 4);
472        lz4I4AioK->setInitialArguments({mFileSize});
473        mGrepDriver->makeKernelCall(lz4I4AioK, {
474                mCompressedByteStream,
475
476                // Block Data
477                BlockData_IsCompressed,
478                BlockData_BlockStart,
479                BlockData_BlockEnd,
480
481                twistedCharClasses
482        }, {
483                                            uncompressedTwistedCharClasses
484                                    });
485
486        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder));
487        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 4);
488        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
489        return {untwistedCharClasses};
490    }
491
492
493
494
495    std::vector<StreamSetBuffer *> inputStreams = {
496            mCompressedByteStream,
497
498            // Block Data
499            BlockData_IsCompressed,
500            BlockData_BlockStart,
501            BlockData_BlockEnd
502    };
503
504    std::vector<StreamSetBuffer *> outputStream;
505    std::vector<unsigned> numbersOfStreams;
506
507    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
508        unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
509        numbersOfStreams.push_back(numOfStreams);
510        inputStreams.push_back(compressedBitStreams[i]);
511        outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder)));
512    }
513
514    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams);
515    lz4AioK->setInitialArguments({mFileSize});
516    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
517
518    return outputStream;
519}
520
521
522StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
523    //// Decode Block Information
524    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
525    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
526    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
527
528    //// Generate Helper Markers Extenders, FX, XF
529//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
530//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
531//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
532//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
533
534
535    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
536    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
537    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
538
539
540    // Produce unswizzled bit streams
541    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
542    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
543    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
544    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
545
546
547
548    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
549    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
550
551
552    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
553    lz4AioK->setInitialArguments({mFileSize});
554    mPxDriver.makeKernelCall(
555            lz4AioK,
556            {
557                    mCompressedByteStream,
558
559//                    Extenders,
560
561                    // Block Data
562                    BlockData_IsCompressed,
563                    BlockData_BlockStart,
564                    BlockData_BlockEnd,
565
566                    u16Swizzle0,
567                    u16Swizzle1
568            }, {
569                    decompressedSwizzled0,
570                    decompressedSwizzled1
571            });
572
573
574    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
575
576    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
577    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
578
579    return decompressionBitStream;
580}
581
582parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
583    //// Decode Block Information
584    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
585    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
586    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
587
588    //// Generate Helper Markers Extenders
589//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
590//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
591//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
592//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
593
594    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
595    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
596    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
597
598
599    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
600
601    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
602    lz4AioK->setInitialArguments({mFileSize});
603    mPxDriver.makeKernelCall(
604            lz4AioK,
605            {
606                    mCompressedByteStream,
607
608//                    Extenders,
609
610                    // Block Data
611                    BlockData_IsCompressed,
612                    BlockData_BlockStart,
613                    BlockData_BlockEnd
614            }, {
615                    decompressionByteStream
616            });
617
618    return decompressionByteStream;
619
620}
621
622StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
623    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
624
625    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
626    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
627    lz4AioK->setInitialArguments({mFileSize});
628    mPxDriver.makeKernelCall(
629            lz4AioK,
630            {
631                    mCompressedByteStream,
632
633                    // Block Data
634                    blockInfo.isCompress,
635                    blockInfo.blockStart,
636                    blockInfo.blockEnd
637            }, {
638                    decompressionByteStream
639            });
640
641    return decompressionByteStream;
642}
643
644void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
645    //// Decode Block Information
646    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
647    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
648    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
649
650    //// Generate Helper Markers Extenders, FX, XF
651    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
652    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
653    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
654    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
655
656
657    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
658    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
659    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
660
661    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
662
663    //TODO handle uncompressed part
664    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
665    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
666    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
667
668    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
669    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
670    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
671
672    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
673    Lz4IndexBuilderK->setInitialArguments({mFileSize});
674    mPxDriver.makeKernelCall(
675            Lz4IndexBuilderK,
676            {
677                    mCompressedByteStream,
678                    Extenders,
679
680                    // Block Data
681                    BlockData_IsCompressed,
682                    BlockData_BlockStart,
683                    BlockData_BlockEnd
684            }, {
685                    //Uncompressed Data
686                    UncompressedStartPos,
687                    UncompressedLength,
688                    UncompressedOutputPos,
689
690                    mDeletionMarker,
691                    mM0Marker,
692                    mMatchOffsetMarker
693            });
694
695    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
696    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
697
698}
699
700std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
701    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
702    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
703
704    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
705    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
706    return std::make_pair(u16Swizzle0, u16Swizzle1);
707}
708
709void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
710    if (!mCompressionMarker) {
711        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
712        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
713        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
714    }
715}
716
717parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
718    this->generateCompressionMarker(iBuilder);
719
720    // Deletion
721    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
722    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
723
724    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
725    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
726
727    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
728    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
729    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
730
731    return compressedBits;
732}
733
734int LZ4Generator::get4MbBufferBlocks() {
735    return mLz4BlockSize / codegen::BlockSize;
736}
737
738int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
739    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
740}
741int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
742    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
743}
744
745LZ4BlockInfo LZ4Generator::getBlockInfo(const std::unique_ptr<kernel::KernelBuilder> & b) {
746    LZ4BlockInfo blockInfo;
747    blockInfo.isCompress = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getInputBufferBlocks(b), 1);
748    blockInfo.blockStart = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
749    blockInfo.blockEnd = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
750
751    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(b);
752    blockDecoderK->setInitialArguments({b->CreateTrunc(mHasBlockChecksum, b->getInt1Ty()), mHeaderSize, mFileSize});
753    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd});
754
755    return blockInfo;
756}
757
758
759// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.