source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6066

Last change on this file since 6066 was 6066, checked in by xwa163, 12 months ago

fix some warning in lz4 related kernels

File size: 33.5 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_block_decoder.h>
24#include <kernels/lz4/lz4_index_builder.h>
25#include <kernels/lz4/lz4_index_builder_new.h>
26#include <kernels/lz4/lz4_bytestream_aio.h>
27#include <kernels/lz4/lz4_parallel_bytestream_aio.h>
28#include <kernels/lz4/lz4_swizzled_aio.h>
29#include <kernels/bitstream_pdep_kernel.h>
30#include <kernels/lz4/lz4_bitstream_not_kernel.h>
31
32namespace re { class CC; }
33
34using namespace llvm;
35using namespace parabix;
36using namespace kernel;
37
38LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
39    mCompressionMarker = NULL;
40}
41
42MainFunctionType LZ4Generator::getMainFunc() {
43    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
44}
45
46void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
47    auto & iBuilder = mPxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
50    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55    //// Decode Block Information
56    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
57    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
58    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
59
60    //// Generate Helper Markers Extenders, FX, XF
61    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
62    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
63    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
64    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
65
66
67    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
68    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
69    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
70
71    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
72    //TODO handle uncompressed part
73    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
74    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
75    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
76
77    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
78    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
79    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
80
81    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderNewKernel>(iBuilder);
82    Lz4IndexBuilderK->setInitialArguments({mFileSize});
83    mPxDriver.makeKernelCall(
84            Lz4IndexBuilderK,
85            {
86                    mCompressedByteStream,
87                    Extenders,
88
89                    // Block Data
90                    BlockData_IsCompressed,
91                    BlockData_BlockStart,
92                    BlockData_BlockEnd
93            }, {
94                    //Uncompressed Data
95                    UncompressedStartPos,
96                    UncompressedLength,
97                    UncompressedOutputPos,
98
99                    mCompressionMarker,
100                    mM0Marker,
101                    mMatchOffsetMarker
102            });
103
104    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
105    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
106
107
108    // Deletion
109    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
110    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
111
112    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
113    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
114
115    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
116    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
117    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
118
119
120    StreamSetBuffer * const extractedBits = compressedBits;
121
122    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
123    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
124
125    // --------------------------------------------------------
126    // End
127    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
128
129    outK->setInitialArguments({iBuilder->GetString(outputFile)});
130    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
131
132    mPxDriver.generatePipelineIR();
133    mPxDriver.deallocateBuffers();
134
135    iBuilder->CreateRetVoid();
136
137    mPxDriver.finalizeObject();
138}
139
140void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
141    auto & iBuilder = mPxDriver.getBuilder();
142    this->generateMainFunc(iBuilder);
143
144    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
145
146    // GeneratePipeline
147    this->generateLoadByteStreamAndBitStream(iBuilder);
148    this->generateExtractAndDepositMarkers(iBuilder);
149    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
150
151    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
152    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
153
154    // --------------------------------------------------------
155    // End
156    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
157
158    outK->setInitialArguments({iBuilder->GetString(outputFile)});
159    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
160
161    mPxDriver.generatePipelineIR();
162    mPxDriver.deallocateBuffers();
163
164    iBuilder->CreateRetVoid();
165
166    mPxDriver.finalizeObject();
167}
168
169void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
170    auto & iBuilder = mPxDriver.getBuilder();
171    this->generateMainFunc(iBuilder);
172
173    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
174
175    // GeneratePipeline
176    this->generateLoadByteStreamAndBitStream(iBuilder);
177    this->generateExtractAndDepositMarkers(iBuilder);
178    auto swizzle = this->generateSwizzleExtractData(iBuilder);
179
180
181    // Produce unswizzled bit streams
182    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
183    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
184
185    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
186
187
188    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
189    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
190
191    // --------------------------------------------------------
192    // End
193    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
194
195    outK->setInitialArguments({iBuilder->GetString(outputFile)});
196    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
197
198    mPxDriver.generatePipelineIR();
199    mPxDriver.deallocateBuffers();
200
201    iBuilder->CreateRetVoid();
202
203    mPxDriver.finalizeObject();
204}
205
206void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
207    auto & iBuilder = mPxDriver.getBuilder();
208    this->generateMainFunc(iBuilder);
209
210    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
211
212    // GeneratePipeline
213    this->generateLoadByteStreamAndBitStream(iBuilder);
214    this->generateExtractAndDepositMarkers(iBuilder);
215    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
216
217    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
218    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
219    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
220
221    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
222    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
223
224    // --------------------------------------------------------
225    // End
226    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
227
228    outK->setInitialArguments({iBuilder->GetString(outputFile)});
229    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
230
231    mPxDriver.generatePipelineIR();
232    mPxDriver.deallocateBuffers();
233
234    iBuilder->CreateRetVoid();
235
236    mPxDriver.finalizeObject();
237}
238
239void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
240    auto & iBuilder = mPxDriver.getBuilder();
241    this->generateMainFunc(iBuilder);
242
243    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
244
245    // GeneratePipeline
246    this->generateLoadByteStreamAndBitStream(iBuilder);
247    this->generateExtractAndDepositMarkers(iBuilder);
248
249    auto swizzle = this->generateSwizzleExtractData(iBuilder);
250
251    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
252    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
253
254    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
255    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
256
257    // Produce unswizzled bit streams
258    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
259    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
260    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
261
262    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
263    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
264
265    // --------------------------------------------------------
266    // End
267    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
268    outK->setInitialArguments({iBuilder->GetString(outputFile)});
269    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
270
271    mPxDriver.generatePipelineIR();
272    mPxDriver.deallocateBuffers();
273
274    iBuilder->CreateRetVoid();
275
276    mPxDriver.finalizeObject();
277}
278
279void LZ4Generator::generatePipeline(const std::string &outputFile) {
280    auto & iBuilder = mPxDriver.getBuilder();
281    this->generateMainFunc(iBuilder);
282
283    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
284
285    // GeneratePipeline
286    this->generateLoadByteStreamAndBitStream(iBuilder);
287    this->generateExtractAndDepositMarkers(iBuilder);
288    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
289
290    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
291    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
292    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
293
294    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
295    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
296    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
297
298    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
299    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
300
301    // --------------------------------------------------------
302    // End
303    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
304
305    outK->setInitialArguments({iBuilder->GetString(outputFile)});
306    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
307
308    mPxDriver.generatePipelineIR();
309    mPxDriver.deallocateBuffers();
310
311    iBuilder->CreateRetVoid();
312
313    mPxDriver.finalizeObject();
314}
315
316void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
317    auto & iBuilder = mPxDriver.getBuilder();
318    this->generateMainFunc(iBuilder);
319
320    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
321
322    // GeneratePipeline
323    this->generateLoadByteStreamAndBitStream(iBuilder);
324    this->generateExtractAndDepositMarkers(iBuilder);
325
326    auto swizzle = this->generateSwizzleExtractData(iBuilder);
327
328    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
329    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
330
331    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
332    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
333
334    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
335    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
336
337    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
338    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
339
340
341    // Produce unswizzled bit streams
342    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
343    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
344    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
345
346
347    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
348    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
349
350    // --------------------------------------------------------
351    // End
352    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
353    outK->setInitialArguments({iBuilder->GetString(outputFile)});
354    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
355
356    mPxDriver.generatePipelineIR();
357    mPxDriver.deallocateBuffers();
358
359    iBuilder->CreateRetVoid();
360
361    mPxDriver.finalizeObject();
362}
363
364void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
365    Module * M = iBuilder->getModule();
366    Type * const sizeTy = iBuilder->getSizeTy();
367    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
368    Type * const voidTy = iBuilder->getVoidTy();
369    Type * const inputType = iBuilder->getInt8PtrTy();
370
371    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
372    main->setCallingConv(CallingConv::C);
373    Function::arg_iterator args = main->arg_begin();
374    mInputStream = &*(args++);
375    mInputStream->setName("input");
376
377    mHeaderSize = &*(args++);
378    mHeaderSize->setName("mHeaderSize");
379
380    mFileSize = &*(args++);
381    mFileSize->setName("mFileSize");
382
383    mHasBlockChecksum = &*(args++);
384    mHasBlockChecksum->setName("mHasBlockChecksum");
385    // TODO for now, we do not handle blockCheckSum
386    mHasBlockChecksum = iBuilder->getInt1(false);
387
388    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
389}
390
391void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
392    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
393    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
394    sourceK->setInitialArguments({mInputStream, mFileSize});
395    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
396}
397void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
398    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
399    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
400
401    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
402    sourceK->setInitialArguments({mInputStream, mFileSize});
403    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
404    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
405    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
406}
407
408StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
409    //// Decode Block Information
410    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
411    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
412    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
413
414    //// Generate Helper Markers Extenders, FX, XF
415    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
416    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
417    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
418    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
419
420
421    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
422    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
423    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
424
425
426    // Produce unswizzled bit streams
427    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
428    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
429    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
430    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
431
432
433
434    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
435    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
436
437
438    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
439    lz4AioK->setInitialArguments({mFileSize});
440    mPxDriver.makeKernelCall(
441            lz4AioK,
442            {
443                    mCompressedByteStream,
444                    Extenders,
445
446                    // Block Data
447                    BlockData_IsCompressed,
448                    BlockData_BlockStart,
449                    BlockData_BlockEnd,
450
451                    u16Swizzle0,
452                    u16Swizzle1
453            }, {
454                    decompressedSwizzled0,
455                    decompressedSwizzled1
456            });
457
458
459    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
460
461    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
462    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
463
464    return decompressionBitStream;
465}
466
467parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
468    //// Decode Block Information
469    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
470    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
471    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
472
473    //// Generate Helper Markers Extenders
474    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
475    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
476    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
477    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
478
479    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
480    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
481    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
482
483
484    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
485
486    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder);
487    lz4AioK->setInitialArguments({mFileSize});
488    mPxDriver.makeKernelCall(
489            lz4AioK,
490            {
491                    mCompressedByteStream,
492                    Extenders,
493
494                    // Block Data
495                    BlockData_IsCompressed,
496                    BlockData_BlockStart,
497                    BlockData_BlockEnd
498            }, {
499                    decompressionByteStream
500            });
501
502    return decompressionByteStream;
503
504}
505
506StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
507    //// Decode Block Information
508    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
509    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
510    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
511
512
513    //// Generate Helper Markers Extenders
514    /*
515    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
516    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
517    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
518    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
519    */
520
521
522    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
523    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
524    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
525
526
527    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
528
529    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
530    lz4AioK->setInitialArguments({mFileSize});
531    mPxDriver.makeKernelCall(
532            lz4AioK,
533            {
534                    mCompressedByteStream,
535//                    Extenders,
536
537                    // Block Data
538                    BlockData_IsCompressed,
539                    BlockData_BlockStart,
540                    BlockData_BlockEnd
541            }, {
542                    decompressionByteStream
543            });
544
545    return decompressionByteStream;
546}
547
548void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
549    //// Decode Block Information
550    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
551    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
552    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
553
554    //// Generate Helper Markers Extenders, FX, XF
555    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
556    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
557    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
558    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
559
560
561    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
562    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
563    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
564
565    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
566
567    //TODO handle uncompressed part
568    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
569    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
570    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
571
572    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
573    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
574    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
575
576    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
577    Lz4IndexBuilderK->setInitialArguments({mFileSize});
578    mPxDriver.makeKernelCall(
579            Lz4IndexBuilderK,
580            {
581                    mCompressedByteStream,
582                    Extenders,
583
584                    // Block Data
585                    BlockData_IsCompressed,
586                    BlockData_BlockStart,
587                    BlockData_BlockEnd
588            }, {
589                    //Uncompressed Data
590                    UncompressedStartPos,
591                    UncompressedLength,
592                    UncompressedOutputPos,
593
594                    mDeletionMarker,
595                    mM0Marker,
596                    mMatchOffsetMarker
597            });
598
599    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
600    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
601
602}
603
604std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
605    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
606    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
607
608    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
609    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
610    return std::make_pair(u16Swizzle0, u16Swizzle1);
611}
612
613void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
614    if (!mCompressionMarker) {
615        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
616        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
617        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
618    }
619}
620
621parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
622    this->generateCompressionMarker(iBuilder);
623
624    // Deletion
625    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
626    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
627
628    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
629    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
630
631    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
632    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
633    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
634
635    return compressedBits;
636}
637
638int LZ4Generator::get4MbBufferBlocks() {
639    return 4 * 1024 * 1024 / codegen::BlockSize;
640}
641
642int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
643    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
644}
645int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
646    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
647}
648
649
650
651
652// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.