source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6119

Last change on this file since 6119 was 6119, checked in by xwa163, 8 months ago
  1. Add some BasisSetNumbering option to fix bug of multiplexing
  2. Use BigEndian BitNumbering for lz4 and lzparabix related pipeline
  3. Support multiplexing in LZ4BitStreamAio pipeline
File size: 36.0 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
23#include <kernels/lz4/lz4_block_decoder.h>
24#include <kernels/lz4/lz4_index_builder.h>
25#include <kernels/lz4/aio/lz4_bytestream_aio.h>
26#include <kernels/lz4/aio/lz4_parallel_bytestream_aio.h>
27#include <kernels/lz4/aio/lz4_swizzled_aio.h>
28#include <kernels/lz4/aio/lz4_bitstream_aio.h>
29#include <kernels/bitstream_pdep_kernel.h>
30#include <kernels/lz4/lz4_bitstream_not_kernel.h>
31
32namespace re { class CC; }
33
34using namespace llvm;
35using namespace parabix;
36using namespace kernel;
37
38LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
39    mCompressionMarker = NULL;
40}
41
42MainFunctionType LZ4Generator::getMainFunc() {
43    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
44}
45
46void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
47    auto & iBuilder = mPxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
50    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55    //// Decode Block Information
56    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
57    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
58    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
59
60    //// Generate Helper Markers Extenders, FX, XF
61    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
62    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
63    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
64    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
65
66
67    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
68    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
69    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
70
71    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
72    //TODO handle uncompressed part
73    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
74    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
75    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
76
77    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
78    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
79    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
80
81    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
82    Lz4IndexBuilderK->setInitialArguments({mFileSize});
83    mPxDriver.makeKernelCall(
84            Lz4IndexBuilderK,
85            {
86                    mCompressedByteStream,
87                    Extenders,
88
89                    // Block Data
90                    BlockData_IsCompressed,
91                    BlockData_BlockStart,
92                    BlockData_BlockEnd
93            }, {
94                    //Uncompressed Data
95                    UncompressedStartPos,
96                    UncompressedLength,
97                    UncompressedOutputPos,
98
99                    mCompressionMarker,
100                    mM0Marker,
101                    mMatchOffsetMarker
102            });
103
104    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
105    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
106
107
108    // Deletion
109    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
110    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
111
112    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
113    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
114
115    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
116    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
117    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
118
119
120    StreamSetBuffer * const extractedBits = compressedBits;
121
122    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
123    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
124
125    // --------------------------------------------------------
126    // End
127    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
128
129    outK->setInitialArguments({iBuilder->GetString(outputFile)});
130    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
131
132    mPxDriver.generatePipelineIR();
133    mPxDriver.deallocateBuffers();
134
135    iBuilder->CreateRetVoid();
136
137    mPxDriver.finalizeObject();
138}
139
140void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
141    auto & iBuilder = mPxDriver.getBuilder();
142    this->generateMainFunc(iBuilder);
143
144    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
145
146    // GeneratePipeline
147    this->generateLoadByteStreamAndBitStream(iBuilder);
148    this->generateExtractAndDepositMarkers(iBuilder);
149    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
150
151    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
152    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
153
154    // --------------------------------------------------------
155    // End
156    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
157
158    outK->setInitialArguments({iBuilder->GetString(outputFile)});
159    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
160
161    mPxDriver.generatePipelineIR();
162    mPxDriver.deallocateBuffers();
163
164    iBuilder->CreateRetVoid();
165
166    mPxDriver.finalizeObject();
167}
168
169void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
170    auto & iBuilder = mPxDriver.getBuilder();
171    this->generateMainFunc(iBuilder);
172
173    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
174
175    // GeneratePipeline
176    this->generateLoadByteStreamAndBitStream(iBuilder);
177    this->generateExtractAndDepositMarkers(iBuilder);
178    auto swizzle = this->generateSwizzleExtractData(iBuilder);
179
180
181    // Produce unswizzled bit streams
182    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
183    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
184
185    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
186
187
188    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
189    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
190
191    // --------------------------------------------------------
192    // End
193    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
194
195    outK->setInitialArguments({iBuilder->GetString(outputFile)});
196    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
197
198    mPxDriver.generatePipelineIR();
199    mPxDriver.deallocateBuffers();
200
201    iBuilder->CreateRetVoid();
202
203    mPxDriver.finalizeObject();
204}
205
206void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
207    auto & iBuilder = mPxDriver.getBuilder();
208    this->generateMainFunc(iBuilder);
209
210    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
211
212    // GeneratePipeline
213    this->generateLoadByteStreamAndBitStream(iBuilder);
214    this->generateExtractAndDepositMarkers(iBuilder);
215    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
216
217    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
218    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
219    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
220
221    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
222    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
223
224    // --------------------------------------------------------
225    // End
226    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
227
228    outK->setInitialArguments({iBuilder->GetString(outputFile)});
229    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
230
231    mPxDriver.generatePipelineIR();
232    mPxDriver.deallocateBuffers();
233
234    iBuilder->CreateRetVoid();
235
236    mPxDriver.finalizeObject();
237}
238
239void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
240    auto & iBuilder = mPxDriver.getBuilder();
241    this->generateMainFunc(iBuilder);
242
243    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
244
245    // GeneratePipeline
246    this->generateLoadByteStreamAndBitStream(iBuilder);
247    this->generateExtractAndDepositMarkers(iBuilder);
248
249    auto swizzle = this->generateSwizzleExtractData(iBuilder);
250
251    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
252    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
253
254    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
255    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
256
257    // Produce unswizzled bit streams
258    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
259    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
260    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
261
262    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
263    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
264
265    // --------------------------------------------------------
266    // End
267    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
268    outK->setInitialArguments({iBuilder->GetString(outputFile)});
269    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
270
271    mPxDriver.generatePipelineIR();
272    mPxDriver.deallocateBuffers();
273
274    iBuilder->CreateRetVoid();
275
276    mPxDriver.finalizeObject();
277}
278
279void LZ4Generator::generatePipeline(const std::string &outputFile) {
280    auto & iBuilder = mPxDriver.getBuilder();
281    this->generateMainFunc(iBuilder);
282
283    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
284
285    // GeneratePipeline
286    this->generateLoadByteStreamAndBitStream(iBuilder);
287    this->generateExtractAndDepositMarkers(iBuilder);
288    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
289
290    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
291    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
292    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
293
294    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
295    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
296    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
297
298    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
299    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
300
301    // --------------------------------------------------------
302    // End
303    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
304
305    outK->setInitialArguments({iBuilder->GetString(outputFile)});
306    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
307
308    mPxDriver.generatePipelineIR();
309    mPxDriver.deallocateBuffers();
310
311    iBuilder->CreateRetVoid();
312
313    mPxDriver.finalizeObject();
314}
315
316void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
317    auto & iBuilder = mPxDriver.getBuilder();
318    this->generateMainFunc(iBuilder);
319
320    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
321
322    // GeneratePipeline
323    this->generateLoadByteStreamAndBitStream(iBuilder);
324    this->generateExtractAndDepositMarkers(iBuilder);
325
326    auto swizzle = this->generateSwizzleExtractData(iBuilder);
327
328    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
329    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
330
331    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
332    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
333
334    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
335    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
336
337    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
338    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
339
340
341    // Produce unswizzled bit streams
342    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
343    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
344    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
345
346
347    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
348    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
349
350    // --------------------------------------------------------
351    // End
352    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
353    outK->setInitialArguments({iBuilder->GetString(outputFile)});
354    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
355
356    mPxDriver.generatePipelineIR();
357    mPxDriver.deallocateBuffers();
358
359    iBuilder->CreateRetVoid();
360
361    mPxDriver.finalizeObject();
362}
363
364void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
365    Module * M = iBuilder->getModule();
366    Type * const sizeTy = iBuilder->getSizeTy();
367    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
368    Type * const voidTy = iBuilder->getVoidTy();
369    Type * const inputType = iBuilder->getInt8PtrTy();
370
371    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
372    main->setCallingConv(CallingConv::C);
373    Function::arg_iterator args = main->arg_begin();
374    mInputStream = &*(args++);
375    mInputStream->setName("input");
376
377    mHeaderSize = &*(args++);
378    mHeaderSize->setName("mHeaderSize");
379
380    mFileSize = &*(args++);
381    mFileSize->setName("mFileSize");
382
383    mHasBlockChecksum = &*(args++);
384    mHasBlockChecksum->setName("mHasBlockChecksum");
385    // TODO for now, we do not handle blockCheckSum
386    mHasBlockChecksum = iBuilder->getInt1(false);
387
388    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
389}
390
391void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
392    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
393    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
394    sourceK->setInitialArguments({mInputStream, mFileSize});
395    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
396}
397void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
398    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
399    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
400
401    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
402    sourceK->setInitialArguments({mInputStream, mFileSize});
403    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
404    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
405    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
406}
407
408StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
409    return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0];
410}
411
412std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach(
413        std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) {
414    auto mGrepDriver = &mPxDriver;
415    auto & iBuilder = mGrepDriver->getBuilder();
416
417    //// Decode Block Information
418    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
419    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
420    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
421
422
423    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
424    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
425    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
426
427
428    std::vector<StreamSetBuffer *> inputStreams = {
429            mCompressedByteStream,
430
431            // Block Data
432            BlockData_IsCompressed,
433            BlockData_BlockStart,
434            BlockData_BlockEnd
435    };
436
437    std::vector<StreamSetBuffer *> outputStream;
438    std::vector<unsigned> numbersOfStreams;
439
440    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
441        unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
442        numbersOfStreams.push_back(numOfStreams);
443        inputStreams.push_back(compressedBitStreams[i]);
444        outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder)));
445    }
446
447    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamAioKernel>(iBuilder, numbersOfStreams);
448    lz4AioK->setInitialArguments({mFileSize});
449    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
450
451    return outputStream;
452}
453
454
455StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
456    //// Decode Block Information
457    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
458    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
459    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
460
461    //// Generate Helper Markers Extenders, FX, XF
462//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
463//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
464//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
465//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
466
467
468    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
469    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
470    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
471
472
473    // Produce unswizzled bit streams
474    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
475    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
476    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
477    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
478
479
480
481    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
482    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
483
484
485    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
486    lz4AioK->setInitialArguments({mFileSize});
487    mPxDriver.makeKernelCall(
488            lz4AioK,
489            {
490                    mCompressedByteStream,
491
492//                    Extenders,
493
494                    // Block Data
495                    BlockData_IsCompressed,
496                    BlockData_BlockStart,
497                    BlockData_BlockEnd,
498
499                    u16Swizzle0,
500                    u16Swizzle1
501            }, {
502                    decompressedSwizzled0,
503                    decompressedSwizzled1
504            });
505
506
507    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
508
509    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
510    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
511
512    return decompressionBitStream;
513}
514
515parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
516    //// Decode Block Information
517    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
518    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
519    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
520
521    //// Generate Helper Markers Extenders
522//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
523//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
524//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
525//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
526
527    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
528    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
529    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
530
531
532    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
533
534    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
535    lz4AioK->setInitialArguments({mFileSize});
536    mPxDriver.makeKernelCall(
537            lz4AioK,
538            {
539                    mCompressedByteStream,
540
541//                    Extenders,
542
543                    // Block Data
544                    BlockData_IsCompressed,
545                    BlockData_BlockStart,
546                    BlockData_BlockEnd
547            }, {
548                    decompressionByteStream
549            });
550
551    return decompressionByteStream;
552
553}
554
555StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
556    //// Decode Block Information
557    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
558    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
559    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
560
561
562    //// Generate Helper Markers Extenders
563//    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
564//    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
565//    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
566//    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
567
568
569    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
570    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
571    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
572
573
574    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
575
576    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
577    lz4AioK->setInitialArguments({mFileSize});
578    mPxDriver.makeKernelCall(
579            lz4AioK,
580            {
581                    mCompressedByteStream,
582//                    Extenders,
583
584                    // Block Data
585                    BlockData_IsCompressed,
586                    BlockData_BlockStart,
587                    BlockData_BlockEnd
588            }, {
589                    decompressionByteStream
590            });
591
592    return decompressionByteStream;
593}
594
595void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
596    //// Decode Block Information
597    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
598    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
599    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
600
601    //// Generate Helper Markers Extenders, FX, XF
602    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
603    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
604    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
605    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
606
607
608    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
609    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
610    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
611
612    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
613
614    //TODO handle uncompressed part
615    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
616    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
617    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
618
619    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
620    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
621    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
622
623    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
624    Lz4IndexBuilderK->setInitialArguments({mFileSize});
625    mPxDriver.makeKernelCall(
626            Lz4IndexBuilderK,
627            {
628                    mCompressedByteStream,
629                    Extenders,
630
631                    // Block Data
632                    BlockData_IsCompressed,
633                    BlockData_BlockStart,
634                    BlockData_BlockEnd
635            }, {
636                    //Uncompressed Data
637                    UncompressedStartPos,
638                    UncompressedLength,
639                    UncompressedOutputPos,
640
641                    mDeletionMarker,
642                    mM0Marker,
643                    mMatchOffsetMarker
644            });
645
646    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
647    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
648
649}
650
651std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
652    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
653    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
654
655    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
656    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
657    return std::make_pair(u16Swizzle0, u16Swizzle1);
658}
659
660void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
661    if (!mCompressionMarker) {
662        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
663        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
664        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
665    }
666}
667
668parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
669    this->generateCompressionMarker(iBuilder);
670
671    // Deletion
672    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
673    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
674
675    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
676    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
677
678    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
679    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
680    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
681
682    return compressedBits;
683}
684
685int LZ4Generator::get4MbBufferBlocks() {
686    return mLz4BlockSize / codegen::BlockSize;
687}
688
689int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
690    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
691}
692int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
693    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
694}
695
696
697
698
699// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.