source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6066

Last change on this file since 6066 was 6066, checked in by xwa163, 12 months ago

Fix some warnings in LZ4-related kernels

File size: 33.5 KB
RevLine 
[5864]1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
[6026]20#include <kernels/swizzled_multiple_pdep_kernel.h>
[5906]21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
[6039]22#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
[5948]23#include <kernels/lz4/lz4_block_decoder.h>
[5921]24#include <kernels/lz4/lz4_index_builder.h>
[6059]25#include <kernels/lz4/lz4_index_builder_new.h>
26#include <kernels/lz4/lz4_bytestream_aio.h>
[6064]27#include <kernels/lz4/lz4_parallel_bytestream_aio.h>
[6059]28#include <kernels/lz4/lz4_swizzled_aio.h>
[6029]29#include <kernels/bitstream_pdep_kernel.h>
[6039]30#include <kernels/lz4/lz4_bitstream_not_kernel.h>
[5864]31
32namespace re { class CC; }
33
34using namespace llvm;
35using namespace parabix;
36using namespace kernel;
37
[6026]38LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
[6044]39    mCompressionMarker = NULL;
[5864]40}
41
42MainFunctionType LZ4Generator::getMainFunc() {
[6026]43    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
[5864]44}
45
[6059]46void LZ4Generator::generateNewExtractOnlyPipeline(const std::string &outputFile) {
47    auto & iBuilder = mPxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
[6066]50    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6059]51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55    //// Decode Block Information
[6066]56    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
57    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
58    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]59
60    //// Generate Helper Markers Extenders, FX, XF
[6066]61    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
62    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6059]63    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
64    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
65
66
67    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
68    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
69    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
70
71    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
72    //TODO handle uncompressed part
[6066]73    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
74    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
75    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]76
[6066]77    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
78    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
79    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
[6059]80
81    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderNewKernel>(iBuilder);
82    Lz4IndexBuilderK->setInitialArguments({mFileSize});
83    mPxDriver.makeKernelCall(
84            Lz4IndexBuilderK,
85            {
86                    mCompressedByteStream,
87                    Extenders,
88
89                    // Block Data
90                    BlockData_IsCompressed,
91                    BlockData_BlockStart,
92                    BlockData_BlockEnd
93            }, {
94                    //Uncompressed Data
95                    UncompressedStartPos,
96                    UncompressedLength,
97                    UncompressedOutputPos,
98
99                    mCompressionMarker,
100                    mM0Marker,
101                    mMatchOffsetMarker
102            });
103
104    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
105    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
106
107
108    // Deletion
[6066]109    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
110    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
[6059]111
112    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
113    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
114
[6066]115    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6059]116    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
117    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
118
119
120    StreamSetBuffer * const extractedBits = compressedBits;
121
122    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
123    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
124
125    // --------------------------------------------------------
126    // End
127    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
128
129    outK->setInitialArguments({iBuilder->GetString(outputFile)});
130    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
131
132    mPxDriver.generatePipelineIR();
133    mPxDriver.deallocateBuffers();
134
135    iBuilder->CreateRetVoid();
136
137    mPxDriver.finalizeObject();
138}
139
[6039]140void LZ4Generator::generateExtractOnlyPipeline(const std::string &outputFile) {
[6026]141    auto & iBuilder = mPxDriver.getBuilder();
[5864]142    this->generateMainFunc(iBuilder);
143
[6066]144    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5864]145
146    // GeneratePipeline
147    this->generateLoadByteStreamAndBitStream(iBuilder);
[6039]148    this->generateExtractAndDepositMarkers(iBuilder);
149    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]150
[6039]151    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
152    mPxDriver.makeKernelCall(p2sK, {extractedBits}, {DecompressedByteStream});
[5948]153
[6039]154    // --------------------------------------------------------
155    // End
156    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]157
[6039]158    outK->setInitialArguments({iBuilder->GetString(outputFile)});
159    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5948]160
[6039]161    mPxDriver.generatePipelineIR();
162    mPxDriver.deallocateBuffers();
163
164    iBuilder->CreateRetVoid();
165
166    mPxDriver.finalizeObject();
167}
168
169void LZ4Generator::generateSwizzledExtractOnlyPipeline(const std::string &outputFile) {
170    auto & iBuilder = mPxDriver.getBuilder();
171    this->generateMainFunc(iBuilder);
172
[6066]173    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]174
175    // GeneratePipeline
176    this->generateLoadByteStreamAndBitStream(iBuilder);
177    this->generateExtractAndDepositMarkers(iBuilder);
[5864]178    auto swizzle = this->generateSwizzleExtractData(iBuilder);
179
180
181    // Produce unswizzled bit streams
[6066]182    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]183    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[5864]184
[6026]185    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
[5864]186
187
[6026]188    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
189    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
[5864]190
191    // --------------------------------------------------------
192    // End
[6026]193    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]194
195    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]196    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]197
[6026]198    mPxDriver.generatePipelineIR();
199    mPxDriver.deallocateBuffers();
[5864]200
201    iBuilder->CreateRetVoid();
202
[6026]203    mPxDriver.finalizeObject();
[5864]204}
205
[5874]206void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
[6026]207    auto & iBuilder = mPxDriver.getBuilder();
[5874]208    this->generateMainFunc(iBuilder);
209
[6066]210    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5874]211
212    // GeneratePipeline
213    this->generateLoadByteStreamAndBitStream(iBuilder);
214    this->generateExtractAndDepositMarkers(iBuilder);
[6039]215    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5874]216
[6066]217    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
[6029]218    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
219    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
220
[6039]221    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
222    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[6029]223
[6039]224    // --------------------------------------------------------
225    // End
226    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
227
228    outK->setInitialArguments({iBuilder->GetString(outputFile)});
229    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
230
231    mPxDriver.generatePipelineIR();
232    mPxDriver.deallocateBuffers();
233
234    iBuilder->CreateRetVoid();
235
236    mPxDriver.finalizeObject();
237}
238
239void LZ4Generator::generateSwizzledExtractAndDepositOnlyPipeline(const std::string &outputFile) {
240    auto & iBuilder = mPxDriver.getBuilder();
241    this->generateMainFunc(iBuilder);
242
[6066]243    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]244
245    // GeneratePipeline
246    this->generateLoadByteStreamAndBitStream(iBuilder);
247    this->generateExtractAndDepositMarkers(iBuilder);
248
249    auto swizzle = this->generateSwizzleExtractData(iBuilder);
250
[6066]251    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
252    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[5874]253
[6026]254    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
255    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]256
257    // Produce unswizzled bit streams
[6066]258    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]259    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6029]260    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
[5874]261
[6026]262    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6029]263    mPxDriver.makeKernelCall(p2sK, {depositedBits}, {DecompressedByteStream});
[5874]264
265    // --------------------------------------------------------
266    // End
[6026]267    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5874]268    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]269    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5874]270
[6026]271    mPxDriver.generatePipelineIR();
272    mPxDriver.deallocateBuffers();
[5874]273
274    iBuilder->CreateRetVoid();
275
[6026]276    mPxDriver.finalizeObject();
[5874]277}
278
[6039]279void LZ4Generator::generatePipeline(const std::string &outputFile) {
[6026]280    auto & iBuilder = mPxDriver.getBuilder();
[5864]281    this->generateMainFunc(iBuilder);
282
[6066]283    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[5864]284
285    // GeneratePipeline
286    this->generateLoadByteStreamAndBitStream(iBuilder);
287    this->generateExtractAndDepositMarkers(iBuilder);
[6039]288    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
[5864]289
[6066]290    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
[6039]291    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
292    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
293
[6066]294    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6039]295    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
296    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
297
298    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
299    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
300
301    // --------------------------------------------------------
302    // End
303    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
304
305    outK->setInitialArguments({iBuilder->GetString(outputFile)});
306    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
307
308    mPxDriver.generatePipelineIR();
309    mPxDriver.deallocateBuffers();
310
311    iBuilder->CreateRetVoid();
312
313    mPxDriver.finalizeObject();
314}
315
316void LZ4Generator::generateSwizzledPipeline(const std::string &outputFile) {
317    auto & iBuilder = mPxDriver.getBuilder();
318    this->generateMainFunc(iBuilder);
319
[6066]320    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
[6039]321
322    // GeneratePipeline
323    this->generateLoadByteStreamAndBitStream(iBuilder);
324    this->generateExtractAndDepositMarkers(iBuilder);
325
[5864]326    auto swizzle = this->generateSwizzleExtractData(iBuilder);
327
[6066]328    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
329    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
[5864]330
[6026]331    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
332    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
[5874]333
[6066]334    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
335    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
[5906]336
[6026]337    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
338    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
[5906]339
340
[5864]341    // Produce unswizzled bit streams
[6066]342    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6026]343    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
[6039]344    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
[5864]345
346
[6026]347    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
[6039]348    mPxDriver.makeKernelCall(p2sK, {matchCopiedBits}, {DecompressedByteStream});
[5864]349
350    // --------------------------------------------------------
351    // End
[6026]352    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
[5864]353    outK->setInitialArguments({iBuilder->GetString(outputFile)});
[6026]354    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
[5864]355
[6026]356    mPxDriver.generatePipelineIR();
357    mPxDriver.deallocateBuffers();
[5864]358
359    iBuilder->CreateRetVoid();
360
[6026]361    mPxDriver.finalizeObject();
[5864]362}
363
364void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
365    Module * M = iBuilder->getModule();
366    Type * const sizeTy = iBuilder->getSizeTy();
367    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
368    Type * const voidTy = iBuilder->getVoidTy();
369    Type * const inputType = iBuilder->getInt8PtrTy();
370
371    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
372    main->setCallingConv(CallingConv::C);
373    Function::arg_iterator args = main->arg_begin();
[6026]374    mInputStream = &*(args++);
375    mInputStream->setName("input");
[5864]376
[6026]377    mHeaderSize = &*(args++);
378    mHeaderSize->setName("mHeaderSize");
[5864]379
[6026]380    mFileSize = &*(args++);
381    mFileSize->setName("mFileSize");
[5864]382
[6026]383    mHasBlockChecksum = &*(args++);
384    mHasBlockChecksum->setName("mHasBlockChecksum");
[6020]385    // TODO for now, we do not handle blockCheckSum
[6026]386    mHasBlockChecksum = iBuilder->getInt1(false);
[5864]387
388    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
389}
390
[6064]391void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
392    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
393    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
394    sourceK->setInitialArguments({mInputStream, mFileSize});
395    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
396}
[5864]397void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6047]398    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
[6066]399    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
[5864]400
[6026]401    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
402    sourceK->setInitialArguments({mInputStream, mFileSize});
403    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
404    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
405    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
[5864]406}
407
[6059]408StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[5864]409    //// Decode Block Information
[6066]410    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
411    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
412    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[5864]413
414    //// Generate Helper Markers Extenders, FX, XF
[6066]415    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
416    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6059]417    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
418    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[5864]419
[6059]420
421    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
422    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
423    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
424
425
426    // Produce unswizzled bit streams
[6066]427    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
428    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]429    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
430    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
431
432
433
[6066]434    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
435    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]436
437
438    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 2, 4);
439    lz4AioK->setInitialArguments({mFileSize});
440    mPxDriver.makeKernelCall(
441            lz4AioK,
442            {
443                    mCompressedByteStream,
444                    Extenders,
445
446                    // Block Data
447                    BlockData_IsCompressed,
448                    BlockData_BlockStart,
449                    BlockData_BlockEnd,
450
451                    u16Swizzle0,
452                    u16Swizzle1
453            }, {
454                    decompressedSwizzled0,
455                    decompressedSwizzled1
456            });
457
458
[6066]459    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
[6059]460
461    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
462    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
463
464    return decompressionBitStream;
465}
466
[6064]467parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
468    //// Decode Block Information
[6066]469    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
470    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
471    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]472
[6064]473    //// Generate Helper Markers Extenders
[6066]474    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
475    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6064]476    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
477    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[6059]478
[6064]479    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
480    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
481    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
482
483
[6066]484    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[6064]485
486    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder);
487    lz4AioK->setInitialArguments({mFileSize});
488    mPxDriver.makeKernelCall(
489            lz4AioK,
490            {
491                    mCompressedByteStream,
492                    Extenders,
493
494                    // Block Data
495                    BlockData_IsCompressed,
496                    BlockData_BlockStart,
497                    BlockData_BlockEnd
498            }, {
499                    decompressionByteStream
500            });
501
502    return decompressionByteStream;
503
504}
505
[6059]506StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
507    //// Decode Block Information
[6066]508    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
509    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
510    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]511
[6064]512
513    //// Generate Helper Markers Extenders
514    /*
[6059]515    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
516    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
[6026]517    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
518    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
[6064]519    */
[5864]520
521
[6026]522    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
523    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
524    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
[5864]525
[5974]526
[6066]527    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[5974]528
[6059]529    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
530    lz4AioK->setInitialArguments({mFileSize});
531    mPxDriver.makeKernelCall(
532            lz4AioK,
533            {
534                    mCompressedByteStream,
[6064]535//                    Extenders,
[5974]536
[6059]537                    // Block Data
538                    BlockData_IsCompressed,
539                    BlockData_BlockStart,
540                    BlockData_BlockEnd
541            }, {
542                    decompressionByteStream
543            });
544
545    return decompressionByteStream;
546}
547
548void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
549    //// Decode Block Information
[6066]550    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
551    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
552    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[6059]553
554    //// Generate Helper Markers Extenders, FX, XF
[6066]555    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
556    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6059]557    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
558    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
559
560
561    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
562    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
563    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
564
[5864]565    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
566
567    //TODO handle uncompressed part
[6066]568    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
569    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
570    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
[5864]571
[6066]572    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
573    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
574    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
[5864]575
[6026]576    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
577    Lz4IndexBuilderK->setInitialArguments({mFileSize});
578    mPxDriver.makeKernelCall(
[5948]579            Lz4IndexBuilderK,
[5864]580            {
[6026]581                    mCompressedByteStream,
[5864]582                    Extenders,
583
584                    // Block Data
585                    BlockData_IsCompressed,
586                    BlockData_BlockStart,
587                    BlockData_BlockEnd
588            }, {
589                    //Uncompressed Data
590                    UncompressedStartPos,
591                    UncompressedLength,
592                    UncompressedOutputPos,
593
[6026]594                    mDeletionMarker,
595                    mM0Marker,
596                    mMatchOffsetMarker
[5864]597            });
598
[6026]599    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
600    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
[5864]601
602}
603
604std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6066]605    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
606    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[5864]607
[6026]608    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
609    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
[5864]610    return std::make_pair(u16Swizzle0, u16Swizzle1);
611}
612
[6043]613void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
614    if (!mCompressionMarker) {
[6066]615        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
[6043]616        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
617        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
618    }
619}
620
[6039]621parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6043]622    this->generateCompressionMarker(iBuilder);
[6039]623
624    // Deletion
[6066]625    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
626    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
[6039]627
628    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
[6043]629    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
[6039]630
[6066]631    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
[6039]632    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
633    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
634
635    return compressedBits;
636}
637
[5948]638int LZ4Generator::get4MbBufferBlocks() {
[6066]639    return 4 * 1024 * 1024 / codegen::BlockSize;
[5948]640}
641
[6066]642int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
643    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
[5864]644}
[6066]645int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
646    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
[5864]647}
648
649
650
[5921]651
[5864]652// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.