source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6136

Last change on this file since 6136 was 6136, checked in by xwa163, 9 months ago
  1. Cleanup legacy slow LZ4 related kernels
  2. Rename lz4d_ext_dep to lz4_decompression
  3. Rename LZ4 AIO related kernels to LZ4 Decompression Kernel
File size: 13.7 KB
RevLine 
[5864]1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/kernel_builder.h>
16#include <kernels/deletion.h>
17#include <kernels/swizzle.h>
18#include <kernels/pdep_kernel.h>
[6026]19#include <kernels/swizzled_multiple_pdep_kernel.h>
[5948]20#include <kernels/lz4/lz4_block_decoder.h>
[6136]21#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
22#include <kernels/lz4/decompression/lz4_parallel_bytestream_decompression.h>
23#include <kernels/lz4/decompression/lz4_swizzled_decompression.h>
24#include <kernels/lz4/decompression/lz4_bitstream_decompression.h>
25#include <kernels/lz4/decompression/lz4_twist_decompression.h>
[6029]26#include <kernels/bitstream_pdep_kernel.h>
[6136]27#include <kernels/lz4/twist_kernel.h>
28#include <kernels/lz4/untwist_kernel.h>
[5864]29
30namespace re { class CC; }
31
32using namespace llvm;
33using namespace parabix;
34using namespace kernel;
35
[6111]36LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
[5864]37}
38
39MainFunctionType LZ4Generator::getMainFunc() {
[6026]40    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
[5864]41}
42
[6136]43void LZ4Generator::generateDecompressionPipeline(const std::string &outputFile) {
44    auto & b = mPxDriver.getBuilder();
[6059]45
[6136]46    this->generateMainFunc(b);
47    this->generateLoadByteStreamAndBitStream(b);
48    parabix::StreamSetBuffer* uncompressedByteStream = this->generateAIODecompression(b);
[6059]49
[6136]50    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(b, 8);
51    outK->setInitialArguments({b->GetString(outputFile)});
52    mPxDriver.makeKernelCall(outK, {uncompressedByteStream}, {});
[6059]53
54    mPxDriver.generatePipelineIR();
55    mPxDriver.deallocateBuffers();
56
[6136]57    b->CreateRetVoid();
[6059]58
59    mPxDriver.finalizeObject();
60}
61
[5864]62
63void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
64    Module * M = iBuilder->getModule();
65    Type * const sizeTy = iBuilder->getSizeTy();
66    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
67    Type * const voidTy = iBuilder->getVoidTy();
68    Type * const inputType = iBuilder->getInt8PtrTy();
69
70    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
71    main->setCallingConv(CallingConv::C);
72    Function::arg_iterator args = main->arg_begin();
[6026]73    mInputStream = &*(args++);
74    mInputStream->setName("input");
[5864]75
[6026]76    mHeaderSize = &*(args++);
77    mHeaderSize->setName("mHeaderSize");
[5864]78
[6026]79    mFileSize = &*(args++);
80    mFileSize->setName("mFileSize");
[5864]81
[6026]82    mHasBlockChecksum = &*(args++);
83    mHasBlockChecksum->setName("mHasBlockChecksum");
[6020]84    // TODO for now, we do not handle blockCheckSum
[6026]85    mHasBlockChecksum = iBuilder->getInt1(false);
[5864]86
87    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
88}
89
[6064]90void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
91    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
92    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
93    sourceK->setInitialArguments({mInputStream, mFileSize});
94    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
95}
[5864]96void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6047]97    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
[6066]98    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
[5864]99
[6026]100    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
101    sourceK->setInitialArguments({mInputStream, mFileSize});
102    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
[6119]103    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
[6026]104    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
[5864]105}
106
[6118]107StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6119]108    return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0];
109}
110
111std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach(
112        std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) {
113    auto mGrepDriver = &mPxDriver;
114    auto & iBuilder = mGrepDriver->getBuilder();
115
[6136]116    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
[6118]117
[6135]118    size_t numOfStreams = compressedBitStreams[0]->getNumOfStreams();
119
120    // 1, 2, 4, 8
121
122    if (numOfStreams <= 2) {
123        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
124        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 2);
125        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
126
127        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
[6136]128        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 2);
[6135]129        lz4I4AioK->setInitialArguments({mFileSize});
130        mGrepDriver->makeKernelCall(lz4I4AioK, {
131                mCompressedByteStream,
132
[6136]133                blockInfo.isCompress,
134                blockInfo.blockStart,
135                blockInfo.blockEnd,
[6135]136
137                twistedCharClasses
138        }, {
139                                            uncompressedTwistedCharClasses
140                                    });
141
142        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder));
143        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 2);
144        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
145        return {untwistedCharClasses};
146    }
147
148    if (numOfStreams <= 4) {
[6132]149        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
[6135]150        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 4);
[6132]151        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
152
153
154        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
[6135]155
[6136]156        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 4);
[6132]157        lz4I4AioK->setInitialArguments({mFileSize});
158        mGrepDriver->makeKernelCall(lz4I4AioK, {
159                mCompressedByteStream,
160
[6136]161                blockInfo.isCompress,
162                blockInfo.blockStart,
163                blockInfo.blockEnd,
[6132]164
165                twistedCharClasses
166        }, {
167                                            uncompressedTwistedCharClasses
168                                    });
169
[6135]170        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder));
171        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 4);
[6132]172        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
173        return {untwistedCharClasses};
174    }
175
[6119]176    std::vector<StreamSetBuffer *> inputStreams = {
177            mCompressedByteStream,
[6118]178
[6136]179            blockInfo.isCompress,
180            blockInfo.blockStart,
181            blockInfo.blockEnd,
[6119]182    };
[6118]183
[6119]184    std::vector<StreamSetBuffer *> outputStream;
185    std::vector<unsigned> numbersOfStreams;
[6118]186
[6119]187    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
188        unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
189        numbersOfStreams.push_back(numOfStreams);
190        inputStreams.push_back(compressedBitStreams[i]);
191        outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder)));
192    }
[6118]193
[6136]194    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamDecompressionKernel>(iBuilder, numbersOfStreams);
[6119]195    lz4AioK->setInitialArguments({mFileSize});
196    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
197
198    return outputStream;
[6118]199}
200
[6119]201
[6059]202StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6136]203    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
[5864]204
[6059]205    // Produce unswizzled bit streams
[6066]206    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
207    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]208    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
209    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
210
211
212
[6066]213    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
214    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
[6059]215
216
[6136]217    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(iBuilder, 4, 2, 4);
[6059]218    lz4AioK->setInitialArguments({mFileSize});
219    mPxDriver.makeKernelCall(
220            lz4AioK,
221            {
222                    mCompressedByteStream,
223
[6136]224                    blockInfo.isCompress,
225                    blockInfo.blockStart,
226                    blockInfo.blockEnd,
[6111]227
[6059]228                    u16Swizzle0,
229                    u16Swizzle1
230            }, {
231                    decompressedSwizzled0,
232                    decompressedSwizzled1
233            });
234
235
[6066]236    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
[6059]237
238    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
239    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
240
241    return decompressionBitStream;
242}
243
[6111]244parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
[6136]245    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
[6066]246    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[6064]247
[6136]248    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamDecompressionKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
[6064]249    lz4AioK->setInitialArguments({mFileSize});
250    mPxDriver.makeKernelCall(
251            lz4AioK,
252            {
253                    mCompressedByteStream,
254
[6136]255                    blockInfo.isCompress,
256                    blockInfo.blockStart,
257                    blockInfo.blockEnd
[6064]258            }, {
259                    decompressionByteStream
260            });
261
262    return decompressionByteStream;
263
264}
265
[6059]266StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
[6132]267    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
[6059]268
[6066]269    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
[6136]270    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(iBuilder);
[6059]271    lz4AioK->setInitialArguments({mFileSize});
272    mPxDriver.makeKernelCall(
273            lz4AioK,
274            {
275                    mCompressedByteStream,
[5974]276
[6059]277                    // Block Data
[6132]278                    blockInfo.isCompress,
279                    blockInfo.blockStart,
280                    blockInfo.blockEnd
[6059]281            }, {
282                    decompressionByteStream
283            });
284
285    return decompressionByteStream;
286}
287
[5948]288int LZ4Generator::get4MbBufferBlocks() {
[6111]289    return mLz4BlockSize / codegen::BlockSize;
[5948]290}
291
[6066]292int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
[6136]293    return this->get4MbBufferBlocks() * 2;
[5864]294}
[6066]295int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
[6136]296    return this->get4MbBufferBlocks() * 2;
[5864]297}
298
[6132]299LZ4BlockInfo LZ4Generator::getBlockInfo(const std::unique_ptr<kernel::KernelBuilder> & b) {
300    LZ4BlockInfo blockInfo;
301    blockInfo.isCompress = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getInputBufferBlocks(b), 1);
302    blockInfo.blockStart = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
303    blockInfo.blockEnd = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
[5864]304
[6132]305    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(b);
306    blockDecoderK->setInitialArguments({b->CreateTrunc(mHasBlockChecksum, b->getInt1Ty()), mHeaderSize, mFileSize});
307    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd});
[5864]308
[6132]309    return blockInfo;
310}
[5921]311
[6132]312
[5864]313// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.