source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6136

Last change on this file since 6136 was 6136, checked in by xwa163, 8 months ago
  1. Cleanup legacy slow LZ4 related kernels
  2. Rename lz4d_ext_dep to lz4_decompression
  3. Rename LZ4 AIO related kernels to LZ4 Decompression Kernel
File size: 13.7 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/kernel_builder.h>
16#include <kernels/deletion.h>
17#include <kernels/swizzle.h>
18#include <kernels/pdep_kernel.h>
19#include <kernels/swizzled_multiple_pdep_kernel.h>
20#include <kernels/lz4/lz4_block_decoder.h>
21#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
22#include <kernels/lz4/decompression/lz4_parallel_bytestream_decompression.h>
23#include <kernels/lz4/decompression/lz4_swizzled_decompression.h>
24#include <kernels/lz4/decompression/lz4_bitstream_decompression.h>
25#include <kernels/lz4/decompression/lz4_twist_decompression.h>
26#include <kernels/bitstream_pdep_kernel.h>
27#include <kernels/lz4/twist_kernel.h>
28#include <kernels/lz4/untwist_kernel.h>
29
30namespace re { class CC; }
31
32using namespace llvm;
33using namespace parabix;
34using namespace kernel;
35
36LZ4Generator::LZ4Generator():mPxDriver("lz4d"), mLz4BlockSize(4 * 1024 * 1024) {
37}
38
39MainFunctionType LZ4Generator::getMainFunc() {
40    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
41}
42
43void LZ4Generator::generateDecompressionPipeline(const std::string &outputFile) {
44    auto & b = mPxDriver.getBuilder();
45
46    this->generateMainFunc(b);
47    this->generateLoadByteStreamAndBitStream(b);
48    parabix::StreamSetBuffer* uncompressedByteStream = this->generateAIODecompression(b);
49
50    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(b, 8);
51    outK->setInitialArguments({b->GetString(outputFile)});
52    mPxDriver.makeKernelCall(outK, {uncompressedByteStream}, {});
53
54    mPxDriver.generatePipelineIR();
55    mPxDriver.deallocateBuffers();
56
57    b->CreateRetVoid();
58
59    mPxDriver.finalizeObject();
60}
61
62
63void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
64    Module * M = iBuilder->getModule();
65    Type * const sizeTy = iBuilder->getSizeTy();
66    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
67    Type * const voidTy = iBuilder->getVoidTy();
68    Type * const inputType = iBuilder->getInt8PtrTy();
69
70    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
71    main->setCallingConv(CallingConv::C);
72    Function::arg_iterator args = main->arg_begin();
73    mInputStream = &*(args++);
74    mInputStream->setName("input");
75
76    mHeaderSize = &*(args++);
77    mHeaderSize->setName("mHeaderSize");
78
79    mFileSize = &*(args++);
80    mFileSize->setName("mFileSize");
81
82    mHasBlockChecksum = &*(args++);
83    mHasBlockChecksum->setName("mHasBlockChecksum");
84    // TODO for now, we do not handle blockCheckSum
85    mHasBlockChecksum = iBuilder->getInt1(false);
86
87    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
88}
89
90void LZ4Generator::generateLoadByteStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
91    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
92    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
93    sourceK->setInitialArguments({mInputStream, mFileSize});
94    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
95}
96void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
97    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
98    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
99
100    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
101    sourceK->setInitialArguments({mInputStream, mFileSize});
102    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
103    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, cc::BitNumbering::BigEndian);
104    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
105}
106
107StreamSetBuffer * LZ4Generator::generateBitStreamAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
108    return this->convertCompressedBitsStreamWithBitStreamAioApproach({mCompressedBasisBits}, "combined")[0];
109}
110
111std::vector<StreamSetBuffer*> LZ4Generator::convertCompressedBitsStreamWithBitStreamAioApproach(
112        std::vector<StreamSetBuffer*> compressedBitStreams, std::string prefix) {
113    auto mGrepDriver = &mPxDriver;
114    auto & iBuilder = mGrepDriver->getBuilder();
115
116    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
117
118    size_t numOfStreams = compressedBitStreams[0]->getNumOfStreams();
119
120    // 1, 2, 4, 8
121
122    if (numOfStreams <= 2) {
123        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
124        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 2);
125        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
126
127        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 2), this->getInputBufferBlocks(iBuilder));
128        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 2);
129        lz4I4AioK->setInitialArguments({mFileSize});
130        mGrepDriver->makeKernelCall(lz4I4AioK, {
131                mCompressedByteStream,
132
133                blockInfo.isCompress,
134                blockInfo.blockStart,
135                blockInfo.blockEnd,
136
137                twistedCharClasses
138        }, {
139                                            uncompressedTwistedCharClasses
140                                    });
141
142        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder));
143        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 2);
144        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
145        return {untwistedCharClasses};
146    }
147
148    if (numOfStreams <= 4) {
149        StreamSetBuffer* twistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
150        kernel::Kernel* twistK = mGrepDriver->addKernelInstance<kernel::TwistByPDEPKernel>(iBuilder, numOfStreams, 4);
151        mGrepDriver->makeKernelCall(twistK, {compressedBitStreams[0]}, {twistedCharClasses});
152
153
154        StreamSetBuffer* uncompressedTwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 4), this->getInputBufferBlocks(iBuilder));
155
156        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(iBuilder, 4);
157        lz4I4AioK->setInitialArguments({mFileSize});
158        mGrepDriver->makeKernelCall(lz4I4AioK, {
159                mCompressedByteStream,
160
161                blockInfo.isCompress,
162                blockInfo.blockStart,
163                blockInfo.blockEnd,
164
165                twistedCharClasses
166        }, {
167                                            uncompressedTwistedCharClasses
168                                    });
169
170        StreamSetBuffer* untwistedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams), this->getInputBufferBlocks(iBuilder));
171        kernel::Kernel* untwistK = mGrepDriver->addKernelInstance<kernel::UntwistByPEXTKernel>(iBuilder, numOfStreams, 4);
172        mGrepDriver->makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
173        return {untwistedCharClasses};
174    }
175
176    std::vector<StreamSetBuffer *> inputStreams = {
177            mCompressedByteStream,
178
179            blockInfo.isCompress,
180            blockInfo.blockStart,
181            blockInfo.blockEnd,
182    };
183
184    std::vector<StreamSetBuffer *> outputStream;
185    std::vector<unsigned> numbersOfStreams;
186
187    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
188        unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
189        numbersOfStreams.push_back(numOfStreams);
190        inputStreams.push_back(compressedBitStreams[i]);
191        outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(numOfStreams, 1), this->getInputBufferBlocks(iBuilder)));
192    }
193
194    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamDecompressionKernel>(iBuilder, numbersOfStreams);
195    lz4AioK->setInitialArguments({mFileSize});
196    mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
197
198    return outputStream;
199}
200
201
202StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
203    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
204
205    // Produce unswizzled bit streams
206    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
207    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
208    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
209    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
210
211
212
213    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
214    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
215
216
217    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(iBuilder, 4, 2, 4);
218    lz4AioK->setInitialArguments({mFileSize});
219    mPxDriver.makeKernelCall(
220            lz4AioK,
221            {
222                    mCompressedByteStream,
223
224                    blockInfo.isCompress,
225                    blockInfo.blockStart,
226                    blockInfo.blockEnd,
227
228                    u16Swizzle0,
229                    u16Swizzle1
230            }, {
231                    decompressedSwizzled0,
232                    decompressedSwizzled1
233            });
234
235
236    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
237
238    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
239    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0, decompressedSwizzled1}, {decompressionBitStream});
240
241    return decompressionBitStream;
242}
243
244parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, bool enableGather, bool enableScatter, int minParallelLevel) {
245    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
246    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
247
248    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamDecompressionKernel>(iBuilder, mLz4BlockSize, enableGather, enableScatter, minParallelLevel);
249    lz4AioK->setInitialArguments({mFileSize});
250    mPxDriver.makeKernelCall(
251            lz4AioK,
252            {
253                    mCompressedByteStream,
254
255                    blockInfo.isCompress,
256                    blockInfo.blockStart,
257                    blockInfo.blockEnd
258            }, {
259                    decompressionByteStream
260            });
261
262    return decompressionByteStream;
263
264}
265
266StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
267    LZ4BlockInfo blockInfo = this->getBlockInfo(iBuilder);
268
269    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
270    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(iBuilder);
271    lz4AioK->setInitialArguments({mFileSize});
272    mPxDriver.makeKernelCall(
273            lz4AioK,
274            {
275                    mCompressedByteStream,
276
277                    // Block Data
278                    blockInfo.isCompress,
279                    blockInfo.blockStart,
280                    blockInfo.blockEnd
281            }, {
282                    decompressionByteStream
283            });
284
285    return decompressionByteStream;
286}
287
288int LZ4Generator::get4MbBufferBlocks() {
289    return mLz4BlockSize / codegen::BlockSize;
290}
291
292int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
293    return this->get4MbBufferBlocks() * 2;
294}
295int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
296    return this->get4MbBufferBlocks() * 2;
297}
298
299LZ4BlockInfo LZ4Generator::getBlockInfo(const std::unique_ptr<kernel::KernelBuilder> & b) {
300    LZ4BlockInfo blockInfo;
301    blockInfo.isCompress = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getInputBufferBlocks(b), 1);
302    blockInfo.blockStart = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
303    blockInfo.blockEnd = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getInputBufferBlocks(b), 1);
304
305    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(b);
306    blockDecoderK->setInitialArguments({b->CreateTrunc(mHasBlockChecksum, b->getInt1Ty()), mHeaderSize, mFileSize});
307    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd});
308
309    return blockInfo;
310}
311
312
313// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.