source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6026

Last change on this file since 6026 was 6026, checked in by xwa163, 11 months ago
  1. Implement SwizzledMultiplePDEPkernel with the same logic as new PDEPkernel, remove LZ4MultiplePDEPkernel, improve the performance
  2. Remove some unnecessary include
  3. Add prefix for some kernels
  4. Remove a legacy kernel
File size: 14.5 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7
8#include <cc/cc_compiler.h>
9
10#include <kernels/cc_kernel.h>
11#include <kernels/s2p_kernel.h>
12#include <kernels/p2s_kernel.h>
13#include <kernels/source_kernel.h>
14#include <kernels/stdout_kernel.h>
15#include <kernels/lz4/lz4_generate_deposit_stream.h>
16#include <kernels/kernel_builder.h>
17#include <kernels/deletion.h>
18#include <kernels/swizzle.h>
19#include <kernels/pdep_kernel.h>
20#include <kernels/swizzled_multiple_pdep_kernel.h>
21#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
22#include <kernels/lz4/lz4_block_decoder.h>
23#include <kernels/lz4/lz4_index_builder.h>
24
25namespace re { class CC; }
26
27using namespace llvm;
28using namespace parabix;
29using namespace kernel;
30
31LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
32
33}
34
35MainFunctionType LZ4Generator::getMainFunc() {
36    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
37}
38
39
40
41void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
42    auto & iBuilder = mPxDriver.getBuilder();
43    this->generateMainFunc(iBuilder);
44
45    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
46
47    // GeneratePipeline
48    this->generateLoadByteStreamAndBitStream(iBuilder);
49
50
51    this->generateExtractAndDepositMarkers(iBuilder);
52
53
54    auto swizzle = this->generateSwizzleExtractData(iBuilder);
55
56
57    // Produce unswizzled bit streams
58    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
59    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
60
61    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
62
63
64    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
65    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
66
67    // --------------------------------------------------------
68    // End
69    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
70
71    outK->setInitialArguments({iBuilder->GetString(outputFile)});
72    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
73
74    mPxDriver.generatePipelineIR();
75    mPxDriver.deallocateBuffers();
76
77    iBuilder->CreateRetVoid();
78
79    mPxDriver.finalizeObject();
80}
81
82void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
83    auto & iBuilder = mPxDriver.getBuilder();
84    this->generateMainFunc(iBuilder);
85
86    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
87
88    // GeneratePipeline
89    this->generateLoadByteStreamAndBitStream(iBuilder);
90    this->generateExtractAndDepositMarkers(iBuilder);
91
92    auto swizzle = this->generateSwizzleExtractData(iBuilder);
93
94    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
95    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
96
97    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
98    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
99
100    // Produce unswizzled bit streams
101    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
102    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
103    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
104
105    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
106    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
107
108    // --------------------------------------------------------
109    // End
110    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
111    outK->setInitialArguments({iBuilder->GetString(outputFile)});
112    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
113
114    mPxDriver.generatePipelineIR();
115    mPxDriver.deallocateBuffers();
116
117    iBuilder->CreateRetVoid();
118
119    mPxDriver.finalizeObject();
120}
121
122void LZ4Generator::generatePipeline(const std::string& outputFile) {
123    auto & iBuilder = mPxDriver.getBuilder();
124    this->generateMainFunc(iBuilder);
125
126    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
127
128    // GeneratePipeline
129    this->generateLoadByteStreamAndBitStream(iBuilder);
130    this->generateExtractAndDepositMarkers(iBuilder);
131
132    auto swizzle = this->generateSwizzleExtractData(iBuilder);
133
134    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
135    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
136
137    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
138    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
139
140    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
141    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
142
143    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
144    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
145
146
147    // Produce unswizzled bit streams
148    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
149    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
150    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
151
152
153    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
154    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
155
156    // --------------------------------------------------------
157    // End
158    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
159    outK->setInitialArguments({iBuilder->GetString(outputFile)});
160    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
161
162    mPxDriver.generatePipelineIR();
163    mPxDriver.deallocateBuffers();
164
165    iBuilder->CreateRetVoid();
166
167    mPxDriver.finalizeObject();
168}
169
170void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
171    Module * M = iBuilder->getModule();
172    Type * const sizeTy = iBuilder->getSizeTy();
173    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
174    Type * const voidTy = iBuilder->getVoidTy();
175    Type * const inputType = iBuilder->getInt8PtrTy();
176
177    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
178    main->setCallingConv(CallingConv::C);
179    Function::arg_iterator args = main->arg_begin();
180    mInputStream = &*(args++);
181    mInputStream->setName("input");
182
183    mHeaderSize = &*(args++);
184    mHeaderSize->setName("mHeaderSize");
185
186    mFileSize = &*(args++);
187    mFileSize->setName("mFileSize");
188
189    mHasBlockChecksum = &*(args++);
190    mHasBlockChecksum->setName("mHasBlockChecksum");
191    // TODO for now, we do not handle blockCheckSum
192    mHasBlockChecksum = iBuilder->getInt1(false);
193
194    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
195}
196
197void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
198    mCompressedByteStream = mPxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
199    mCompressedBasisBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
200
201    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
202    sourceK->setInitialArguments({mInputStream, mFileSize});
203    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
204    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
205//    s2pk->addAttribute(MustConsumeAll());
206    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
207}
208
209void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
210    //// Decode Block Information
211    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
212    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
213    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
214
215    //// Generate Helper Markers Extenders, FX, XF
216    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
217    mMatchOffsetMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
218        // FX and XF streams will be added to IndexBuilderKernel in the future
219//    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
220//    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
221
222    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
223//    extenderK->addAttribute(MustConsumeAll());
224    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
225
226
227    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
228    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
229    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
230
231//    re::CC* xfCC = re::makeCC(0x0f);
232//    re::CC* fxCC = re::makeCC(0xf0);
233//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
234//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
235//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
236//    }
237
238//    Kernel * CC_0xFXKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
239//    mPxDriver.makeKernelCall(CC_0xFXKernel, {mCompressedBasisBits}, {CC_0xFX});
240
241//    Kernel * CC_0xXFKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
242//    mPxDriver.makeKernelCall(CC_0xXFKernel, {mCompressedBasisBits}, {CC_0xXF});
243
244    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
245
246    //TODO handle uncompressed part
247    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
248    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
249    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
250
251    mDeletionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
252    mM0Marker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
253    mDepositMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
254
255    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
256    Lz4IndexBuilderK->setInitialArguments({mFileSize});
257    mPxDriver.makeKernelCall(
258            Lz4IndexBuilderK,
259            {
260                    mCompressedByteStream,
261                    Extenders,
262//                    CC_0xFX,
263//                    CC_0xXF,
264
265                    // Block Data
266                    BlockData_IsCompressed,
267                    BlockData_BlockStart,
268                    BlockData_BlockEnd
269            }, {
270                    //Uncompressed Data
271                    UncompressedStartPos,
272                    UncompressedLength,
273                    UncompressedOutputPos,
274
275                    mDeletionMarker,
276                    mM0Marker,
277                    mMatchOffsetMarker
278            });
279
280    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
281    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
282
283}
284
285std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
286    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
287    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
288
289    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
290    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
291    return std::make_pair(u16Swizzle0, u16Swizzle1);
292}
293
294int LZ4Generator::get4MbBufferBlocks() {
295    return 4 * 1024 * 1024 / codegen::BlockSize;
296}
297
298int LZ4Generator::getInputBufferBlocks() {
299    return this->get4MbBufferBlocks() * 2;
300}
301int LZ4Generator::getDecompressedBufferBlocks() {
302    return this->get4MbBufferBlocks() * 2;
303}
304
305
306
307
308// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.