source: icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp @ 6020

Last change on this file since 6020 was 6020, checked in by xwa163, 5 months ago
  1. New version of lz4_swizzled_match_copy kernel with higher performance
  2. Adjust related pipeline code
  3. Remove legacy comments
File size: 14.4 KB
Line 
1
2#include "LZ4Generator.h"
3
4#include <boost/filesystem.hpp>
5#include <boost/iostreams/device/mapped_file.hpp>
6
7#include <llvm/Support/CommandLine.h>
8#include <llvm/Support/PrettyStackTrace.h>
9
10#include <cc/cc_compiler.h>
11
12#include <lz4FrameDecoder.h>
13#include <kernels/streamset.h>
14#include <kernels/cc_kernel.h>
15#include <kernels/s2p_kernel.h>
16#include <kernels/p2s_kernel.h>
17#include <kernels/source_kernel.h>
18#include <kernels/stdout_kernel.h>
19#include <kernels/lz4/lz4_generate_deposit_stream.h>
20#include <kernels/kernel_builder.h>
21#include <kernels/deletion.h>
22#include <kernels/swizzle.h>
23#include <kernels/pdep_kernel.h>
24#include <kernels/lz4/lz4_multiple_pdep_kernel.h>
25#include <kernels/lz4/lz4_match_copy_kernel.h>
26#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
27#include <kernels/lz4/lz4_block_decoder.h>
28#include <kernels/lz4/lz4_index_builder.h>
29
30namespace re { class CC; }
31
32using namespace llvm;
33using namespace parabix;
34using namespace kernel;
35
36LZ4Generator::LZ4Generator():pxDriver("lz4d") {
37
38}
39
40MainFunctionType LZ4Generator::getMainFunc() {
41    return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
42}
43
44
45
46void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
47    auto & iBuilder = pxDriver.getBuilder();
48    this->generateMainFunc(iBuilder);
49
50    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
51
52    // GeneratePipeline
53    this->generateLoadByteStreamAndBitStream(iBuilder);
54
55
56    this->generateExtractAndDepositMarkers(iBuilder);
57
58
59    auto swizzle = this->generateSwizzleExtractData(iBuilder);
60
61
62    // Produce unswizzled bit streams
63    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
64    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
65
66    pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
67
68
69    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
70    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
71
72    // --------------------------------------------------------
73    // End
74    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
75
76    outK->setInitialArguments({iBuilder->GetString(outputFile)});
77    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
78
79    pxDriver.generatePipelineIR();
80    pxDriver.deallocateBuffers();
81
82    iBuilder->CreateRetVoid();
83
84    pxDriver.finalizeObject();
85}
86
87void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
88    auto & iBuilder = pxDriver.getBuilder();
89    this->generateMainFunc(iBuilder);
90
91    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
92
93    // GeneratePipeline
94    this->generateLoadByteStreamAndBitStream(iBuilder);
95    this->generateExtractAndDepositMarkers(iBuilder);
96
97    auto swizzle = this->generateSwizzleExtractData(iBuilder);
98
99    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
100    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
101
102    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
103    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
104
105    // Produce unswizzled bit streams
106    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
107    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
108    pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
109
110    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
111    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
112
113    // --------------------------------------------------------
114    // End
115    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
116    outK->setInitialArguments({iBuilder->GetString(outputFile)});
117    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
118
119    pxDriver.generatePipelineIR();
120    pxDriver.deallocateBuffers();
121
122    iBuilder->CreateRetVoid();
123
124    pxDriver.finalizeObject();
125}
126
127void LZ4Generator::generatePipeline(const std::string& outputFile) {
128    auto & iBuilder = pxDriver.getBuilder();
129    this->generateMainFunc(iBuilder);
130
131    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
132
133    // GeneratePipeline
134    this->generateLoadByteStreamAndBitStream(iBuilder);
135    this->generateExtractAndDepositMarkers(iBuilder);
136
137    auto swizzle = this->generateSwizzleExtractData(iBuilder);
138
139    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
140    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
141
142    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
143    pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
144
145
146    StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
147    StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
148
149    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
150    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
151
152
153    // Produce unswizzled bit streams
154    StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
155    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
156    pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
157
158
159    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
160    pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
161
162    // --------------------------------------------------------
163    // End
164    Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
165    outK->setInitialArguments({iBuilder->GetString(outputFile)});
166    pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
167
168    pxDriver.generatePipelineIR();
169    pxDriver.deallocateBuffers();
170
171    iBuilder->CreateRetVoid();
172
173    pxDriver.finalizeObject();
174}
175
176void LZ4Generator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
177    Module * M = iBuilder->getModule();
178    Type * const sizeTy = iBuilder->getSizeTy();
179    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
180    Type * const voidTy = iBuilder->getVoidTy();
181    Type * const inputType = iBuilder->getInt8PtrTy();
182
183    Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
184    main->setCallingConv(CallingConv::C);
185    Function::arg_iterator args = main->arg_begin();
186    inputStream = &*(args++);
187    inputStream->setName("input");
188
189    headerSize = &*(args++);
190    headerSize->setName("headerSize");
191
192    fileSize = &*(args++);
193    fileSize->setName("fileSize");
194
195    hasBlockChecksum = &*(args++);
196    hasBlockChecksum->setName("hasBlockChecksum");
197    // TODO for now, we do not handle blockCheckSum
198    hasBlockChecksum = iBuilder->getInt1(false);
199
200    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
201}
202
203void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
204    ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
205    BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
206
207    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
208    sourceK->setInitialArguments({inputStream, fileSize});
209    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
210    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
211//    s2pk->addAttribute(MustConsumeAll());
212    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
213}
214
215void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
216    //// Decode Block Information
217    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
218    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
219    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
220
221    //// Generate Helper Markers Extenders, FX, XF
222    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
223    MatchOffsetMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
224        // FX and XF streams will be added to IndexBuilderKernel in the future
225//    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
226//    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
227
228    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
229//    extenderK->addAttribute(MustConsumeAll());
230    pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
231
232
233    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
234    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize, fileSize});
235    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
236
237//    re::CC* xfCC = re::makeCC(0x0f);
238//    re::CC* fxCC = re::makeCC(0xf0);
239//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
240//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
241//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
242//    }
243
244//    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
245//    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
246
247//    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
248//    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
249
250    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
251
252    //TODO handle uncompressed part
253    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
254    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
255    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
256
257    DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
258    M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
259    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
260
261    Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
262    Lz4IndexBuilderK->setInitialArguments({fileSize});
263    pxDriver.makeKernelCall(
264            Lz4IndexBuilderK,
265            {
266                    ByteStream,
267                    Extenders,
268//                    CC_0xFX,
269//                    CC_0xXF,
270
271                    // Block Data
272                    BlockData_IsCompressed,
273                    BlockData_BlockStart,
274                    BlockData_BlockEnd
275            }, {
276                    //Uncompressed Data
277                    UncompressedStartPos,
278                    UncompressedLength,
279                    UncompressedOutputPos,
280
281                    DeletionMarker,
282                    M0Marker,
283                    MatchOffsetMarker
284            });
285
286    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
287    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
288
289}
290
291std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
292    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
293    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
294
295    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
296    pxDriver.makeKernelCall(delK, {DeletionMarker, BasisBits}, {u16Swizzle0, u16Swizzle1});
297    return std::make_pair(u16Swizzle0, u16Swizzle1);
298}
299
300int LZ4Generator::get4MbBufferBlocks() {
301    return 4 * 1024 * 1024 / codegen::BlockSize;
302}
303
304int LZ4Generator::getInputBufferBlocks() {
305    return this->get4MbBufferBlocks() * 2;
306}
307int LZ4Generator::getDecompressedBufferBlocks() {
308    return this->get4MbBufferBlocks() * 2;
309}
310
311
312
313
314// Kernel Pipeline
Note: See TracBrowser for help on using the repository browser.